62 Commits

Author SHA1 Message Date
John Alanbrook
dcc9659e6b Merge branch 'runtime_rework' into fix_gc 2026-02-14 22:11:31 -06:00
John Alanbrook
2f7f2233b8 compiling 2026-02-14 22:08:55 -06:00
John Alanbrook
a765872017 remove if/else dispatch from compile chain 2026-02-14 17:57:48 -06:00
John Alanbrook
a93218e1ff faster streamline 2026-02-14 17:14:43 -06:00
John Alanbrook
f2c4fa2f2b remove redundant check 2026-02-14 16:49:16 -06:00
John Alanbrook
5fe05c60d3 faster gc 2026-02-14 16:46:11 -06:00
John Alanbrook
e75596ce30 respsect array and object length requests 2026-02-14 15:42:19 -06:00
John Alanbrook
356c51bde3 better array allocation 2026-02-14 14:44:00 -06:00
John Alanbrook
89421e11a4 pull out prettify mcode 2026-02-14 14:14:34 -06:00
John Alanbrook
e5fc04fecd faster mach compile 2026-02-14 14:02:15 -06:00
John Alanbrook
f49ca530bb fix delete gc bug 2026-02-13 21:52:37 -06:00
John Alanbrook
83263379bd ocaml style rooting macros 2026-02-13 20:46:31 -06:00
John Alanbrook
e80e615634 fix array gc bug; new gc error chasing 2026-02-13 16:58:42 -06:00
John Alanbrook
c1430fd59b Merge branch 'fix_gc' into runtime_rework 2026-02-13 15:42:37 -06:00
John Alanbrook
db73eb4eeb Merge branch 'mcode_streamline' into runtime_rework 2026-02-13 15:42:20 -06:00
John Alanbrook
f2556c5622 proper shop caching 2026-02-13 09:04:25 -06:00
John Alanbrook
291304f75d new way to track actor bad memory access 2026-02-13 09:03:33 -06:00
John Alanbrook
3795533554 clean up bytecode 2026-02-13 09:03:00 -06:00
John Alanbrook
d26a96bc62 cached bootstrap 2026-02-13 08:11:35 -06:00
John Alanbrook
0acaabd5fa merge add 2026-02-13 08:09:12 -06:00
John Alanbrook
1ba060668e growable buddy memory runtime 2026-02-13 07:59:52 -06:00
John Alanbrook
77fa058135 mach loading 2026-02-13 07:26:49 -06:00
John Alanbrook
f7e2ff13b5 guard hoisting 2026-02-13 06:32:58 -06:00
John Alanbrook
36fd0a35f9 Merge branch 'fix_gc' into mcode_streamline 2026-02-13 05:59:11 -06:00
John Alanbrook
77c02bf9bf simplify text 2026-02-13 05:59:01 -06:00
John Alanbrook
f251691146 Merge branch 'mach_memory' into mcode_streamline 2026-02-13 05:58:21 -06:00
John Alanbrook
e9ea6ec299 Merge branch 'runtime_rework' into mach_memory 2026-02-13 05:54:28 -06:00
John Alanbrook
bf5fdbc688 backward inference 2026-02-13 05:39:25 -06:00
John Alanbrook
b960d03eeb immediate ascii for string path 2026-02-13 05:35:11 -06:00
John Alanbrook
b4d42fb83d stone pool renamed to constant pool - more appropriate 2026-02-13 05:17:22 -06:00
John Alanbrook
0a680a0cd3 gc print 2026-02-13 05:03:45 -06:00
John Alanbrook
9f0fd84f4f fix growing gc 2026-02-13 04:33:32 -06:00
John Alanbrook
cb9d6e0c0e mmap for poison heap 2026-02-13 04:03:36 -06:00
John Alanbrook
4f18a0b524 tco 2026-02-13 03:57:18 -06:00
John Alanbrook
f296a0c10d fix segv 2026-02-13 03:08:27 -06:00
John Alanbrook
1df6553577 Merge branch 'runtime_rework' into mcode_streamline 2026-02-13 02:52:54 -06:00
John Alanbrook
30a9cfee79 simplify gc model 2026-02-13 02:33:25 -06:00
John Alanbrook
6fff96d9d9 lower intrinsics in mcode 2026-02-13 02:31:16 -06:00
John Alanbrook
4a50d0587d guards in mcode 2026-02-13 02:30:41 -06:00
John Alanbrook
e346348eb5 Merge branch 'fix_gc' into mcode_streamline 2026-02-12 19:15:13 -06:00
John Alanbrook
ff560973f3 Merge branch 'fix_gc' into runtime_rework 2026-02-12 18:57:44 -06:00
John Alanbrook
de4b3079d4 organize 2026-02-12 18:53:06 -06:00
John Alanbrook
29227e655b Merge branch 'pretty_mcode' into mcode_streamline 2026-02-12 18:48:17 -06:00
John Alanbrook
588e88373e Merge branch 'fix_ternary' into pretty_mcode 2026-02-12 18:46:04 -06:00
John Alanbrook
9aca365771 Merge branch 'runtime_rework' into pretty_mcode 2026-02-12 18:44:56 -06:00
John Alanbrook
c56d4d5c3c some cleanup 2026-02-12 18:44:09 -06:00
John Alanbrook
c1e101b24f benchmarks 2026-02-12 18:41:15 -06:00
John Alanbrook
9f0dfbc6a2 fix ternary operator in object literals 2026-02-12 18:33:43 -06:00
John Alanbrook
5c9403a43b compiler optimization output 2026-02-12 18:27:19 -06:00
John Alanbrook
89e34ba71d comprehensive testing for regression analysis 2026-02-12 18:15:03 -06:00
John Alanbrook
73bfa8d7b1 rm some functions 2026-02-12 18:08:56 -06:00
John Alanbrook
4aedb8b0c5 Merge branch 'cli_audit' into ir_artifact 2026-02-12 17:20:45 -06:00
John Alanbrook
ec072f3b63 Merge branch 'runtime_rework' into ir_artifact 2026-02-12 17:18:23 -06:00
John Alanbrook
65755d9c0c fix using old mach 2026-02-12 17:17:12 -06:00
John Alanbrook
19524b3a53 faster json decode 2026-02-12 17:06:48 -06:00
John Alanbrook
f901332c5b clean up cli 2026-02-12 16:45:10 -06:00
John Alanbrook
add136c140 Merge branch 'pretty_mcode' into runtime_rework 2026-02-12 16:36:58 -06:00
John Alanbrook
c1a99dfd4c mcode looks better 2026-02-12 16:36:53 -06:00
John Alanbrook
7b46c6e947 update docs 2026-02-12 16:34:45 -06:00
John Alanbrook
1efb0b1bc9 run with mcode 2026-02-12 16:14:46 -06:00
John Alanbrook
0ba2783b48 Merge branch 'bytecode_cleanup' into mach 2026-02-12 14:08:45 -06:00
John Alanbrook
900db912a5 streamline mcode 2026-02-12 09:43:13 -06:00
113 changed files with 722101 additions and 4312 deletions

View File

@@ -103,6 +103,7 @@ var v = a[] // pop: v is 3, a is [1, 2]
- Most files don't have headers; files in a package are not shared between packages
- No undefined in C API: use `JS_IsNull` and `JS_NULL` only
- A C file with correct macros (`CELL_USE_FUNCS` etc) is loaded as a module by its name (e.g., `png.c` in a package → `use('<package>/png')`)
- Use `JS_FRAME`/`JS_ROOT`/`JS_RETURN` macros for any C function that allocates multiple heap objects. Any `JS_New*`/`JS_SetProperty*` call can trigger GC.
## Project Layout
@@ -113,6 +114,19 @@ var v = a[] // pop: v is 3, a is [1, 2]
- `packages/` — core packages
- `Makefile` — build system (`make` to rebuild, `make bootstrap` for first build)
## Testing
After any C runtime changes, run all three test suites before considering the work done:
```
make # rebuild
./cell --dev vm_suite # VM-level tests (641 tests)
./cell --dev test suite # language-level tests (493 tests)
./cell --dev fuzz # fuzzer (100 iterations)
```
All three must pass with 0 failures.
## Documentation
The `docs/` folder is the single source of truth. The website at `website/` mounts it via Hugo. Key files:

View File

@@ -11,7 +11,7 @@
CELL_SHOP = $(HOME)/.cell
CELL_CORE_PACKAGE = $(CELL_SHOP)/packages/core
# .cm sources that compile to .mach bytecode
# .cm sources that compile to .mcode bytecode
MACH_SOURCES = tokenize.cm parse.cm fold.cm mcode.cm \
internal/bootstrap.cm internal/engine.cm
@@ -48,12 +48,12 @@ cell_main: source/main.c libcell_runtime.dylib
# Regenerate .mach bytecode when any .cm source changes
.mach.stamp: $(MACH_SOURCES)
./cell --core . regen.cm
./cell --dev regen
@touch $@
# Force-regenerate all .mach bytecode files
regen:
./cell --core . regen.cm
./cell --core . regen
@touch .mach.stamp
# Create the cell shop directories

View File

@@ -381,19 +381,21 @@ static const JSCFunctionListEntry js_reader_funcs[] = {
JSValue js_miniz_use(JSContext *js)
{
JS_FRAME(js);
JS_NewClassID(&js_reader_class_id);
JS_NewClass(js, js_reader_class_id, &js_reader_class);
JSValue reader_proto = JS_NewObject(js);
JS_SetPropertyFunctionList(js, reader_proto, js_reader_funcs, sizeof(js_reader_funcs) / sizeof(JSCFunctionListEntry));
JS_SetClassProto(js, js_reader_class_id, reader_proto);
JS_ROOT(reader_proto, JS_NewObject(js));
JS_SetPropertyFunctionList(js, reader_proto.val, js_reader_funcs, sizeof(js_reader_funcs) / sizeof(JSCFunctionListEntry));
JS_SetClassProto(js, js_reader_class_id, reader_proto.val);
JS_NewClassID(&js_writer_class_id);
JS_NewClass(js, js_writer_class_id, &js_writer_class);
JSValue writer_proto = JS_NewObject(js);
JS_SetPropertyFunctionList(js, writer_proto, js_writer_funcs, sizeof(js_writer_funcs) / sizeof(JSCFunctionListEntry));
JS_SetClassProto(js, js_writer_class_id, writer_proto);
JSValue export = JS_NewObject(js);
JS_SetPropertyFunctionList(js, export, js_miniz_funcs, sizeof(js_miniz_funcs)/sizeof(JSCFunctionListEntry));
return export;
JS_ROOT(writer_proto, JS_NewObject(js));
JS_SetPropertyFunctionList(js, writer_proto.val, js_writer_funcs, sizeof(js_writer_funcs) / sizeof(JSCFunctionListEntry));
JS_SetClassProto(js, js_writer_class_id, writer_proto.val);
JS_ROOT(export, JS_NewObject(js));
JS_SetPropertyFunctionList(js, export.val, js_miniz_funcs, sizeof(js_miniz_funcs)/sizeof(JSCFunctionListEntry));
JS_RETURN(export.val);
}

167
bench.ce
View File

@@ -8,7 +8,7 @@ var os = use('os')
var testlib = use('internal/testlib')
var math = use('math/radians')
if (!args) args = []
var _args = args == null ? [] : args
var target_pkg = null // null = current package
var target_bench = null // null = all benchmarks, otherwise specific bench file
@@ -55,14 +55,19 @@ function stddev(arr, mean_val) {
function percentile(arr, p) {
if (length(arr) == 0) return 0
var sorted = sort(arr)
var idx = floor(arr) * p / 100
var idx = floor(length(arr) * p / 100)
if (idx >= length(arr)) idx = length(arr) - 1
return sorted[idx]
}
// Parse arguments similar to test.ce
function parse_args() {
if (length(args) == 0) {
var name = null
var lock = null
var resolved = null
var bench_path = null
if (length(_args) == 0) {
if (!testlib.is_valid_package('.')) {
log.console('No cell.toml found in current directory')
return false
@@ -71,7 +76,7 @@ function parse_args() {
return true
}
if (args[0] == 'all') {
if (_args[0] == 'all') {
if (!testlib.is_valid_package('.')) {
log.console('No cell.toml found in current directory')
return false
@@ -80,28 +85,28 @@ function parse_args() {
return true
}
if (args[0] == 'package') {
if (length(args) < 2) {
if (_args[0] == 'package') {
if (length(_args) < 2) {
log.console('Usage: cell bench package <name> [bench]')
log.console(' cell bench package all')
return false
}
if (args[1] == 'all') {
if (_args[1] == 'all') {
all_pkgs = true
log.console('Benchmarking all packages...')
return true
}
var name = args[1]
var lock = shop.load_lock()
name = _args[1]
lock = shop.load_lock()
if (lock[name]) {
target_pkg = name
} else if (starts_with(name, '/') && testlib.is_valid_package(name)) {
target_pkg = name
} else {
if (testlib.is_valid_package('.')) {
var resolved = pkg.alias_to_package(null, name)
resolved = pkg.alias_to_package(null, name)
if (resolved) {
target_pkg = resolved
} else {
@@ -114,8 +119,8 @@ function parse_args() {
}
}
if (length(args) >= 3) {
target_bench = args[2]
if (length(_args) >= 3) {
target_bench = _args[2]
}
log.console(`Benchmarking package: ${target_pkg}`)
@@ -123,7 +128,7 @@ function parse_args() {
}
// cell bench benches/suite or cell bench <path>
var bench_path = args[0]
bench_path = _args[0]
// Normalize path - add benches/ prefix if not present
if (!starts_with(bench_path, 'benches/') && !starts_with(bench_path, '/')) {
@@ -160,12 +165,15 @@ function collect_benches(package_name, specific_bench) {
var files = pkg.list_files(package_name)
var bench_files = []
arrfor(files, function(f) {
var bench_name = null
var match_name = null
var match_base = null
if (starts_with(f, "benches/") && ends_with(f, ".cm")) {
if (specific_bench) {
var bench_name = text(f, 0, -3)
var match_name = specific_bench
bench_name = text(f, 0, -3)
match_name = specific_bench
if (!starts_with(match_name, 'benches/')) match_name = 'benches/' + match_name
var match_base = ends_with(match_name, '.cm') ? text(match_name, 0, -3) : match_name
match_base = ends_with(match_name, '.cm') ? text(match_name, 0, -3) : match_name
if (bench_name != match_base) return
}
push(bench_files, f)
@@ -180,24 +188,25 @@ function calibrate_batch_size(bench_fn, is_batch) {
var n = MIN_BATCH_SIZE
var dt = 0
var start = 0
var new_n = 0
var calc = 0
var target_n = 0
// Find a batch size that takes at least MIN_SAMPLE_NS
while (n < MAX_BATCH_SIZE) {
// Ensure n is a valid number before calling
if (!is_number(n) || n < 1) {
n = 1
break
}
var start = os.now()
start = os.now()
bench_fn(n)
dt = os.now() - start
if (dt >= MIN_SAMPLE_NS) break
// Double the batch size
var new_n = n * 2
// Check if multiplication produced a valid number
new_n = n * 2
if (!is_number(new_n) || new_n > MAX_BATCH_SIZE) {
n = MAX_BATCH_SIZE
break
@@ -207,10 +216,9 @@ function calibrate_batch_size(bench_fn, is_batch) {
// Adjust to target sample duration
if (dt > 0 && dt < TARGET_SAMPLE_NS && is_number(n) && is_number(dt)) {
var calc = n * TARGET_SAMPLE_NS / dt
calc = n * TARGET_SAMPLE_NS / dt
if (is_number(calc) && calc > 0) {
var target_n = floor(calc)
// Check if floor returned a valid number
target_n = floor(calc)
if (is_number(target_n) && target_n > 0) {
if (target_n > MAX_BATCH_SIZE) target_n = MAX_BATCH_SIZE
if (target_n < MIN_BATCH_SIZE) target_n = MIN_BATCH_SIZE
@@ -219,7 +227,6 @@ function calibrate_batch_size(bench_fn, is_batch) {
}
}
// Safety check - ensure we always return a valid batch size
if (!is_number(n) || n < 1) {
n = 1
}
@@ -230,72 +237,70 @@ function calibrate_batch_size(bench_fn, is_batch) {
// Run a single benchmark function
function run_single_bench(bench_fn, bench_name) {
var timings_per_op = []
// Detect benchmark format:
// 1. Object with { setup, run, teardown } - structured format
// 2. Function that accepts (n) - batch format
// 3. Function that accepts () - legacy format
var is_structured = is_object(bench_fn) && bench_fn.run
var is_batch = false
var batch_size = 1
var setup_fn = null
var run_fn = null
var teardown_fn = null
var calibrate_fn = null
var _detect = null
var i = 0
var state = null
var start = 0
var duration = 0
var ns_per_op = 0
if (is_structured) {
setup_fn = bench_fn.setup || function() { return null }
run_fn = bench_fn.run
teardown_fn = bench_fn.teardown || function(state) {}
teardown_fn = bench_fn.teardown || function(s) {}
// Check if run function accepts batch size
try {
_detect = function() {
var test_state = setup_fn()
run_fn(1, test_state)
is_batch = true
if (teardown_fn) teardown_fn(test_state)
} catch (e) {
} disruption {
is_batch = false
}
_detect()
// Create wrapper for calibration
var calibrate_fn = function(n) {
var state = setup_fn()
run_fn(n, state)
if (teardown_fn) teardown_fn(state)
calibrate_fn = function(n) {
var s = setup_fn()
run_fn(n, s)
if (teardown_fn) teardown_fn(s)
}
batch_size = calibrate_batch_size(calibrate_fn, is_batch)
// Safety check for structured benchmarks
if (!is_number(batch_size) || batch_size < 1) {
batch_size = 1
}
} else {
// Simple function format
try {
_detect = function() {
bench_fn(1)
is_batch = true
} catch (e) {
} disruption {
is_batch = false
}
_detect()
batch_size = calibrate_batch_size(bench_fn, is_batch)
}
// Safety check - ensure batch_size is valid
if (!batch_size || batch_size < 1) {
batch_size = 1
}
// Warmup phase
for (var i = 0; i < WARMUP_BATCHES; i++) {
// Ensure batch_size is valid before warmup
for (i = 0; i < WARMUP_BATCHES; i++) {
if (!is_number(batch_size) || batch_size < 1) {
var type_str = is_null(batch_size) ? 'null' : is_number(batch_size) ? 'number' : is_text(batch_size) ? 'text' : is_object(batch_size) ? 'object' : is_array(batch_size) ? 'array' : is_function(batch_size) ? 'function' : is_logical(batch_size) ? 'logical' : 'unknown'
log.console(`WARNING: batch_size became ${type_str} = ${batch_size}, resetting to 1`)
batch_size = 1
}
if (is_structured) {
var state = setup_fn()
state = setup_fn()
if (is_batch) {
run_fn(batch_size, state)
} else {
@@ -312,35 +317,34 @@ function run_single_bench(bench_fn, bench_name) {
}
// Measurement phase - collect SAMPLES timing samples
for (var i = 0; i < SAMPLES; i++) {
// Double-check batch_size is valid (should never happen, but defensive)
for (i = 0; i < SAMPLES; i++) {
if (!is_number(batch_size) || batch_size < 1) {
batch_size = 1
}
if (is_structured) {
var state = setup_fn()
var start = os.now()
state = setup_fn()
start = os.now()
if (is_batch) {
run_fn(batch_size, state)
} else {
run_fn(state)
}
var duration = os.now() - start
duration = os.now() - start
if (teardown_fn) teardown_fn(state)
var ns_per_op = is_batch ? duration / batch_size : duration
ns_per_op = is_batch ? duration / batch_size : duration
push(timings_per_op, ns_per_op)
} else {
var start = os.now()
start = os.now()
if (is_batch) {
bench_fn(batch_size)
} else {
bench_fn()
}
var duration = os.now() - start
duration = os.now() - start
var ns_per_op = is_batch ? duration / batch_size : duration
ns_per_op = is_batch ? duration / batch_size : duration
push(timings_per_op, ns_per_op)
}
}
@@ -354,7 +358,6 @@ function run_single_bench(bench_fn, bench_name) {
var p95_ns = percentile(timings_per_op, 95)
var p99_ns = percentile(timings_per_op, 99)
// Calculate ops/s from median
var ops_per_sec = 0
if (median_ns > 0) {
ops_per_sec = floor(1000000000 / median_ns)
@@ -408,18 +411,21 @@ function run_benchmarks(package_name, specific_bench) {
arrfor(bench_files, function(f) {
var mod_path = text(f, 0, -3)
var load_error = false
var bench_mod = null
var use_pkg = null
var benches = []
var error_result = null
var file_result = {
name: f,
benchmarks: []
}
try {
var bench_mod
var use_pkg = package_name ? package_name : fd.realpath('.')
var _load_file = function() {
use_pkg = package_name ? package_name : fd.realpath('.')
bench_mod = shop.use(mod_path, use_pkg)
var benches = []
if (is_function(bench_mod)) {
push(benches, {name: 'main', fn: bench_mod})
} else if (is_object(bench_mod)) {
@@ -432,8 +438,11 @@ function run_benchmarks(package_name, specific_bench) {
if (length(benches) > 0) {
log.console(` ${f}`)
arrfor(benches, function(b) {
try {
var result = run_single_bench(b.fn, b.name)
var bench_error = false
var result = null
var _run_bench = function() {
result = run_single_bench(b.fn, b.name)
result.package = pkg_result.package
push(file_result.benchmarks, result)
pkg_result.total++
@@ -444,25 +453,32 @@ function run_benchmarks(package_name, specific_bench) {
if (result.batch_size > 1) {
log.console(` batch: ${result.batch_size} samples: ${result.samples}`)
}
} catch (e) {
log.console(` ERROR ${b.name}: ${e}`)
log.error(e)
var error_result = {
} disruption {
bench_error = true
}
_run_bench()
if (bench_error) {
log.console(` ERROR ${b.name}`)
error_result = {
package: pkg_result.package,
name: b.name,
error: e.toString()
error: "benchmark disrupted"
}
push(file_result.benchmarks, error_result)
pkg_result.total++
}
})
}
} catch (e) {
log.console(` Error loading ${f}: ${e}`)
var error_result = {
} disruption {
load_error = true
}
_load_file()
if (load_error) {
log.console(` Error loading ${f}`)
error_result = {
package: pkg_result.package,
name: "load_module",
error: `Error loading module: ${e}`
error: "error loading module"
}
push(file_result.benchmarks, error_result)
pkg_result.total++
@@ -478,15 +494,16 @@ function run_benchmarks(package_name, specific_bench) {
// Run all benchmarks
var all_results = []
var packages = null
if (all_pkgs) {
if (testlib.is_valid_package('.')) {
push(all_results, run_benchmarks(null, null))
}
var packages = shop.list_packages()
arrfor(packages, function(pkg) {
push(all_results, run_benchmarks(pkg, null))
packages = shop.list_packages()
arrfor(packages, function(p) {
push(all_results, run_benchmarks(p, null))
})
} else {
push(all_results, run_benchmarks(target_pkg, target_bench))

183
bench_native.ce Normal file
View File

@@ -0,0 +1,183 @@
// bench_native.ce — compare VM vs native execution speed
//
// Usage:
// cell --dev bench_native.ce <module.cm> [iterations]
//
// Compiles (if needed) and benchmarks a module via both VM and native dylib.
// Reports median/mean timing per benchmark + speedup ratio.
var os = use('os')
var fd = use('fd')
// --- Command line ---
// args[0] is the module to benchmark; args[1] optionally overrides the
// number of timed samples. A missing args array is treated like an empty one.
if (args == null || length(args) < 1) {
  print('usage: cell --dev bench_native.ce <module.cm> [iterations]')
  return
}
var file = args[0]
// Module name handed to use(): the file argument without a trailing '.cm'.
var name = file
if (ends_with(name, '.cm')) {
  name = text(name, 0, length(name) - 3)
}
// Timed samples collected per benchmark function (default 11).
var iterations = 11
if (length(args) > 1) {
  iterations = number(args[1])
  // Reject anything that is not a count >= 1: with no samples collected the
  // statistics helpers return 0 and every benchmark would be reported as 0ns.
  if (!is_number(iterations) || iterations < 1) {
    print('iterations must be a number >= 1')
    print('usage: cell --dev bench_native.ce <module.cm> [iterations]')
    return
  }
  // Use a whole number so the reported sample count matches the loop count.
  iterations = floor(iterations)
}
// Untimed calls made before sampling starts.
def WARMUP = 3
// Symbol looked up in the dylib: 'js_' + module name with '/' and '-'
// replaced by '_' + '_use'.
var safe = replace(replace(name, '/', '_'), '-', '_')
var symbol = 'js_' + safe + '_use'
// The dylib is looked up next to the source file, named '<file>.dylib'.
var dylib_path = './' + file + '.dylib'
// --- Statistics ---
// Thin wrapper around the built-in sort(); returns whatever sort(arr) returns.
var stat_sort = function(arr) {
  var ordered = sort(arr)
  return ordered
}
// Median of the values in `arr`.
// Returns 0 for an empty array. For an even count the two middle values of
// the sorted data are averaged; for an odd count the middle value is returned.
var stat_median = function(arr) {
  var count = length(arr)
  var ordered = null
  var upper = 0
  if (count == 0) {
    return 0
  }
  ordered = stat_sort(arr)
  upper = floor(count / 2)
  if (count % 2 == 0) {
    return (ordered[upper - 1] + ordered[upper]) / 2
  }
  return ordered[upper]
}
// Arithmetic mean of the values in `arr`; 0 for an empty array.
var stat_mean = function(arr) {
  var count = length(arr)
  var add = function(a, b) { return a + b }
  if (count == 0) {
    return 0
  }
  return reduce(arr, add) / count
}
// Render a duration given in nanoseconds as text with a unit suffix.
// Below 1000 the value is rounded to a whole number of 'ns'; larger values
// are scaled to 'us', 'ms' or 's' and rounded to two decimal places.
var format_ns = function(ns) {
  // ns divided by `divisor`, rounded to two decimals, as text.
  var scaled = function(divisor) {
    return text(round(ns / divisor * 100) / 100)
  }
  var out = null
  if (ns < 1000) {
    out = text(round(ns)) + 'ns'
  } else if (ns < 1000000) {
    out = scaled(1000) + 'us'
  } else if (ns < 1000000000) {
    out = scaled(1000000) + 'ms'
  } else {
    out = scaled(1000000000) + 's'
  }
  return out
}
// --- Collect benchmarks from module ---
// Build the list of benchmark entries ({name, fn}) exposed by a module value.
// - a function module yields a single entry named 'main'
// - an object module yields one entry per key whose value is a function
// - anything else yields an empty list
var collect_benches = function(mod) {
  var found = []
  var names = null
  var idx = 0
  var key = null
  if (is_function(mod)) {
    push(found, {name: 'main', fn: mod})
    return found
  }
  if (!is_object(mod)) {
    return found
  }
  names = array(mod)
  for (idx = 0; idx < length(names); idx++) {
    key = names[idx]
    if (is_function(mod[key])) {
      push(found, {name: key, fn: mod[key]})
    }
  }
  return found
}
// --- Run one benchmark function ---
// Time one benchmark function.
// Calls fn(1) WARMUP times untimed, then `iterations` more times, recording
// the os.now() difference around each call.
// Returns {label, median, mean} computed over the recorded differences.
var run_bench = function(fn, label) {
  var timings = []
  var round_no = 0
  var began = 0
  var ended = 0
  // warmup: results and timings are discarded
  for (round_no = 0; round_no < WARMUP; round_no++) {
    fn(1)
  }
  // timed samples
  for (round_no = 0; round_no < iterations; round_no++) {
    began = os.now()
    fn(1)
    ended = os.now()
    push(timings, ended - began)
  }
  return {
    label: label,
    median: stat_median(timings),
    mean: stat_mean(timings)
  }
}
// --- Load VM module ---
// The module is loaded through use() under its name (file path minus '.cm');
// its exported functions become the reference list of benchmarks.
print('loading VM module: ' + file)
var vm_mod = use(name)
var vm_benches = collect_benches(vm_mod)
// Nothing to measure: stop before touching the native side.
if (length(vm_benches) == 0) {
print('no benchmarkable functions found in ' + file)
return
}
// --- Load native module ---
// The native build is optional: it is used only when the dylib file exists.
var native_mod = null
var native_benches = []
var has_native = fd.is_file(dylib_path)
if (has_native) {
print('loading native module: ' + dylib_path)
var lib = os.dylib_open(dylib_path)
// NOTE(review): the value returned for `symbol` is passed straight to
// collect_benches, so it is presumably already the module value (function or
// object) — confirm against os.dylib_symbol. A failed open is not checked here.
native_mod = os.dylib_symbol(lib, symbol)
native_benches = collect_benches(native_mod)
} else {
print('no ' + dylib_path + ' found -- VM-only benchmarking')
print(' hint: cell --dev compile.ce ' + file)
}
// --- Run benchmarks ---
// Report header: sample and warmup counts, surrounded by blank lines.
print('')
print('samples: ' + text(iterations) + ' (warmup: ' + text(WARMUP) + ')')
print('')
// Right-pad text `s` with spaces until it is at least `n` long.
// Text that is already long enough is returned unchanged.
var pad = function(s, n) {
  var padded = s
  while (length(padded) < n) {
    padded = padded + ' '
  }
  return padded
}
// Run every VM benchmark and, when a native benchmark with the same name
// exists, run it too and print the VM/native median ratio as the speedup.
var i = 0
var j = 0
var b = null
var vm_result = null
var nat_result = null
var found = false
var gutter = pad('', 20)
for (i = 0; i < length(vm_benches); i++) {
  b = vm_benches[i]
  vm_result = run_bench(b.fn, 'vm')
  print(pad(b.name, 20) + ' VM: ' + pad(format_ns(vm_result.median), 12) + ' (median) ' + format_ns(vm_result.mean) + ' (mean)')
  // look for the native benchmark with the same name
  found = false
  for (j = 0; j < length(native_benches); j++) {
    if (native_benches[j].name == b.name) {
      nat_result = run_bench(native_benches[j].fn, 'native')
      print(gutter + ' NT: ' + pad(format_ns(nat_result.median), 12) + ' (median) ' + format_ns(nat_result.mean) + ' (mean)')
      // a zero native median would make the ratio meaningless, so skip it
      if (nat_result.median > 0) {
        print(gutter + ' speedup: ' + text(round(vm_result.median / nat_result.median * 100) / 100) + 'x')
      }
      found = true
    }
  }
  if (has_native && !found) {
    print(gutter + ' NT: (no matching function)')
  }
  print('')
}

232
benches/actor_patterns.cm Normal file
View File

@@ -0,0 +1,232 @@
// actor_patterns.cm — Actor concurrency benchmarks
// Message passing, fan-out/fan-in, mailbox throughput.
// Each benchmark is exported as a batch function that takes an iteration count n.
// Note: actor benchmarks are measured differently from pure compute.
// Each iteration sends messages and waits for results, so they're
// inherently slower but test real concurrency costs.
// Simple ping-pong: two actors sending messages back and forth
// Since we can't create real actors from a module, we simulate
// the message-passing patterns with function call overhead that
// mirrors what the actor dispatch does.
// Simulate message dispatch overhead
// Create a simulated mailbox: an empty message array plus a counter of
// messages handed out by receive().
function make_mailbox() {
  var box = {queue: [], delivered: 0}
  return box
}
// Append `msg` to the mailbox's pending messages. Always returns null.
function send(mailbox, msg) {
  var pending = mailbox.queue
  push(pending, msg)
  return null
}
// Take one pending message out of the mailbox and count it as delivered.
// Returns null (and leaves the counter untouched) when nothing is pending.
// NOTE(review): the message is removed with pop(), which presumably takes the
// most recently pushed element, so delivery order is last-in-first-out — confirm.
function receive(mailbox) {
  var pending = mailbox.queue
  if (length(pending) != 0) {
    mailbox.delivered = mailbox.delivered + 1
    return pop(pending)
  }
  return null
}
// Discard every pending message and return how many were removed.
// The mailbox's delivered counter is not changed.
function drain(mailbox) {
  var removed = 0
  var pending = mailbox.queue
  while (length(pending) > 0) {
    pop(pending)
    removed = removed + 1
  }
  return removed
}
// Ping-pong: two simulated actors bounce one message between their mailboxes.
// Each round A takes its message and sends a "pong" to B, then B takes it and
// sends a "ping" back to A, incrementing `val` on every hop.
// Returns the total number of messages received by both mailboxes.
function ping_pong(rounds) {
  var a_box = make_mailbox()
  var b_box = make_mailbox()
  var turn = 0
  var incoming = null
  // seed the exchange with the first ping
  send(a_box, {type: "ping", val: 0})
  while (turn < rounds) {
    // A's turn
    incoming = receive(a_box)
    if (incoming) {
      send(b_box, {type: "pong", val: incoming.val + 1})
    }
    // B's turn
    incoming = receive(b_box)
    if (incoming) {
      send(a_box, {type: "ping", val: incoming.val + 1})
    }
    turn++
  }
  return a_box.delivered + b_box.delivered
}
// Fan-out: a single sender delivers `messages_per` messages to each of
// `n_receivers` mailboxes, then every mailbox is drained.
// Returns the total number of messages drained.
function fan_out(n_receivers, messages_per) {
  var boxes = []
  var r = 0
  var seq = 0
  var drained = 0
  while (r < n_receivers) {
    push(boxes, make_mailbox())
    r++
  }
  // one message per receiver for every sequence number
  while (seq < messages_per) {
    r = 0
    while (r < n_receivers) {
      send(boxes[r], {seq: seq, data: seq * 17})
      r++
    }
    seq++
  }
  // every receiver empties its mailbox
  r = 0
  while (r < n_receivers) {
    drained += drain(boxes[r])
    r++
  }
  return drained
}
// Fan-in: `n_senders` senders each put `messages_per` messages into one shared
// inbox, which is then consumed until empty.
// Returns the sum of the `data` fields of all received messages.
function fan_in(n_senders, messages_per) {
  var inbox = make_mailbox()
  var sender = 0
  var seq = 0
  var sum = 0
  var got = null
  // all senders enqueue their messages
  while (sender < n_senders) {
    seq = 0
    while (seq < messages_per) {
      send(inbox, {sender: sender, seq: seq, data: sender * 100 + seq})
      seq++
    }
    sender++
  }
  // the receiver consumes until receive() reports an empty inbox
  got = receive(inbox)
  while (got) {
    sum += got.data
    got = receive(inbox)
  }
  return sum
}
// Pipeline: `items` values flow through `stages` processing steps connected by
// stages + 1 mailboxes. Each step empties its input mailbox, maps val to
// val * 2 + 1 and forwards it to the next mailbox.
// Returns the sum of the values that reach the last mailbox.
function pipeline(stages, items) {
  var chain = []
  var k = 0
  var stage = 0
  var item = null
  var sum = 0
  // one mailbox in front of each stage plus one for the output
  while (k <= stages) {
    push(chain, make_mailbox())
    k++
  }
  // feed the input mailbox
  k = 0
  while (k < items) {
    send(chain[0], {val: k})
    k++
  }
  // run the stages in order; each fully empties its input before the next starts
  while (stage < stages) {
    item = receive(chain[stage])
    while (item) {
      send(chain[stage + 1], {val: item.val * 2 + 1})
      item = receive(chain[stage])
    }
    stage++
  }
  // collect the output
  item = receive(chain[stages])
  while (item) {
    sum += item.val
    item = receive(chain[stages])
  }
  return sum
}
// Request-response (simulated RPC): for each of `n_requests` the client posts
// a request carrying its own mailbox as `reply_to`, the server answers with
// payload * 2 + 1, and the client reads the reply.
// Returns the sum of all `result` values the client received.
function request_response(n_requests) {
  var client_box = make_mailbox()
  var server_box = make_mailbox()
  var call_no = 0
  var request = null
  var reply = null
  var sum = 0
  while (call_no < n_requests) {
    // client -> server
    send(server_box, {id: call_no, payload: call_no * 3, reply_to: client_box})
    // server handles the request and replies to the mailbox named in it
    request = receive(server_box)
    if (request) {
      send(request.reply_to, {id: request.id, result: request.payload * 2 + 1})
    }
    // client picks up the reply
    reply = receive(client_box)
    if (reply) {
      sum += reply.result
    }
    call_no++
  }
  return sum
}
// Call work() n times and return the sum of its results.
function sum_runs(n, work) {
  var total = 0
  var k = 0
  for (k = 0; k < n; k++) {
    total += work()
  }
  return total
}

// Exported benchmarks. Each takes the batch size n, repeats its workload n
// times and returns the accumulated result of the workload.
return {
  // Ping-pong: 10K rounds
  ping_pong_10k: function(n) {
    return sum_runs(n, function() { return ping_pong(10000) })
  },
  // Fan-out: 100 receivers, 100 messages each
  fan_out_100x100: function(n) {
    return sum_runs(n, function() { return fan_out(100, 100) })
  },
  // Fan-in: 100 senders, 100 messages each
  fan_in_100x100: function(n) {
    return sum_runs(n, function() { return fan_in(100, 100) })
  },
  // Pipeline: 10 stages, 1000 items
  pipeline_10x1k: function(n) {
    return sum_runs(n, function() { return pipeline(10, 1000) })
  },
  // Request-response: 5K requests
  rpc_5k: function(n) {
    return sum_runs(n, function() { return request_response(5000) })
  }
}

141
benches/cli_tool.cm Normal file
View File

@@ -0,0 +1,141 @@
// cli_tool.cm — CLI tool simulation (macro benchmark)
// Parse args + process data + transform + format output.
// Simulates a realistic small utility program.
var json = use('json')
// Build `n` synthetic user records.
// Scores come from a linear congruential generator seeded with 42, so the
// data is the same on every run; status and department cycle through fixed
// lists by record index.
function generate_records(n) {
  var statuses = ["active", "inactive", "pending", "archived"]
  var departments = ["eng", "sales", "ops", "hr", "marketing"]
  var out = []
  var seed = 42
  var k = 0
  while (k < n) {
    seed = ((seed * 1103515245 + 12345) & 0x7FFFFFFF) | 0
    push(out, {
      id: k + 1,
      name: `user_${k}`,
      score: (seed % 1000) / 10,
      status: statuses[k % 4],
      department: departments[k % 5]
    })
    k++
  }
  return out
}
// Return a new array holding the records whose `field` equals `value`,
// in their original order.
function filter_records(records, field, value) {
  var kept = []
  var idx = 0
  var rec = null
  while (idx < length(records)) {
    rec = records[idx]
    if (rec[field] == value) {
      push(kept, rec)
    }
    idx++
  }
  return kept
}
// Bucket records by the value of `field`.
// Returns an object mapping each distinct value to the array of records that
// carry it; records whose value is falsy are collected under "unknown".
function group_by(records, field) {
  var buckets = {}
  var idx = 0
  var rec = null
  var bucket_key = null
  while (idx < length(records)) {
    rec = records[idx]
    bucket_key = rec[field]
    if (!bucket_key) {
      bucket_key = "unknown"
    }
    // first record for this key: start its bucket
    if (!buckets[bucket_key]) {
      buckets[bucket_key] = []
    }
    push(buckets[bucket_key], rec)
    idx++
  }
  return buckets
}
// Aggregate: compute per-group statistics over the `score` field.
// `groups` maps a group key to an array of records (as built by group_by).
// Returns one {group, count, average, low, high} entry per key.
// low/high are seeded from the group's first score rather than from fixed
// sentinels, so they are correct for any score range (including negative
// scores). An empty group reports count 0 and null for average, low and high
// instead of dividing by zero.
function aggregate(groups) {
  var keys = array(groups)
  var result = []
  var i = 0
  var j = 0
  var grp = null
  var n = 0
  var score = 0
  var total = 0
  var mn = null
  var mx = null
  for (i = 0; i < length(keys); i++) {
    grp = groups[keys[i]]
    n = length(grp)
    total = 0
    mn = null
    mx = null
    if (n > 0) {
      mn = grp[0].score
      mx = grp[0].score
    }
    for (j = 0; j < n; j++) {
      score = grp[j].score
      total += score
      if (score < mn) mn = score
      if (score > mx) mx = score
    }
    push(result, {
      group: keys[i],
      count: n,
      average: n > 0 ? total / n : null,
      low: mn,
      high: mx
    })
  }
  return result
}
// Full pipeline: generate → filter → sort → limit → group → aggregate → encode.
// Builds `n_records` synthetic records, keeps the "active" ones, sorts them by
// score, keeps at most the first 50, groups by department, aggregates, sorts
// the statistics by average and JSON-encodes them.
// Returns the length of the encoded text.
function run_pipeline(n_records) {
  var all_records = generate_records(n_records)
  var active = filter_records(all_records, "status", "active")
  var by_department = null
  var summary = null
  var encoded = null
  active = sort(active, "score")
  // cap the working set at 50 records
  if (length(active) > 50) {
    active = array(active, 0, 50)
  }
  by_department = group_by(active, "department")
  summary = sort(aggregate(by_department), "average")
  encoded = json.encode(summary)
  return length(encoded)
}
// Exported benchmarks. Each runs the full pipeline n times on a dataset of
// the stated size and returns the summed output lengths.
return {
  // Small dataset (100 records)
  cli_pipeline_100: function(n) {
    var acc = 0
    var run = 0
    while (run < n) {
      acc += run_pipeline(100)
      run++
    }
    return acc
  },
  // Medium dataset (1000 records)
  cli_pipeline_1k: function(n) {
    var acc = 0
    var run = 0
    while (run < n) {
      acc += run_pipeline(1000)
      run++
    }
    return acc
  },
  // Large dataset (10K records)
  cli_pipeline_10k: function(n) {
    var acc = 0
    var run = 0
    while (run < n) {
      acc += run_pipeline(10000)
      run++
    }
    return acc
  }
}

162
benches/deltablue.cm Normal file
View File

@@ -0,0 +1,162 @@
// deltablue.cm — Constraint solver kernel (DeltaBlue-inspired)
// Dynamic dispatch, pointer chasing, object-heavy workload.
// Constraint strength levels stored in a constraint's `strength` field.
// NOTE(review): presumably a lower number means a stronger constraint, as in
// the classic DeltaBlue benchmark — confirm. The solver in this file never
// reads `strength`, and only STRONG and NORMAL are used by the builders here.
def REQUIRED = 0
def STRONG = 1
def NORMAL = 2
def WEAK = 3
def WEAKEST = 4
// Create a solver variable record with the given name and initial value.
// The remaining fields start at fixed defaults: no attached constraints,
// no determining constraint, stay = true and mark = 0.
function make_variable(name, value) {
  var variable = {
    name: name,
    value: value,
    constraints: [],
    determined_by: null,
    stay: true,
    mark: 0
  }
  return variable
}
// Create a constraint record.
// `satisfy_fn` is stored as `satisfy` and is called by the solver with the
// constraint itself as its argument; `output` starts as null until the
// constraint has been satisfied at least once.
function make_constraint(strength, variables, satisfy_fn) {
  var constraint = {
    strength: strength,
    variables: variables,
    satisfy: satisfy_fn,
    output: null
  }
  return constraint
}
// Constraint propagation: simple forward solver.
// Repeatedly runs every constraint's satisfy function in order until a full
// pass changes no output value, or until 3 * length(constraints) passes have
// been made. `vars` is accepted but not read.
// Returns the number of passes performed.
function propagate(vars, constraints) {
  var limit = length(constraints) * 3
  var pass_count = 0
  var dirty = true
  var k = 0
  var con = null
  var before = null
  while (dirty && pass_count < limit) {
    dirty = false
    pass_count++
    for (k = 0; k < length(constraints); k++) {
      con = constraints[k]
      // value of the output variable before re-satisfying (null if none yet)
      before = null
      if (con.output) {
        before = con.output.value
      }
      con.satisfy(con)
      if (con.output && con.output.value != before) {
        dirty = true
      }
    }
  }
  return pass_count
}
// Build a chain of n variables linked by constraints v[i] = v[i-1] + 1.
// All variables start at 0 except the first, which is set to 1.
// Returns {vars, constraints}; each constraint is also attached to the
// variable it writes.
function build_chain(n) {
  var vars = []
  var constraints = []
  var k = 0
  var link = null
  while (k < n) {
    push(vars, make_variable(`v${k}`, 0))
    k++
  }
  // head of the chain
  vars[0].value = 1
  k = 1
  while (k < n) {
    // variables[0] is the predecessor, variables[1] the variable being set
    link = make_constraint(NORMAL, [vars[k - 1], vars[k]], function(self) {
      self.variables[1].value = self.variables[0].value + 1
      self.output = self.variables[1]
    })
    push(constraints, link)
    push(vars[k].constraints, link)
    k++
  }
  return {vars: vars, constraints: constraints}
}
// Build n independent source/destination pairs, each linked by a scaling
// constraint dst = src * 2 + 1. Source i starts at i * 10, destinations at 0.
// Returns {src, dst, constraints}; each constraint is also attached to its
// destination variable.
function build_projection(n) {
  var src = []
  var dst = []
  var constraints = []
  var k = 0
  var scaler = null
  while (k < n) {
    push(src, make_variable(`src${k}`, k * 10))
    push(dst, make_variable(`dst${k}`, 0))
    k++
  }
  k = 0
  while (k < n) {
    // variables[0] is the source, variables[1] the destination
    scaler = make_constraint(STRONG, [src[k], dst[k]], function(self) {
      self.variables[1].value = self.variables[0].value * 2 + 1
      self.output = self.variables[1]
    })
    push(constraints, scaler)
    push(dst[k].constraints, scaler)
    k++
  }
  return {src: src, dst: dst, constraints: constraints}
}
// Edit constraint: change a source, re-propagate
// Writes 0, 1, ..., edits - 1 into system.vars[0].value and runs propagate
// after each write.
//   system - record with vars and constraints fields (the shape build_chain
//            returns)
//   edits  - number of edit/propagate rounds
// Returns the total number of passes over all rounds.
function run_edits(system, edits) {
var i = 0
var total_passes = 0
for (i = 0; i < edits; i++) {
system.vars[0].value = i
total_passes += propagate(system.vars, system.constraints)
}
return total_passes
}
// Benchmark entry points. Each takes an iteration count n and returns the
// accumulated number of propagate passes, so the work has an observable
// result.
return {
  // Chain of 100 variables, propagate
  chain_100: function(n) {
    var i = 0
    var chain = null
    var x = 0
    for (i = 0; i < n; i++) {
      chain = build_chain(100)
      x += propagate(chain.vars, chain.constraints)
    }
    return x
  },
  // Chain of 500 variables, propagate
  chain_500: function(n) {
    var i = 0
    var chain = null
    var x = 0
    for (i = 0; i < n; i++) {
      chain = build_chain(500)
      x += propagate(chain.vars, chain.constraints)
    }
    return x
  },
  // Projection of 100 pairs
  projection_100: function(n) {
    var i = 0
    var proj = null
    var x = 0
    for (i = 0; i < n; i++) {
      proj = build_projection(100)
      x += propagate(proj.src, proj.constraints)
    }
    return x
  },
  // Edit and re-propagate (incremental update)
  chain_edit_100: function(n) {
    // The chain is built once and reused. run_edits writes 0..n-1 into
    // vars[0].value and re-propagates after each write, which is the same
    // loop this benchmark previously spelled out inline.
    return run_edits(build_chain(100), n)
  }
}

126
benches/fibonacci.cm Normal file
View File

@@ -0,0 +1,126 @@
// fibonacci.cm — Fibonacci variants kernel
// Tests recursion overhead, memoization patterns, iteration vs recursion.
// Naive recursive (exponential) — measures call overhead
// Returns n itself for n <= 1, otherwise fib_naive(n - 1) + fib_naive(n - 2).
// No caching: every call below the top recomputes its whole subtree.
function fib_naive(n) {
if (n <= 1) return n
return fib_naive(n - 1) + fib_naive(n - 2)
}
// Iterative (linear)
// Starts from the pair (a, b) = (0, 1) and advances it n times with
// (a, b) <- (b, a + b). Returns a, i.e. 0 for n = 0 and 1 for n = 1.
function fib_iter(n) {
var a = 0
var b = 1
var i = 0
var tmp = 0
for (i = 0; i < n; i++) {
tmp = a + b
a = b
b = tmp
}
return a
}
// Memoized recursive (tests object property lookup + recursion)
// Returns a fib(n) closure that owns a private cache keyed by text(n).
// Each call to make_memo_fib starts with an empty cache.
function make_memo_fib() {
  var cache = {}
  var fib = function(n) {
    var key = text(n)
    var hit = cache[key]
    // Test against null instead of truthiness so that a cached result of 0
    // (fib(0)) is served from the cache too, independent of how the
    // language treats 0 in a condition. One lookup serves both the test
    // and the return.
    if (hit != null) return hit
    var result = null
    if (n <= 1) {
      result = n
    } else {
      result = fib(n - 1) + fib(n - 2)
    }
    cache[key] = result
    return result
  }
  return fib
}
// CPS (continuation passing style) — tests closure creation
//   n    - index to compute
//   cont - function that receives the result
// For n <= 1 the result n is handed straight to cont. Otherwise two nested
// closures are created: the outer receives fib(n - 1) as a, the inner
// receives fib(n - 2) as b and passes a + b to cont.
function fib_cps(n, cont) {
if (n <= 1) return cont(n)
return fib_cps(n - 1, function(a) {
return fib_cps(n - 2, function(b) {
return cont(a + b)
})
})
}
// Matrix exponentiation style (accumulator)
// Walks the bits of n from the low end: m is halved with floor(m / 2) each
// round, so the loop body runs once per binary digit of n. (a, b) is updated
// only on odd m; (c, d) is updated every round.
// As the inline comment below says, the update formula is not a correct
// Fibonacci step, so the returned b is a numeric-workload result rather
// than fib(n).
function fib_matrix(n) {
var a = 1
var b = 0
var c = 0
var d = 1
var ta = 0
var tb = 0
var m = n
while (m > 0) {
if (m % 2 == 1) {
ta = a * d + b * c // wrong but stresses numeric ops
tb = b * d + a * c
a = ta
b = tb
}
// ta/tb hold the new values until both are computed from the old c and d.
ta = c * c + d * d
tb = d * (2 * c + d)
c = ta
d = tb
m = floor(m / 2)
}
return b
}
// Benchmark entry points. Each runs its kernel n times and returns the sum
// of the kernel results.
return {
fib_naive_25: function(n) {
var i = 0
var x = 0
for (i = 0; i < n; i++) x += fib_naive(25)
return x
},
fib_naive_30: function(n) {
var i = 0
var x = 0
for (i = 0; i < n; i++) x += fib_naive(30)
return x
},
fib_iter_80: function(n) {
var i = 0
var x = 0
for (i = 0; i < n; i++) x += fib_iter(80)
return x
},
fib_memo_100: function(n) {
var i = 0
var x = 0
var fib = null
for (i = 0; i < n; i++) {
// A fresh closure per iteration, so every iteration starts with an empty
// cache.
fib = make_memo_fib()
x += fib(100)
}
return x
},
fib_cps_20: function(n) {
var i = 0
var x = 0
// The top-level continuation just returns the final value.
var identity = function(v) { return v }
for (i = 0; i < n; i++) {
x += fib_cps(20, identity)
}
return x
},
fib_matrix_80: function(n) {
var i = 0
var x = 0
for (i = 0; i < n; i++) x += fib_matrix(80)
return x
}
}

159
benches/hash_workload.cm Normal file
View File

@@ -0,0 +1,159 @@
// hash_workload.cm — Hash-heavy / word-count / map-reduce kernel
// Stresses record (object) creation, property access, and string handling.
// Build the input word list for the benchmarks.
// Returns an array of count words that cycles through the 32-entry
// base_words list in order (word i is base_words[i % 32]).
function make_words(count) {
// Generate a repeating word list to simulate text processing
var base_words = [
"the", "quick", "brown", "fox", "jumps", "over", "lazy", "dog",
"and", "cat", "sat", "on", "mat", "with", "hat", "bat",
"alpha", "beta", "gamma", "delta", "epsilon", "zeta", "eta", "theta",
"hello", "world", "foo", "bar", "baz", "qux", "quux", "corge"
]
var words = []
var i = 0
for (i = 0; i < count; i++) {
push(words, base_words[i % length(base_words)])
}
return words
}
// Word frequency count
// Returns a record mapping each distinct word in words to the number of
// times it occurs.
function word_count(words) {
var freq = {}
var i = 0
var w = null
for (i = 0; i < length(words); i++) {
w = words[i]
// Stored counts start at 1, so this test separates "seen before" from
// "first occurrence".
if (freq[w]) {
freq[w] = freq[w] + 1
} else {
freq[w] = 1
}
}
return freq
}
// Find top-N words by frequency
//   freq - record of word -> count (as built by word_count)
//   n    - how many entries to return
// Returns an array of {word, count} records taken from the tail of the
// sorted list; fewer than n when freq has fewer than n keys.
function top_n(freq, n) {
// array(freq) is used as the list of freq's keys: each element is used to
// index freq below.
var keys = array(freq)
var pairs = []
var i = 0
for (i = 0; i < length(keys); i++) {
push(pairs, {word: keys[i], count: freq[keys[i]]})
}
// NOTE(review): relies on sort(pairs, "count") ordering ascending by the
// count field — confirm against the sort builtin.
var sorted = sort(pairs, "count")
// Return last N (highest counts)
var result = []
var start = length(sorted) - n
// Clamp so that n larger than the list returns the whole list.
if (start < 0) start = 0
for (i = start; i < length(sorted); i++) {
push(result, sorted[i])
}
return result
}
// Histogram: group words by length
// Returns a record whose keys are text(length(word)) and whose values are
// arrays of the words with that length, in input order (duplicates kept).
function group_by_length(words) {
var groups = {}
var i = 0
var w = null
var k = null
for (i = 0; i < length(words); i++) {
w = words[i]
k = text(length(w))
// Create the bucket the first time a length is seen.
if (!groups[k]) groups[k] = []
push(groups[k], w)
}
return groups
}
// Simple hash table with chaining (stress property access patterns)
// Runs three phases over a record keyed by `key_${i % 512}`:
//   insert - n writes; a write to a key that is already present counts as
//            a collision
//   lookup - n reads; counts the keys that are present
//   delete - every third i; removes the key if it is still present
// Returns found - deleted + collisions.
//
// Presence is tested with an explicit null comparison: the stored values
// are the loop index, which includes 0 (key_0 on the first insert), so a
// truthiness test could misreport that entry as absent.
function hash_table_ops(n) {
  var table = {}
  var i = 0
  var k = null
  var collisions = 0
  // Insert phase
  for (i = 0; i < n; i++) {
    k = `key_${i % 512}`
    if (table[k] != null) collisions++
    table[k] = i
  }
  // Lookup phase
  var found = 0
  for (i = 0; i < n; i++) {
    k = `key_${i % 512}`
    if (table[k] != null) found++
  }
  // Delete phase
  var deleted = 0
  for (i = 0; i < n; i += 3) {
    k = `key_${i % 512}`
    if (table[k] != null) {
      delete table[k]
      deleted++
    }
  }
  return found - deleted + collisions
}
// Input fixtures, built once when the module is evaluated so the benchmarks
// below do not include word-list construction.
var words_1k = make_words(1000)
var words_10k = make_words(10000)
// Benchmark entry points. Each runs its kernel n times and returns the
// result of the last run (null when n is 0), except hash_table, which
// returns the sum over all runs.
return {
// Word count on 1K words
wordcount_1k: function(n) {
var i = 0
var freq = null
for (i = 0; i < n; i++) {
freq = word_count(words_1k)
}
return freq
},
// Word count on 10K words
wordcount_10k: function(n) {
var i = 0
var freq = null
for (i = 0; i < n; i++) {
freq = word_count(words_10k)
}
return freq
},
// Word count + top-10 extraction
wordcount_top10: function(n) {
var i = 0
var freq = null
var top = null
for (i = 0; i < n; i++) {
freq = word_count(words_10k)
top = top_n(freq, 10)
}
return top
},
// Group words by length
group_by_len: function(n) {
var i = 0
var groups = null
for (i = 0; i < n; i++) {
groups = group_by_length(words_10k)
}
return groups
},
// Hash table insert/lookup/delete
hash_table: function(n) {
var i = 0
var x = 0
for (i = 0; i < n; i++) {
x += hash_table_ops(2048)
}
return x
}
}

167
benches/json_walk.cm Normal file
View File

@@ -0,0 +1,167 @@
// json_walk.cm — JSON parse + walk + serialize kernel
// Stresses strings, records, arrays, and recursive traversal.
var json = use('json')
// Build a tree of nested records.
//   depth   - number of levels below this node
//   breadth - number of entries per node
// At depth <= 0 the node is a leaf holding breadth numeric entries
// (key_i = i * 3.14). Above that, the node holds breadth child subtrees
// (node_i) plus a numeric value field (the depth) and a text name field.
function make_nested_object(depth, breadth) {
var obj = {}
var i = 0
var k = null
if (depth <= 0) {
for (i = 0; i < breadth; i++) {
k = `key_${i}`
obj[k] = i * 3.14
}
return obj
}
for (i = 0; i < breadth; i++) {
k = `node_${i}`
obj[k] = make_nested_object(depth - 1, breadth)
}
obj.value = depth
obj.name = `level_${depth}`
return obj
}
// Build an array of size flat records, each with a numeric id, a text
// name, a boolean active flag (true for even i), a numeric score and a
// two-element tags array of text.
function make_array_data(size) {
var arr = []
var i = 0
for (i = 0; i < size; i++) {
push(arr, {
id: i,
name: `item_${i}`,
active: i % 2 == 0,
score: i * 1.5,
tags: [`tag_${i % 5}`, `tag_${(i + 1) % 5}`]
})
}
return arr
}
// Walk an object tree, counting nodes
// Counts obj itself as 1 and adds the count of every child for which
// is_object or is_array holds; other values are not counted. The is_array
// branch is reached only when is_object(obj) is false.
function walk_count(obj) {
var count = 1
var keys = null
var i = 0
var v = null
if (is_object(obj)) {
// array(obj) is used as the list of obj's keys.
keys = array(obj)
for (i = 0; i < length(keys); i++) {
v = obj[keys[i]]
if (is_object(v) || is_array(v)) {
count += walk_count(v)
}
}
} else if (is_array(obj)) {
for (i = 0; i < length(obj); i++) {
v = obj[i]
if (is_object(v) || is_array(v)) {
count += walk_count(v)
}
}
}
return count
}
// Walk and extract all numbers
// Returns the sum of every value for which is_number holds, anywhere in the
// tree under obj. Nested records and arrays are descended into; values of
// any other kind are skipped.
function walk_sum(obj) {
var sum = 0
var keys = null
var i = 0
var v = null
if (is_object(obj)) {
// array(obj) is used as the list of obj's keys.
keys = array(obj)
for (i = 0; i < length(keys); i++) {
v = obj[keys[i]]
if (is_number(v)) {
sum += v
} else if (is_object(v) || is_array(v)) {
sum += walk_sum(v)
}
}
} else if (is_array(obj)) {
for (i = 0; i < length(obj); i++) {
v = obj[i]
if (is_number(v)) {
sum += v
} else if (is_object(v) || is_array(v)) {
sum += walk_sum(v)
}
}
}
return sum
}
// Pre-build test data strings
// Built once when the module is evaluated: a depth-3, breadth-4 nested
// record and a 200-element array of records, each kept both as a value and
// as its json.encode output.
var nested_obj = make_nested_object(3, 4)
var nested_json = json.encode(nested_obj)
var array_data = make_array_data(200)
var array_json = json.encode(array_data)
// Benchmark entry points. Each runs its kernel n times. The parse/encode
// benchmarks return the result of the last run (null when n is 0); the
// roundtrip benchmarks return a running total.
return {
// Parse nested JSON
json_parse_nested: function(n) {
var i = 0
var obj = null
for (i = 0; i < n; i++) {
obj = json.decode(nested_json)
}
return obj
},
// Parse array-of-records JSON
json_parse_array: function(n) {
var i = 0
var arr = null
for (i = 0; i < n; i++) {
arr = json.decode(array_json)
}
return arr
},
// Encode nested object to JSON
json_encode_nested: function(n) {
var i = 0
var s = null
for (i = 0; i < n; i++) {
s = json.encode(nested_obj)
}
return s
},
// Encode array to JSON
json_encode_array: function(n) {
var i = 0
var s = null
for (i = 0; i < n; i++) {
s = json.encode(array_data)
}
return s
},
// Parse + walk + count
json_roundtrip_walk: function(n) {
var i = 0
var obj = null
var count = 0
for (i = 0; i < n; i++) {
obj = json.decode(nested_json)
count += walk_count(obj)
}
return count
},
// Parse + sum all numbers + re-encode
json_roundtrip_full: function(n) {
var i = 0
var obj = null
var sum = 0
var out = null
for (i = 0; i < n; i++) {
obj = json.decode(array_json)
sum += walk_sum(obj)
// The encoded text is assigned but not otherwise used; only sum is
// returned.
out = json.encode(obj)
}
return sum
}
}

View File

@@ -1,24 +1,24 @@
// micro_ops.bench.ce (or .cm depending on your convention)
// micro_ops.cm — microbenchmarks for core operations
// Note: We use a function-local sink in each benchmark to avoid cross-contamination
function blackhole(sink, x) {
// Prevent dead-code elimination
return (sink + (x | 0)) | 0
}
function make_obj_xy(x, y) {
return { x, y }
return {x: x, y: y}
}
function make_obj_yx(x, y) {
// Different insertion order to force a different shape in many engines
return { y, x }
// Different insertion order to force a different shape
return {y: y, x: x}
}
function make_shapes(n) {
var out = []
for (var i = 0; i < n; i++) {
var o = { a: i }
var i = 0
var o = null
for (i = 0; i < n; i++) {
o = {a: i}
o[`p${i}`] = i
push(out, o)
}
@@ -27,13 +27,15 @@ function make_shapes(n) {
function make_packed_array(n) {
var a = []
for (var i = 0; i < n; i++) push(a, i)
var i = 0
for (i = 0; i < n; i++) push(a, i)
return a
}
function make_holey_array(n) {
var a = []
for (var i = 0; i < n; i += 2) a[i] = i
var i = 0
for (i = 0; i < n; i += 2) a[i] = i
return a
}
@@ -41,7 +43,8 @@ return {
// 0) Baseline loop cost
loop_empty: function(n) {
var sink = 0
for (var i = 0; i < n; i++) {}
var i = 0
for (i = 0; i < n; i++) {}
return blackhole(sink, n)
},
@@ -49,35 +52,40 @@ return {
i32_add: function(n) {
var sink = 0
var x = 1
for (var i = 0; i < n; i++) x = (x + 3) | 0
var i = 0
for (i = 0; i < n; i++) x = (x + 3) | 0
return blackhole(sink, x)
},
f64_add: function(n) {
var sink = 0
var x = 1.0
for (var i = 0; i < n; i++) x = x + 3.14159
var i = 0
for (i = 0; i < n; i++) x = x + 3.14159
return blackhole(sink, x | 0)
},
mixed_add: function(n) {
var sink = 0
var x = 1
for (var i = 0; i < n; i++) x = x + 0.25
var i = 0
for (i = 0; i < n; i++) x = x + 0.25
return blackhole(sink, x | 0)
},
bit_ops: function(n) {
var sink = 0
var x = 0x12345678
for (var i = 0; i < n; i++) x = ((x << 5) ^ (x >>> 3)) | 0
var i = 0
for (i = 0; i < n; i++) x = ((x << 5) ^ (x >>> 3)) | 0
return blackhole(sink, x)
},
overflow_path: function(n) {
var sink = 0
var x = 0x70000000
for (var i = 0; i < n; i++) x = (x + 0x10000000) | 0
var i = 0
for (i = 0; i < n; i++) x = (x + 0x10000000) | 0
return blackhole(sink, x)
},
@@ -85,7 +93,8 @@ return {
branch_predictable: function(n) {
var sink = 0
var x = 0
for (var i = 0; i < n; i++) {
var i = 0
for (i = 0; i < n; i++) {
if ((i & 7) != 0) x++
else x += 2
}
@@ -95,7 +104,8 @@ return {
branch_alternating: function(n) {
var sink = 0
var x = 0
for (var i = 0; i < n; i++) {
var i = 0
for (i = 0; i < n; i++) {
if ((i & 1) == 0) x++
else x += 2
}
@@ -105,29 +115,47 @@ return {
// 3) Calls
call_direct: function(n) {
var sink = 0
function f(a) { return (a + 1) | 0 }
var f = function(a) { return (a + 1) | 0 }
var x = 0
for (var i = 0; i < n; i++) x = f(x)
var i = 0
for (i = 0; i < n; i++) x = f(x)
return blackhole(sink, x)
},
call_indirect: function(n) {
var sink = 0
function f(a) { return (a + 1) | 0 }
var f = function(a) { return (a + 1) | 0 }
var g = f
var x = 0
for (var i = 0; i < n; i++) x = g(x)
var i = 0
for (i = 0; i < n; i++) x = g(x)
return blackhole(sink, x)
},
call_closure: function(n) {
var sink = 0
function make_adder(k) {
var make_adder = function(k) {
return function(a) { return (a + k) | 0 }
}
var add3 = make_adder(3)
var x = 0
for (var i = 0; i < n; i++) x = add3(x)
var i = 0
for (i = 0; i < n; i++) x = add3(x)
return blackhole(sink, x)
},
call_multi_arity: function(n) {
var sink = 0
var f0 = function() { return 1 }
var f1 = function(a) { return a + 1 }
var f2 = function(a, b) { return a + b }
var f3 = function(a, b, c) { return a + b + c }
var f4 = function(a, b, c, d) { return a + b + c + d }
var x = 0
var i = 0
for (i = 0; i < n; i++) {
x = (x + f0() + f1(i) + f2(i, 1) + f3(i, 1, 2) + f4(i, 1, 2, 3)) | 0
}
return blackhole(sink, x)
},
@@ -136,7 +164,8 @@ return {
var sink = 0
var o = make_obj_xy(1, 2)
var x = 0
for (var i = 0; i < n; i++) x = (x + o.x) | 0
var i = 0
for (i = 0; i < n; i++) x = (x + o.x) | 0
return blackhole(sink, x)
},
@@ -145,20 +174,38 @@ return {
var a = make_obj_xy(1, 2)
var b = make_obj_yx(1, 2)
var x = 0
for (var i = 0; i < n; i++) {
var o = (i & 1) == 0 ? a : b
var i = 0
var o = null
for (i = 0; i < n; i++) {
o = (i & 1) == 0 ? a : b
x = (x + o.x) | 0
}
return blackhole(sink, x)
},
prop_read_poly_4: function(n) {
var sink = 0
var shapes = [
{x: 1, y: 2},
{y: 2, x: 1},
{x: 1, z: 3, y: 2},
{w: 0, x: 1, y: 2}
]
var x = 0
var i = 0
for (i = 0; i < n; i++) {
x = (x + shapes[i & 3].x) | 0
}
return blackhole(sink, x)
},
prop_read_mega: function(n) {
var sink = 0
var objs = make_shapes(32)
var x = 0
for (var i = 0; i < n; i++) {
var o = objs[i & 31]
x = (x + o.a) | 0
var i = 0
for (i = 0; i < n; i++) {
x = (x + objs[i & 31].a) | 0
}
return blackhole(sink, x)
},
@@ -166,7 +213,8 @@ return {
prop_write_mono: function(n) {
var sink = 0
var o = make_obj_xy(1, 2)
for (var i = 0; i < n; i++) o.x = (o.x + 1) | 0
var i = 0
for (i = 0; i < n; i++) o.x = (o.x + 1) | 0
return blackhole(sink, o.x)
},
@@ -175,14 +223,16 @@ return {
var sink = 0
var a = make_packed_array(1024)
var x = 0
for (var i = 0; i < n; i++) x = (x + a[i & 1023]) | 0
var i = 0
for (i = 0; i < n; i++) x = (x + a[i & 1023]) | 0
return blackhole(sink, x)
},
array_write_packed: function(n) {
var sink = 0
var a = make_packed_array(1024)
for (var i = 0; i < n; i++) a[i & 1023] = i
var i = 0
for (i = 0; i < n; i++) a[i & 1023] = i
return blackhole(sink, a[17] | 0)
},
@@ -190,9 +240,10 @@ return {
var sink = 0
var a = make_holey_array(2048)
var x = 0
for (var i = 0; i < n; i++) {
var v = a[(i & 2047)]
// If "missing" is a special value in your language, this stresses that path too
var i = 0
var v = null
for (i = 0; i < n; i++) {
v = a[(i & 2047)]
if (v) x = (x + v) | 0
}
return blackhole(sink, x)
@@ -201,21 +252,97 @@ return {
array_push_steady: function(n) {
var sink = 0
var x = 0
for (var j = 0; j < n; j++) {
var a = []
for (var i = 0; i < 256; i++) push(a, i)
var j = 0
var i = 0
var a = null
for (j = 0; j < n; j++) {
a = []
for (i = 0; i < 256; i++) push(a, i)
x = (x + length(a)) | 0
}
return blackhole(sink, x)
},
array_push_pop: function(n) {
var sink = 0
var a = []
var x = 0
var i = 0
var v = 0
for (i = 0; i < n; i++) {
push(a, i)
if (length(a) > 64) {
v = pop(a)
x = (x + v) | 0
}
}
return blackhole(sink, x)
},
array_indexed_sum: function(n) {
var sink = 0
var a = make_packed_array(1024)
var x = 0
var j = 0
var i = 0
for (j = 0; j < n; j++) {
x = 0
for (i = 0; i < 1024; i++) {
x = (x + a[i]) | 0
}
}
return blackhole(sink, x)
},
// 6) Strings
string_concat_small: function(n) {
var sink = 0
var x = 0
for (var j = 0; j < n; j++) {
var s = ""
for (var i = 0; i < 16; i++) s = s + "x"
var j = 0
var i = 0
var s = null
for (j = 0; j < n; j++) {
s = ""
for (i = 0; i < 16; i++) s = s + "x"
x = (x + length(s)) | 0
}
return blackhole(sink, x)
},
string_concat_medium: function(n) {
var sink = 0
var x = 0
var j = 0
var i = 0
var s = null
for (j = 0; j < n; j++) {
s = ""
for (i = 0; i < 100; i++) s = s + "abcdefghij"
x = (x + length(s)) | 0
}
return blackhole(sink, x)
},
string_interpolation: function(n) {
var sink = 0
var x = 0
var i = 0
var s = null
for (i = 0; i < n; i++) {
s = `item_${i}_value_${i * 2}`
x = (x + length(s)) | 0
}
return blackhole(sink, x)
},
string_slice: function(n) {
var sink = 0
var base = "the quick brown fox jumps over the lazy dog"
var x = 0
var i = 0
var s = null
for (i = 0; i < n; i++) {
s = text(base, i % 10, i % 10 + 10)
x = (x + length(s)) | 0
}
return blackhole(sink, x)
@@ -225,8 +352,10 @@ return {
alloc_tiny_objects: function(n) {
var sink = 0
var x = 0
for (var i = 0; i < n; i++) {
var o = { a: i, b: i + 1, c: i + 2 }
var i = 0
var o = null
for (i = 0; i < n; i++) {
o = {a: i, b: i + 1, c: i + 2}
x = (x + o.b) | 0
}
return blackhole(sink, x)
@@ -235,9 +364,12 @@ return {
alloc_linked_list: function(n) {
var sink = 0
var head = null
for (var i = 0; i < n; i++) head = { v: i, next: head }
var i = 0
var x = 0
var p = head
var p = null
for (i = 0; i < n; i++) head = {v: i, next: head}
x = 0
p = head
while (p) {
x = (x + p.v) | 0
p = p.next
@@ -245,18 +377,118 @@ return {
return blackhole(sink, x)
},
// 8) meme-specific (adapt these to your exact semantics)
meme_clone_read: function(n) {
// If meme(obj) clones like Object.create / prototypal clone, this hits it hard.
// Replace with your exact meme call form.
alloc_arrays: function(n) {
var sink = 0
var base = { x: 1, y: 2 }
var x = 0
for (var i = 0; i < n; i++) {
var o = meme(base)
var i = 0
var a = null
for (i = 0; i < n; i++) {
a = [i, i + 1, i + 2, i + 3]
x = (x + a[2]) | 0
}
return blackhole(sink, x)
},
alloc_short_lived: function(n) {
var sink = 0
var x = 0
var i = 0
var o = null
// Allocate objects that immediately become garbage
for (i = 0; i < n; i++) {
o = {val: i, data: {inner: i + 1}}
x = (x + o.data.inner) | 0
}
return blackhole(sink, x)
},
alloc_long_lived_pressure: function(n) {
var sink = 0
var store = []
var x = 0
var i = 0
var o = null
// Keep first 1024 objects alive, churn the rest
for (i = 0; i < n; i++) {
o = {val: i, data: i * 2}
if (i < 1024) {
push(store, o)
}
x = (x + o.data) | 0
}
return blackhole(sink, x)
},
// 8) Meme (prototype clone)
meme_clone_read: function(n) {
var sink = 0
var base = {x: 1, y: 2}
var x = 0
var i = 0
var o = null
for (i = 0; i < n; i++) {
o = meme(base)
x = (x + o.x) | 0
}
return blackhole(sink, x)
},
// 9) Guard / type check paths
guard_hot_number: function(n) {
// Monomorphic number path — guards should hoist
var sink = 0
var x = 1
var i = 0
for (i = 0; i < n; i++) x = x + 1
return blackhole(sink, x | 0)
},
guard_mixed_types: function(n) {
// Alternating number/text — guards must stay
var sink = 0
var vals = [1, "a", 2, "b", 3, "c", 4, "d"]
var x = 0
var i = 0
for (i = 0; i < n; i++) {
if (is_number(vals[i & 7])) x = (x + vals[i & 7]) | 0
}
return blackhole(sink, x)
},
// 10) Reduce / higher-order
reduce_sum: function(n) {
var sink = 0
var a = make_packed_array(256)
var x = 0
var i = 0
for (i = 0; i < n; i++) {
x = (x + reduce(a, function(acc, v) { return acc + v }, 0)) | 0
}
return blackhole(sink, x)
},
filter_evens: function(n) {
var sink = 0
var a = make_packed_array(256)
var x = 0
var i = 0
for (i = 0; i < n; i++) {
x = (x + length(filter(a, function(v) { return v % 2 == 0 }))) | 0
}
return blackhole(sink, x)
},
arrfor_sum: function(n) {
var sink = 0
var a = make_packed_array(256)
var x = 0
var i = 0
var sum = 0
for (i = 0; i < n; i++) {
sum = 0
arrfor(a, function(v) { sum += v })
x = (x + sum) | 0
}
return blackhole(sink, x)
}
}

249
benches/module_load.cm Normal file
View File

@@ -0,0 +1,249 @@
// module_load.cm — Module loading simulation (macro benchmark)
// Simulates parsing many small modules, linking, and running.
// Tests the "build scenario" pattern.
var json = use('json')
// Simulate a small module: parse token stream + build AST + evaluate
// tokenize is the first stage: it splits src into an array of text tokens.
//   - space, newline and tab end the current token and are dropped
//   - ( ) + - * = ; , end the current token and are emitted as their own
//     one-character tokens
//   - every other character is appended to the current token
function tokenize(src) {
var tokens = []
var i = 0
var ch = null
// array(src) is used as the list of src's characters.
var chars = array(src)
var buf = ""
for (i = 0; i < length(chars); i++) {
ch = chars[i]
if (ch == " " || ch == "\n" || ch == "\t") {
if (length(buf) > 0) {
push(tokens, buf)
buf = ""
}
} else if (ch == "(" || ch == ")" || ch == "+" || ch == "-"
|| ch == "*" || ch == "=" || ch == ";" || ch == ",") {
if (length(buf) > 0) {
push(tokens, buf)
buf = ""
}
push(tokens, ch)
} else {
buf = buf + ch
}
}
// Flush a token that runs to the end of the input.
if (length(buf) > 0) push(tokens, buf)
return tokens
}
// Build a simple AST from tokens
// Returns a flat array of node records, one per recognised construct:
//   "var"/"def"  -> {type: "decl", kind, name, value}; name is the next
//                   token, the token after that is skipped, and value is
//                   the one after the skipped token
//   "return"     -> {type: "return", value}; value is the next token
//   "function"   -> {type: "func", name, body}; name is the next token,
//                   then tokens are skipped up to the next ")"; body is
//                   left empty
//   anything else -> {type: "expr", value: token}
// name/value stay null when the token stream ends early.
function parse_tokens(tokens) {
var ast = []
var i = 0
var tok = null
var node = null
for (i = 0; i < length(tokens); i++) {
tok = tokens[i]
if (tok == "var" || tok == "def") {
node = {type: "decl", kind: tok, name: null, value: null}
i++
if (i < length(tokens)) node.name = tokens[i]
i++ // skip =
i++
if (i < length(tokens)) node.value = tokens[i]
push(ast, node)
} else if (tok == "return") {
node = {type: "return", value: null}
i++
if (i < length(tokens)) node.value = tokens[i]
push(ast, node)
} else if (tok == "function") {
node = {type: "func", name: null, body: []}
i++
if (i < length(tokens)) node.name = tokens[i]
// Skip to matching )
while (i < length(tokens) && tokens[i] != ")") i++
push(ast, node)
} else {
push(ast, {type: "expr", value: tok})
}
}
return ast
}
// Evaluate: simple symbol table + resolution
//   ast - array of nodes from parse_tokens
//   env - record used as the symbol table; it is written to in place
// "decl" nodes store their value token under their name, "func" nodes store
// the node itself under their name, and "return" nodes set the result to
// their value token, replaced by env's entry for that token when the entry
// is truthy. "expr" nodes are ignored.
// Returns the result of the last "return" node, or null if there was none.
function evaluate(ast, env) {
var result = null
var i = 0
var node = null
for (i = 0; i < length(ast); i++) {
node = ast[i]
if (node.type == "decl") {
env[node.name] = node.value
} else if (node.type == "return") {
result = node.value
if (env[result]) result = env[result]
} else if (node.type == "func") {
env[node.name] = node
}
}
return result
}
// Generate fake module source code
//   id        - module number; also seeds the x and y literals
//   dep_count - number of "var depN = use(mod_N);" lines to emit
// Returns the source as one text value built by repeated concatenation:
// an _id declaration, the dep lines, x and y declarations, a compute
// function, a result declaration and a final return statement.
function generate_module(id, dep_count) {
var src = ""
var i = 0
src = src + "var _id = " + text(id) + ";\n"
for (i = 0; i < dep_count; i++) {
src = src + "var dep" + text(i) + " = use(mod_" + text(i) + ");\n"
}
src = src + "var x = " + text(id * 17) + ";\n"
src = src + "var y = " + text(id * 31) + ";\n"
src = src + "function compute(a, b) { return a + b; }\n"
src = src + "var result = compute(x, y);\n"
src = src + "return result;\n"
return src
}
// Simulate loading N modules with dependency chains
//   n_modules       - number of modules to generate and load
//   deps_per_module - number of dep lines per module source
// First generates every module's source, then for each module in order
// tokenizes, parses and evaluates it, seeding its env with the results of
// earlier modules.
// Returns {modules, total_tokens, total_nodes, last_result}.
function simulate_build(n_modules, deps_per_module) {
var modules = []
var loaded = {}
var i = 0
var j = 0
var src = null
var tokens = null
var ast = null
var env = null
var result = null
var total_tokens = 0
var total_nodes = 0
// Generate all module sources
for (i = 0; i < n_modules; i++) {
src = generate_module(i, deps_per_module)
push(modules, src)
}
// "Load" each module: tokenize → parse → evaluate
for (i = 0; i < n_modules; i++) {
tokens = tokenize(modules[i])
total_tokens += length(tokens)
ast = parse_tokens(tokens)
total_nodes += length(ast)
env = {}
// Resolve dependencies
for (j = 0; j < deps_per_module; j++) {
// Only modules with a lower index have been loaded at this point.
if (j < i) {
env["dep" + text(j)] = loaded["mod_" + text(j)]
}
}
result = evaluate(ast, env)
loaded["mod_" + text(i)] = result
}
return {
modules: n_modules,
total_tokens: total_tokens,
total_nodes: total_nodes,
last_result: result
}
}
// Dependency graph analysis (topological sort simulation)
// Builds a graph over modules mod_0..mod_(n_modules - 1) in which module i
// depends on mod_j for every j below both i and deps_per_module, then
// orders it with Kahn's algorithm.
// Returns the array of module names in the order they were emitted.
function topo_sort(n_modules, deps_per_module) {
// Build adjacency list
var adj = {}
var in_degree = {}
var i = 0
var j = 0
var name = null
var dep = null
for (i = 0; i < n_modules; i++) {
name = "mod_" + text(i)
adj[name] = []
in_degree[name] = 0
}
for (i = 0; i < n_modules; i++) {
name = "mod_" + text(i)
for (j = 0; j < deps_per_module; j++) {
if (j < i) {
dep = "mod_" + text(j)
// Edge dep -> name: name becomes ready only after dep is emitted.
push(adj[dep], name)
in_degree[name] = in_degree[name] + 1
}
}
}
// Kahn's algorithm
var queue = []
// array(in_degree) is used as the list of module names.
var keys = array(in_degree)
for (i = 0; i < length(keys); i++) {
if (in_degree[keys[i]] == 0) push(queue, keys[i])
}
var order = []
var current = null
var neighbors = null
// qi is the read position; the queue array only grows and is never shifted.
var qi = 0
while (qi < length(queue)) {
current = queue[qi]
qi++
push(order, current)
neighbors = adj[current]
if (neighbors) {
for (i = 0; i < length(neighbors); i++) {
in_degree[neighbors[i]] = in_degree[neighbors[i]] - 1
if (in_degree[neighbors[i]] == 0) push(queue, neighbors[i])
}
}
}
return order
}
// Benchmark entry points. Each runs its kernel n times and returns the
// result of the last run (null when n is 0).
return {
// Small build: 50 modules, 3 deps each
build_50: function(n) {
var i = 0
var result = null
for (i = 0; i < n; i++) {
result = simulate_build(50, 3)
}
return result
},
// Medium build: 200 modules, 5 deps each
build_200: function(n) {
var i = 0
var result = null
for (i = 0; i < n; i++) {
result = simulate_build(200, 5)
}
return result
},
// Large build: 500 modules, 5 deps each
build_500: function(n) {
var i = 0
var result = null
for (i = 0; i < n; i++) {
result = simulate_build(500, 5)
}
return result
},
// Topo sort of 500 module dependency graph
topo_sort_500: function(n) {
var i = 0
var order = null
for (i = 0; i < n; i++) {
order = topo_sort(500, 5)
}
return order
}
}

160
benches/nbody.cm Normal file
View File

@@ -0,0 +1,160 @@
// nbody.cm — N-body gravitational simulation kernel
// Pure numeric + allocation workload. Classic VM benchmark.
var math = use('math/radians')
// Simulation constants. SOLAR_MASS is 4 * PI * PI; DAYS_PER_YEAR scales the
// planet velocity components in make_system.
def PI = 3.141592653589793
def SOLAR_MASS = 4 * PI * PI
def DAYS_PER_YEAR = 365.24
// Build a fresh five-body system and return it as an array
// [sun, jupiter, saturn, uranus, neptune]. Each body is a record with
// position (x, y, z), velocity (vx, vy, vz) and mass. The sun starts at the
// origin; its velocity is then set so that the summed momentum of all
// bodies is zero.
function make_system() {
// Sun + 4 Jovian planets
var sun = {x: 0, y: 0, z: 0, vx: 0, vy: 0, vz: 0, mass: SOLAR_MASS}
var jupiter = {
x: 4.84143144246472090,
y: -1.16032004402742839,
z: -0.103622044471123109,
vx: 0.00166007664274403694 * DAYS_PER_YEAR,
vy: 0.00769901118419740425 * DAYS_PER_YEAR,
vz: -0.0000690460016972063023 * DAYS_PER_YEAR,
mass: 0.000954791938424326609 * SOLAR_MASS
}
var saturn = {
x: 8.34336671824457987,
y: 4.12479856412430479,
z: -0.403523417114321381,
vx: -0.00276742510726862411 * DAYS_PER_YEAR,
vy: 0.00499852801234917238 * DAYS_PER_YEAR,
vz: 0.0000230417297573763929 * DAYS_PER_YEAR,
mass: 0.000285885980666130812 * SOLAR_MASS
}
var uranus = {
x: 12.8943695621391310,
y: -15.1111514016986312,
z: -0.223307578892655734,
vx: 0.00296460137564761618 * DAYS_PER_YEAR,
vy: 0.00237847173959480950 * DAYS_PER_YEAR,
vz: -0.0000296589568540237556 * DAYS_PER_YEAR,
mass: 0.0000436624404335156298 * SOLAR_MASS
}
var neptune = {
x: 15.3796971148509165,
y: -25.9193146099879641,
z: 0.179258772950371181,
vx: 0.00268067772490389322 * DAYS_PER_YEAR,
vy: 0.00162824170038242295 * DAYS_PER_YEAR,
vz: -0.0000951592254519715870 * DAYS_PER_YEAR,
mass: 0.0000515138902046611451 * SOLAR_MASS
}
var bodies = [sun, jupiter, saturn, uranus, neptune]
// Offset momentum
// px/py/pz accumulate mass * velocity over every body (the sun contributes
// 0 at this point).
var px = 0
var py = 0
var pz = 0
var i = 0
for (i = 0; i < length(bodies); i++) {
px += bodies[i].vx * bodies[i].mass
py += bodies[i].vy * bodies[i].mass
pz += bodies[i].vz * bodies[i].mass
}
// Give the sun the opposite momentum.
sun.vx = -px / SOLAR_MASS
sun.vy = -py / SOLAR_MASS
sun.vz = -pz / SOLAR_MASS
return bodies
}
// Advance the system by one time step, mutating the body records in place.
//   bodies - array of body records (as built by make_system)
//   dt     - step size
// First updates the velocities of every unordered pair (i < j) from their
// separation, then moves every body by dt * velocity. Returns nothing.
function advance(bodies, dt) {
var n = length(bodies)
var i = 0
var j = 0
var bi = null
var bj = null
var dx = 0
var dy = 0
var dz = 0
var dist_sq = 0
var dist = 0
var mag = 0
for (i = 0; i < n; i++) {
bi = bodies[i]
for (j = i + 1; j < n; j++) {
bj = bodies[j]
dx = bi.x - bj.x
dy = bi.y - bj.y
dz = bi.z - bj.z
dist_sq = dx * dx + dy * dy + dz * dz
dist = math.sqrt(dist_sq)
// mag = dt / distance^3, shared by both bodies of the pair.
mag = dt / (dist_sq * dist)
// Equal and opposite velocity changes, each scaled by the other body's
// mass.
bi.vx -= dx * bj.mass * mag
bi.vy -= dy * bj.mass * mag
bi.vz -= dz * bj.mass * mag
bj.vx += dx * bi.mass * mag
bj.vy += dy * bi.mass * mag
bj.vz += dz * bi.mass * mag
}
}
for (i = 0; i < n; i++) {
bi = bodies[i]
bi.x += dt * bi.vx
bi.y += dt * bi.vy
bi.z += dt * bi.vz
}
}
// Compute the total energy of the system without modifying it.
// Adds 0.5 * mass * speed^2 for every body and subtracts
// mass_i * mass_j / distance for every unordered pair (i < j).
// Returns the resulting number.
function energy(bodies) {
var e = 0
var n = length(bodies)
var i = 0
var j = 0
var bi = null
var bj = null
var dx = 0
var dy = 0
var dz = 0
for (i = 0; i < n; i++) {
bi = bodies[i]
e += 0.5 * bi.mass * (bi.vx * bi.vx + bi.vy * bi.vy + bi.vz * bi.vz)
for (j = i + 1; j < n; j++) {
bj = bodies[j]
dx = bi.x - bj.x
dy = bi.y - bj.y
dz = bi.z - bj.z
e -= (bi.mass * bj.mass) / math.sqrt(dx * dx + dy * dy + dz * dz)
}
}
return e
}
// Benchmark entry points. Each builds a fresh system n times, advances it
// by a fixed number of 0.01 steps and evaluates its energy.
// The energies are summed and returned so that the simulation has an
// observable result; a run with n == 0 returns 0.
return {
  nbody_1k: function(n) {
    var i = 0
    var j = 0
    var bodies = null
    var e = 0
    for (i = 0; i < n; i++) {
      bodies = make_system()
      for (j = 0; j < 1000; j++) advance(bodies, 0.01)
      e += energy(bodies)
    }
    return e
  },
  nbody_10k: function(n) {
    var i = 0
    var j = 0
    var bodies = null
    var e = 0
    for (i = 0; i < n; i++) {
      bodies = make_system()
      for (j = 0; j < 10000; j++) advance(bodies, 0.01)
      e += energy(bodies)
    }
    return e
  }
}

154
benches/ray_tracer.cm Normal file
View File

@@ -0,0 +1,154 @@
// ray_tracer.cm — Simple ray tracer kernel
// Control flow + numeric + allocation. Classic VM benchmark.
var math = use('math/radians')
// Create a 3-component vector record {x, y, z}. The same record shape is
// used for positions, directions and colors in this file.
function vec(x, y, z) {
return {x: x, y: y, z: z}
}
// Component-wise sum a + b, returned as a new record; a and b are not
// modified.
function vadd(a, b) {
return {x: a.x + b.x, y: a.y + b.y, z: a.z + b.z}
}
// Component-wise difference a - b, returned as a new record; a and b are
// not modified.
function vsub(a, b) {
return {x: a.x - b.x, y: a.y - b.y, z: a.z - b.z}
}
// Scale vector v by the number s, returned as a new record.
function vmul(v, s) {
return {x: v.x * s, y: v.y * s, z: v.z * s}
}
// Dot product of a and b, returned as a number.
function vdot(a, b) {
return a.x * b.x + a.y * b.y + a.z * b.z
}
// Return v scaled to unit length as a new record. A vector whose length is
// exactly 0 yields the zero vector instead of dividing by zero.
function vnorm(v) {
var len = math.sqrt(vdot(v, v))
if (len == 0) return vec(0, 0, 0)
return vmul(v, 1 / len)
}
// Create a sphere record.
//   center - vector record for the sphere's center
//   radius - number
//   color  - vector record; trace() scales it by the light intensity
function make_sphere(center, radius, color) {
return {
center: center,
radius: radius,
color: color
}
}
// Intersect the ray origin + t * dir with sphere.
// Returns the smallest t greater than 0.001 at which the ray hits the
// sphere, or -1 when there is no such hit.
// The formula has no division by dot(dir, dir), so it is exact only for a
// unit-length dir; both call sites in this file pass a vnorm() result.
function intersect_sphere(origin, dir, sphere) {
var oc = vsub(origin, sphere.center)
var b = vdot(oc, dir)
var c = vdot(oc, oc) - sphere.radius * sphere.radius
// Negative discriminant: the ray misses the sphere.
var disc = b * b - c
if (disc < 0) return -1
var sq = math.sqrt(disc)
var t1 = -b - sq
var t2 = -b + sq
// t1 <= t2, so t1 is tried first; the 0.001 floor rejects hits at or
// behind the ray origin.
if (t1 > 0.001) return t1
if (t2 > 0.001) return t2
return -1
}
// Build the fixed scene: six spheres (the last one has radius 100 and is
// centered at y = -101) and one unit-length light vector.
// Returns {spheres, light}.
function make_scene() {
var spheres = [
make_sphere(vec(0, -1, 5), 1, vec(1, 0, 0)),
make_sphere(vec(2, 0, 6), 1, vec(0, 1, 0)),
make_sphere(vec(-2, 0, 4), 1, vec(0, 0, 1)),
make_sphere(vec(0, 1, 4.5), 0.5, vec(1, 1, 0)),
make_sphere(vec(1, -0.5, 3), 0.3, vec(1, 0, 1)),
make_sphere(vec(0, -101, 5), 100, vec(0.5, 0.5, 0.5))
]
var light = vnorm(vec(1, 1, -1))
return {spheres: spheres, light: light}
}
// Trace one ray through scene and return its color as a vector record.
//   origin, dir - the ray; dir is passed unchanged to intersect_sphere
//   scene       - record with spheres and light (as built by make_scene)
// Finds the nearest sphere hit, computes diffuse lighting from the surface
// normal and scene.light, and casts a second ray towards the light to test
// for shadow. A ray that hits nothing returns the sky color.
function trace(origin, dir, scene) {
// 999999 acts as "no hit yet"; hits farther away than that are ignored.
var closest_t = 999999
var closest_sphere = null
var i = 0
var t = 0
for (i = 0; i < length(scene.spheres); i++) {
t = intersect_sphere(origin, dir, scene.spheres[i])
if (t > 0 && t < closest_t) {
closest_t = t
closest_sphere = scene.spheres[i]
}
}
if (!closest_sphere) return vec(0.2, 0.3, 0.5) // sky color
var hit = vadd(origin, vmul(dir, closest_t))
var normal = vnorm(vsub(hit, closest_sphere.center))
var diffuse = vdot(normal, scene.light)
// Surfaces facing away from the light get no diffuse contribution.
if (diffuse < 0) diffuse = 0
// Shadow check
// The shadow ray starts slightly off the surface along the normal.
var shadow_origin = vadd(hit, vmul(normal, 0.001))
var in_shadow = false
for (i = 0; i < length(scene.spheres); i++) {
// The sphere that was hit is excluded from its own shadow test.
if (scene.spheres[i] != closest_sphere) {
t = intersect_sphere(shadow_origin, scene.light, scene.spheres[i])
if (t > 0) {
in_shadow = true
break
}
}
}
var ambient = 0.15
// In shadow only the ambient term remains; otherwise ambient plus 85% of
// the diffuse term.
var intensity = in_shadow ? ambient : ambient + diffuse * 0.85
return vmul(closest_sphere.color, intensity)
}
// Render a width x height image of scene and return the per-channel color
// totals as {r, g, b}. No pixel buffer is kept.
// One ray is cast from the origin through the center of each pixel.
function render(width, height, scene) {
var aspect = width / height
var fov = 1.0
var total_r = 0
var total_g = 0
var total_b = 0
var y = 0
var x = 0
var u = 0
var v = 0
var dir = null
var color = null
var origin = vec(0, 0, 0)
for (y = 0; y < height; y++) {
for (x = 0; x < width; x++) {
// Map the pixel center to u in (-aspect * fov, aspect * fov) and
// v in (-fov, fov); v decreases as y grows.
u = (2 * (x + 0.5) / width - 1) * aspect * fov
v = (1 - 2 * (y + 0.5) / height) * fov
dir = vnorm(vec(u, v, 1))
color = trace(origin, dir, scene)
total_r += color.x
total_g += color.y
total_b += color.z
}
}
return {r: total_r, g: total_g, b: total_b}
}
// The scene is built once when the module is evaluated and shared by both
// benchmarks.
var scene = make_scene()
// Benchmark entry points. Each renders the scene n times and returns the
// {r, g, b} totals of the last render (null when n is 0).
return {
raytrace_32x32: function(n) {
var i = 0
var result = null
for (i = 0; i < n; i++) {
result = render(32, 32, scene)
}
return result
},
raytrace_64x64: function(n) {
var i = 0
var result = null
for (i = 0; i < n; i++) {
result = render(64, 64, scene)
}
return result
}
}

251
benches/richards.cm Normal file
View File

@@ -0,0 +1,251 @@
// richards.cm — Richards benchmark (scheduler simulation)
// Object-ish workload: dynamic dispatch, state machines, queuing.
// Task identifiers. The scheduler uses them to index its tasks array, which
// it creates with array(NUM_TASKS, null).
def IDLE = 0
def WORKER = 1
def HANDLER_A = 2
def HANDLER_B = 3
def DEVICE_A = 4
def DEVICE_B = 5
def NUM_TASKS = 6
// Values stored in a task record's state field.
def TASK_RUNNING = 0
def TASK_WAITING = 1
def TASK_HELD = 2
def TASK_SUSPENDED = 3
// Create a packet record: link to the next packet, target task id, kind,
// a zeroed datum and a 4-slot zero-filled data array.
function make_packet(link, id, kind) {
  var payload = array(4, 0)
  return {link: link, id: id, kind: kind, datum: 0, data: payload}
}
// Build one Richards scheduler instance.
// All scheduler state lives in the closure variables declared below and is
// shared by the task functions. Returns {run: run}; run(iterations) resets the
// state, drives the task loop and reports the counters.
function scheduler() {
var tasks = array(NUM_TASKS, null)
// Task whose fn is currently being executed by run()'s loop.
var current = null
// Counters reported by run(): packets queued and hold_self() calls.
var queue_count = 0
var hold_count = 0
// Idle task state: v1 is a countdown, v2 a shifting bit pattern that selects
// which device gets released.
var v1 = 0
var v2 = 0
// Worker task state: next handler id to target and a letter counter (1..26).
var w_id = HANDLER_A
var w_datum = 0
// Handler task state.
var h_a_queue = null
var h_a_count = 0
var h_b_queue = null
var h_b_count = 0
// Device task state: the packet each device is currently holding.
var dev_a_pkt = null
var dev_b_pkt = null
// Return the highest-priority task whose state is TASK_RUNNING, or null when
// there is none. The comparison is strict, so on equal priority the
// lower-index task wins.
var find_next = function() {
var best = null
var i = 0
for (i = 0; i < NUM_TASKS; i++) {
if (tasks[i] && tasks[i].state == TASK_RUNNING) {
if (!best || tasks[i].priority > best.priority) {
best = tasks[i]
}
}
}
return best
}
// Count a hold, mark the current task as held and pick the next task to run.
var hold_self = function() {
hold_count++
if (current) current.state = TASK_HELD
return find_next()
}
// Make task `id` runnable again if it was held. Returns that task when it
// outranks the current one, otherwise the current task.
var release = function(id) {
var t = tasks[id]
if (!t) return find_next()
if (t.state == TASK_HELD) t.state = TASK_RUNNING
if (t.priority > (current ? current.priority : -1)) return t
return current
}
// Deliver pkt to the task named by pkt.id. The packet is re-stamped with the
// sender's id. An empty target queue makes the target runnable (and returns it
// if it outranks the current task); otherwise the packet is appended to the
// tail of the target's linked queue.
var queue_packet = function(pkt) {
var t = tasks[pkt.id]
var p = null
if (!t) return find_next()
queue_count++
pkt.link = null
pkt.id = current ? current.id : 0
if (!t.queue) {
t.queue = pkt
t.state = TASK_RUNNING
if (t.priority > (current ? current.priority : -1)) return t
} else {
p = t.queue
while (p.link) p = p.link
p.link = pkt
}
return current
}
// Every task's fn(pkt) receives the packet dequeued for it (or null) and
// returns the task that should run next.
// Idle task
tasks[IDLE] = {id: IDLE, priority: 0, queue: null, state: TASK_RUNNING,
hold_count: 0, queue_count: 0,
fn: function(pkt) {
v1--
if (v1 == 0) return hold_self()
// Low bit of v2 selects the device; the odd branch also XORs in 0xD008.
if ((v2 & 1) == 0) {
v2 = v2 >> 1
return release(DEVICE_A)
}
v2 = (v2 >> 1) ^ 0xD008
return release(DEVICE_B)
}
}
// Worker task
tasks[WORKER] = {id: WORKER, priority: 1000, queue: null, state: TASK_SUSPENDED,
hold_count: 0, queue_count: 0,
fn: function(pkt) {
var i = 0
if (!pkt) return hold_self()
// Alternate the destination between the two handlers.
w_id = (w_id == HANDLER_A) ? HANDLER_B : HANDLER_A
pkt.id = w_id
pkt.datum = 0
// Fill the payload with 65 + w_datum, where w_datum cycles through 1..26.
for (i = 0; i < 4; i++) {
w_datum++
if (w_datum > 26) w_datum = 1
pkt.data[i] = 65 + w_datum
}
return queue_packet(pkt)
}
}
// Handler A
tasks[HANDLER_A] = {id: HANDLER_A, priority: 2000, queue: null, state: TASK_SUSPENDED,
hold_count: 0, queue_count: 0,
fn: function(pkt) {
var p = null
if (pkt) { h_a_queue = pkt; h_a_count++ }
if (h_a_queue) {
p = h_a_queue
h_a_queue = p.link
// Forward the packet while fewer than 3 have been received; after that
// release the device instead.
if (h_a_count < 3) return queue_packet(p)
return release(DEVICE_A)
}
return hold_self()
}
}
// Handler B
tasks[HANDLER_B] = {id: HANDLER_B, priority: 3000, queue: null, state: TASK_SUSPENDED,
hold_count: 0, queue_count: 0,
fn: function(pkt) {
var p = null
if (pkt) { h_b_queue = pkt; h_b_count++ }
if (h_b_queue) {
p = h_b_queue
h_b_queue = p.link
// Same policy as Handler A, targeting DEVICE_B.
if (h_b_count < 3) return queue_packet(p)
return release(DEVICE_B)
}
return hold_self()
}
}
// Device A
tasks[DEVICE_A] = {id: DEVICE_A, priority: 4000, queue: null, state: TASK_SUSPENDED,
hold_count: 0, queue_count: 0,
fn: function(pkt) {
var p = null
// An incoming packet is parked and the device holds itself; on the next
// run without a packet the parked one is queued onward.
if (pkt) { dev_a_pkt = pkt; return hold_self() }
if (dev_a_pkt) {
p = dev_a_pkt
dev_a_pkt = null
return queue_packet(p)
}
return hold_self()
}
}
// Device B
tasks[DEVICE_B] = {id: DEVICE_B, priority: 5000, queue: null, state: TASK_SUSPENDED,
hold_count: 0, queue_count: 0,
fn: function(pkt) {
var p = null
// Same park-then-forward behavior as Device A.
if (pkt) { dev_b_pkt = pkt; return hold_self() }
if (dev_b_pkt) {
p = dev_b_pkt
dev_b_pkt = null
return queue_packet(p)
}
return hold_self()
}
}
// Reset all scheduler state, seed the worker with two packets and run tasks
// until none is runnable or iterations * 10 steps have executed.
// Returns {queue_count, hold_count, steps}.
var run = function(iterations) {
var i = 0
var pkt1 = null
var pkt2 = null
var steps = 0
var pkt = null
var next = null
v1 = iterations
v2 = 0xBEEF
queue_count = 0
hold_count = 0
w_id = HANDLER_A
w_datum = 0
h_a_queue = null
h_a_count = 0
h_b_queue = null
h_b_count = 0
dev_a_pkt = null
dev_b_pkt = null
// Only the idle task starts runnable; every queue is cleared.
for (i = 0; i < NUM_TASKS; i++) {
if (tasks[i]) {
tasks[i].state = (i == IDLE) ? TASK_RUNNING : TASK_SUSPENDED
tasks[i].queue = null
}
}
// Worker queue starts as pkt2 -> pkt1.
pkt1 = make_packet(null, WORKER, 1)
pkt2 = make_packet(pkt1, WORKER, 1)
tasks[WORKER].queue = pkt2
tasks[WORKER].state = TASK_RUNNING
current = find_next()
while (current && steps < iterations * 10) {
// Dequeue the head packet (if any) for the current task.
pkt = current.queue
if (pkt) {
current.queue = pkt.link
current.queue_count++
}
next = current.fn(pkt)
// A task returning null falls back to a fresh priority scan.
if (next) current = next
else current = find_next()
steps++
}
return {queue_count: queue_count, hold_count: hold_count, steps: steps}
}
return {run: run}
}
// Benchmark entry points: each round builds a fresh scheduler and runs it for
// a fixed iteration budget. The last run's result is returned (null if n is 0).
return {
  richards_100: function(n) {
    var round = 0
    var sched = null
    var outcome = null
    while (round < n) {
      sched = scheduler()
      outcome = sched.run(100)
      round++
    }
    return outcome
  },
  richards_1k: function(n) {
    var round = 0
    var sched = null
    var outcome = null
    while (round < n) {
      sched = scheduler()
      outcome = sched.run(1000)
      round++
    }
    return outcome
  }
}

180
benches/sorting.cm Normal file
View File

@@ -0,0 +1,180 @@
// sorting.cm — Sorting and searching kernel
// Array manipulation, comparison-heavy, allocation patterns.
// Produce n pseudo-random integers in [0, 10000) from a linear congruential
// generator started at `seed`. The same seed always yields the same array.
function make_random_array(n, seed) {
  var out = []
  var state = seed
  var k = 0
  while (k < n) {
    state = ((state * 1103515245 + 12345) & 0x7FFFFFFF) | 0
    push(out, state % 10000)
    k++
  }
  return out
}
// Return the array [n-1, n-2, ..., 0].
function make_descending(n) {
  var out = []
  var value = n - 1
  while (value >= 0) {
    push(out, value)
    value--
  }
  return out
}
// Manual in-place quicksort of arr[lo..hi] (inclusive bounds).
// Uses the middle element as pivot and two cursors moving toward each other.
// Exercises recursion and array mutation; always returns null.
function qsort(arr, lo, hi) {
  var left = lo
  var right = hi
  var pivot = arr[floor((lo + hi) / 2)]
  var held = 0
  if (lo >= hi) return null
  while (left <= right) {
    while (arr[left] < pivot) left = left + 1
    while (arr[right] > pivot) right = right - 1
    if (left > right) break
    held = arr[left]
    arr[left] = arr[right]
    arr[right] = held
    left = left + 1
    right = right - 1
  }
  // Recurse into whichever partitions still hold more than one element.
  if (lo < right) qsort(arr, lo, right)
  if (left < hi) qsort(arr, left, hi)
  return null
}
// Top-down merge sort. Returns a new sorted array; arrays of length <= 1 are
// returned as-is. Exercises allocation through array slicing and merging.
function msort(arr) {
  var size = length(arr)
  var half = 0
  var lower = null
  var upper = null
  if (size > 1) {
    half = floor(size / 2)
    lower = msort(array(arr, 0, half))
    upper = msort(array(arr, half, size))
    return merge(lower, upper)
  }
  return arr
}
// Merge two ascending arrays into a new ascending array.
// On ties the element from `a` is taken first.
function merge(a, b) {
  var out = []
  var ai = 0
  var bi = 0
  var na = length(a)
  var nb = length(b)
  while (ai < na && bi < nb) {
    if (a[ai] <= b[bi]) {
      push(out, a[ai])
      ai++
    } else {
      push(out, b[bi])
      bi++
    }
  }
  // At most one of the two tails below is non-empty.
  while (ai < na) {
    push(out, a[ai])
    ai++
  }
  while (bi < nb) {
    push(out, b[bi])
    bi++
  }
  return out
}
// Binary search over an ascending array.
// Returns the index of an element equal to target, or -1 when absent.
function bsearch(arr, target) {
  var low = 0
  var high = length(arr) - 1
  var probe = 0
  var seen = null
  while (low <= high) {
    probe = floor((low + high) / 2)
    seen = arr[probe]
    if (seen == target) return probe
    if (seen < target) {
      low = probe + 1
    } else {
      high = probe - 1
    }
  }
  return -1
}
// Build n records {id, score, name} with pseudo-random scores (LCG seeded
// with 42) and return them sorted by the "score" field via the built-in sort.
function sort_records(n) {
  var rows = []
  var state = 42
  var k = 0
  var row = null
  while (k < n) {
    state = ((state * 1103515245 + 12345) & 0x7FFFFFFF) | 0
    row = {id: k, score: state % 10000, name: `item_${k}`}
    push(rows, row)
    k++
  }
  return sort(rows, "score")
}
// Benchmark entry points. Each takes the iteration count n and returns the
// result of the last iteration so the work cannot be discarded.
return {
  // Quicksort 1K random integers
  qsort_1k: function(n) {
    var round = 0
    var data = null
    while (round < n) {
      data = make_random_array(1000, round)
      qsort(data, 0, length(data) - 1)
      round++
    }
    return data
  },
  // Quicksort 10K random integers
  qsort_10k: function(n) {
    var round = 0
    var data = null
    while (round < n) {
      data = make_random_array(10000, round)
      qsort(data, 0, length(data) - 1)
      round++
    }
    return data
  },
  // Merge sort 1K (allocation heavy)
  msort_1k: function(n) {
    var round = 0
    var sorted = null
    while (round < n) {
      sorted = msort(make_random_array(1000, round))
      round++
    }
    return sorted
  },
  // Built-in sort 1K
  builtin_sort_1k: function(n) {
    var round = 0
    var sorted = null
    while (round < n) {
      sorted = sort(make_random_array(1000, round))
      round++
    }
    return sorted
  },
  // Sort worst case (descending → ascending)
  sort_worst_case: function(n) {
    var round = 0
    var data = null
    while (round < n) {
      data = make_descending(1000)
      qsort(data, 0, length(data) - 1)
      round++
    }
    return data
  },
  // Binary search in sorted array: every probe value is taken from the
  // array itself.
  bsearch_1k: function(n) {
    var sorted = sort(make_random_array(1000, 42))
    var hits = 0
    var round = 0
    while (round < n) {
      if (bsearch(sorted, sorted[round % 1000]) >= 0) hits++
      round++
    }
    return hits
  },
  // Sort records by field
  sort_records_500: function(n) {
    var round = 0
    var sorted = null
    while (round < n) {
      sorted = sort_records(500)
      round++
    }
    return sorted
  }
}

82
benches/spectral_norm.cm Normal file
View File

@@ -0,0 +1,82 @@
// spectral_norm.cm — Spectral norm kernel
// Pure numeric, dense array access, mathematical computation.
var math = use('math/radians')
// Entry (i, j) of the matrix A used by the spectral-norm kernel:
// 1 / ((i + j)(i + j + 1) / 2 + i + 1).
function eval_a(i, j) {
  var diag = i + j
  return 1.0 / (diag * (diag + 1) / 2 + i + 1)
}
// au = A * u for the n x n matrix defined by eval_a. Writes into au in place.
function eval_a_times_u(n, u, au) {
  var row = 0
  var col = 0
  var acc = 0
  while (row < n) {
    acc = 0
    col = 0
    while (col < n) {
      acc = acc + eval_a(row, col) * u[col]
      col++
    }
    au[row] = acc
    row++
  }
}
// atu = transpose(A) * u for the n x n matrix defined by eval_a.
// Writes into atu in place.
function eval_at_times_u(n, u, atu) {
  var row = 0
  var col = 0
  var acc = 0
  while (row < n) {
    acc = 0
    col = 0
    while (col < n) {
      // Transposed access: A[col][row].
      acc = acc + eval_a(col, row) * u[col]
      col++
    }
    atu[row] = acc
    row++
  }
}
// atau = transpose(A) * (A * u), using a freshly allocated length-n scratch
// vector for the intermediate product.
function eval_ata_times_u(n, u, atau) {
  var scratch = array(n, 0)
  eval_a_times_u(n, u, scratch)
  eval_at_times_u(n, scratch, atau)
}
// Approximate the spectral norm of the n x n matrix A by 10 rounds of power
// iteration on transpose(A) * A, starting from the all-ones vector.
function spectral_norm(n) {
  var u = array(n, 1)
  var v = array(n, 0)
  var k = 0
  var dot_uv = 0
  var dot_vv = 0
  while (k < 10) {
    eval_ata_times_u(n, u, v)
    eval_ata_times_u(n, v, u)
    k++
  }
  k = 0
  while (k < n) {
    dot_uv = dot_uv + u[k] * v[k]
    dot_vv = dot_vv + v[k] * v[k]
    k++
  }
  return math.sqrt(dot_uv / dot_vv)
}
// Benchmark entry points: compute the spectral norm n times for a fixed
// matrix size and return the last value (0 when n is 0).
return {
  spectral_100: function(n) {
    var round = 0
    var norm = 0
    while (round < n) {
      norm = spectral_norm(100)
      round++
    }
    return norm
  },
  spectral_200: function(n) {
    var round = 0
    var norm = 0
    while (round < n) {
      norm = spectral_norm(200)
      round++
    }
    return norm
  }
}

View File

@@ -0,0 +1,188 @@
// string_processing.cm — String-heavy kernel
// Concat, split, search, replace, interning path stress.
// Return the base lorem-ipsum sentence repeated `paragraphs` times, joined by
// single spaces. Returns "" when paragraphs is 0.
function make_lorem(paragraphs) {
  var base = "Lorem ipsum dolor sit amet consectetur adipiscing elit sed do eiusmod tempor incididunt ut labore et dolore magna aliqua Ut enim ad minim veniam quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat"
  var out = ""
  var k = 0
  while (k < paragraphs) {
    if (k > 0) {
      out = out + " " + base
    } else {
      out = out + base
    }
    k++
  }
  return out
}
// Build a word -> positions lookup table from text.
// The text is split on single spaces; each key maps to the array of word
// indices at which that word occurs, in ascending order.
function build_index(txt) {
  var tokens = array(txt, " ")
  var table = {}
  var pos = 0
  var word = null
  var hits = null
  while (pos < length(tokens)) {
    word = tokens[pos]
    hits = table[word]
    if (!hits) {
      hits = []
      table[word] = hits
    }
    push(hits, pos)
    pos++
  }
  return table
}
// Edit distance between strings a and b with unit cost for insertion,
// deletion and substitution. Keeps only two rows of the DP table and swaps
// them after each row of `a` is processed.
function edit_distance(a, b) {
  var len_a = length(a)
  var len_b = length(b)
  if (len_a == 0) return len_b
  if (len_b == 0) return len_a
  var above = array(len_b + 1, 0)
  var row = array(len_b + 1, 0)
  var r = 0
  var c = 0
  var best = 0
  var cand = 0
  var swap = null
  var chars_a = array(a)
  var chars_b = array(b)

  // Row 0: turning "" into b[0..c) costs c insertions.
  while (c <= len_b) {
    above[c] = c
    c++
  }

  r = 1
  while (r <= len_a) {
    row[0] = r
    c = 1
    while (c <= len_b) {
      best = above[c] + 1 // deletion
      cand = row[c - 1] + 1 // insertion
      if (cand < best) best = cand
      cand = above[c - 1] + (chars_a[r - 1] == chars_b[c - 1] ? 0 : 1) // substitution
      if (cand < best) best = cand
      row[c] = best
      c++
    }
    swap = above
    above = row
    row = swap
    r++
  }
  return above[len_b]
}
// Shared input texts, built once at module load.
var lorem_5 = make_lorem(5)
var lorem_20 = make_lorem(20)

// Benchmark entry points. Each takes the iteration count n and returns a
// value derived from the work so it cannot be discarded.
return {
  // Split text into words and count
  string_split_count: function(n) {
    var round = 0
    var pieces = null
    var total = 0
    while (round < n) {
      pieces = array(lorem_5, " ")
      total += length(pieces)
      round++
    }
    return total
  },
  // Build word index (split + hash + array ops)
  string_index_build: function(n) {
    var round = 0
    var table = null
    while (round < n) {
      table = build_index(lorem_5)
      round++
    }
    return table
  },
  // Search for substrings
  string_search: function(n) {
    var targets = ["dolor", "minim", "quis", "magna", "ipsum"]
    var round = 0
    var t = 0
    var hits = 0
    while (round < n) {
      t = 0
      while (t < length(targets)) {
        if (search(lorem_20, targets[t])) hits++
        t++
      }
      round++
    }
    return hits
  },
  // Replace operations
  string_replace: function(n) {
    var round = 0
    var out = null
    while (round < n) {
      out = replace(lorem_5, "dolor", "DOLOR")
      out = replace(out, "ipsum", "IPSUM")
      out = replace(out, "amet", "AMET")
      round++
    }
    return out
  },
  // String concatenation builder
  string_builder: function(n) {
    var round = 0
    var k = 0
    var buf = null
    var total = 0
    while (round < n) {
      buf = ""
      k = 0
      while (k < 50) {
        buf = buf + "key=" + text(k) + "&value=" + text(k * 17) + "&"
        k++
      }
      total += length(buf)
      round++
    }
    return total
  },
  // Edit distance (DP + array + string ops) over each adjacent word pair
  edit_distance: function(n) {
    var words = ["kitten", "sitting", "saturday", "sunday", "intention", "execution"]
    var round = 0
    var w = 0
    var total = 0
    while (round < n) {
      w = 0
      while (w < length(words) - 1) {
        total += edit_distance(words[w], words[w + 1])
        w++
      }
      round++
    }
    return total
  },
  // Upper/lower/trim chain
  string_transforms: function(n) {
    var src = " Hello World "
    var round = 0
    var total = 0
    var out = null
    while (round < n) {
      out = trim(src)
      out = upper(out)
      out = lower(out)
      total += length(out)
      round++
    }
    return total
  },
  // Starts_with / ends_with (interning path)
  string_prefix_suffix: function(n) {
    var strs = [
      "application/json",
      "text/html",
      "image/png",
      "application/xml",
      "text/plain"
    ]
    var round = 0
    var s = 0
    var hits = 0
    while (round < n) {
      s = 0
      while (s < length(strs)) {
        if (starts_with(strs[s], "application/")) hits++
        if (ends_with(strs[s], "/json")) hits++
        if (starts_with(strs[s], "text/")) hits++
        s++
      }
      round++
    }
    return hits
  }
}

137
benches/tree_ops.cm Normal file
View File

@@ -0,0 +1,137 @@
// tree_ops.cm — Tree data structure operations kernel
// Pointer chasing, recursion, allocation patterns.
// Binary tree: create, walk, transform, check
// Build a complete binary tree of the given depth.
// Interior nodes store their depth in `val`; leaves (depth <= 0) store 1 and
// have null children.
function make_tree(depth) {
  var lower = null
  var upper = null
  if (depth <= 0) return {val: 1, left: null, right: null}
  lower = make_tree(depth - 1)
  upper = make_tree(depth - 1)
  return {val: depth, left: lower, right: upper}
}
// Checksum of a tree: val + check(left) - check(right).
// A node without a left child contributes just its val; null contributes 0.
function tree_check(node) {
  if (!node) return 0
  if (node.left) return node.val + tree_check(node.left) - tree_check(node.right)
  return node.val
}
// Sum of `val` over every node in the tree (0 for null).
function tree_sum(node) {
  return node ? node.val + tree_sum(node.left) + tree_sum(node.right) : 0
}
// Number of nodes on the longest root-to-leaf path (0 for null).
function tree_depth(node) {
  var deepest = 0
  var other = 0
  if (!node) return 0
  deepest = tree_depth(node.left)
  other = tree_depth(node.right)
  if (other > deepest) deepest = other
  return 1 + deepest
}
// Number of nodes in the tree (0 for null).
function tree_count(node) {
  var total = 0
  if (node) total = 1 + tree_count(node.left) + tree_count(node.right)
  return total
}
// Transform tree: return a new tree of the same shape whose values are
// fn(val). fn is applied to a node before its left and right subtrees.
function tree_map(node, fn) {
  var mapped_val = null
  var mapped_left = null
  var mapped_right = null
  if (!node) return null
  mapped_val = fn(node.val)
  mapped_left = tree_map(node.left, fn)
  mapped_right = tree_map(node.right, fn)
  return {val: mapped_val, left: mapped_left, right: mapped_right}
}
// Flatten tree to array: append every value to `result` in in-order
// (left, node, right). Always returns null; the output is the mutated array.
function tree_flatten(node, result) {
  if (node) {
    tree_flatten(node.left, result)
    push(result, node.val)
    tree_flatten(node.right, result)
  }
  return null
}
// Build a balanced tree from arr[lo..hi] (inclusive): the middle element
// becomes the root and each half is built recursively. Returns null for an
// empty range.
function build_balanced(arr, lo, hi) {
  var mid = 0
  var lower = null
  var upper = null
  if (lo > hi) return null
  mid = floor((lo + hi) / 2)
  lower = build_balanced(arr, lo, mid - 1)
  upper = build_balanced(arr, mid + 1, hi)
  return {val: arr[mid], left: lower, right: upper}
}
// Find a value in a BST: true when some node's val equals `val`.
// Descends left for smaller values and right otherwise.
function bst_find(node, val) {
  if (!node) return false
  if (val == node.val) return true
  return bst_find(val < node.val ? node.left : node.right, val)
}
// Benchmark entry points. Each takes the iteration count n and returns a
// value derived from the work so it cannot be discarded.
return {
  // Binary tree create + check (allocation heavy)
  tree_create_check: function(n) {
    var round = 0
    var tree = null
    var acc = 0
    while (round < n) {
      tree = make_tree(10)
      acc += tree_check(tree)
      round++
    }
    return acc
  },
  // Deep tree traversals over one depth-12 tree built up front
  tree_traversal: function(n) {
    var tree = make_tree(12)
    var acc = 0
    var round = 0
    while (round < n) {
      acc += tree_sum(tree) + tree_depth(tree) + tree_count(tree)
      round++
    }
    return acc
  },
  // Tree map (create new tree from old)
  tree_transform: function(n) {
    var tree = make_tree(10)
    var round = 0
    var out = null
    while (round < n) {
      out = tree_map(tree, function(v) { return v * 2 + 1 })
      round++
    }
    return out
  },
  // Flatten + rebuild (array <-> tree conversion)
  tree_flatten_rebuild: function(n) {
    var tree = make_tree(10)
    var round = 0
    var values = null
    var out = null
    while (round < n) {
      values = []
      tree_flatten(tree, values)
      out = build_balanced(values, 0, length(values) - 1)
      round++
    }
    return out
  },
  // BST search (pointer chasing)
  bst_search: function(n) {
    // Build a balanced BST of 1024 elements
    var keys = []
    var k = 0
    while (k < 1024) {
      push(keys, k)
      k++
    }
    var bst = build_balanced(keys, 0, 1023)
    var hits = 0
    k = 0
    while (k < n) {
      if (bst_find(bst, k % 1024)) hits++
      k++
    }
    return hits
  }
}

12869
boot/bootstrap.cm.mcode Normal file

File diff suppressed because it is too large Load Diff

42700
boot/engine.cm.mcode Normal file

File diff suppressed because it is too large Load Diff

108686
boot/fold.cm.mcode Normal file

File diff suppressed because it is too large Load Diff

142361
boot/mcode.cm.mcode Normal file

File diff suppressed because it is too large Load Diff

157424
boot/parse.cm.mcode Normal file

File diff suppressed because it is too large Load Diff

11500
boot/qbe.cm.mcode Normal file

File diff suppressed because it is too large Load Diff

80726
boot/qbe_emit.cm.mcode Normal file

File diff suppressed because it is too large Load Diff

153
boot/seed_bootstrap.cm Normal file
View File

@@ -0,0 +1,153 @@
// seed_bootstrap.cm — minimal bootstrap for regenerating boot files
// Loads only the compiler pipeline, runs a script directly (no engine/actors)
// Usage: ./cell --dev --seed regen
//
// Hidden env: os, core_path, shop_path, args, json
// Native module loader from the hidden `os` env, and the fd module it
// provides.
var load_internal = os.load_internal
var fd = load_internal("js_fd_use")
// Module cache shared by use_basic and use_fn, pre-seeded with the modules
// already at hand.
var use_cache = {fd: fd, os: os, json: json}
// Minimal `use` for boot modules: resolve from the cache, else from a C embed
// named js_<path with '/' replaced by '_'>_use. Returns null when neither
// source has the module.
function use_basic(path) {
  var mod = use_cache[path]
  if (mod) return mod
  mod = load_internal("js_" + replace(path, '/', '_') + "_use")
  if (!mod) return null
  use_cache[path] = mod
  return mod
}
// Load a module from <core_path>/boot/<name>.cm.mcode and evaluate it with
// use_basic as its `use`. Nothing is cached here. Prints a message and
// disrupts when the file is missing.
function boot_load(name) {
  var mcode_path = core_path + '/boot/' + name + ".cm.mcode"
  if (fd.is_file(mcode_path)) {
    return mach_eval_mcode(name, text(fd.slurp(mcode_path)), {use: use_basic})
  }
  print("seed: missing boot mcode: " + mcode_path + "\n")
  disrupt
}
// Load the compiler pipeline stages from their boot .mcode images.
// Each stage is evaluated with use_basic as its `use`, which resolves only
// from use_cache or C embeds.
var tokenize_mod = boot_load("tokenize")
var parse_mod = boot_load("parse")
var fold_mod = boot_load("fold")
var mcode_mod = boot_load("mcode")
var streamline_mod = boot_load("streamline")
// Register the stages in the cache only after all five have been loaded.
// NOTE(review): moving these registrations earlier would change what
// use_basic can resolve while a later stage is being evaluated.
use_cache['tokenize'] = tokenize_mod
use_cache['parse'] = parse_mod
use_cache['fold'] = fold_mod
use_cache['mcode'] = mcode_mod
use_cache['streamline'] = streamline_mod
// Tokenize, parse and fold a source text. Returns the folded AST.
// If the parser reports errors, each one is printed (with line:column when
// the error carries a line) and the function disrupts.
function analyze(src, filename) {
  var tokens = tokenize_mod(src, filename).tokens
  var tree = parse_mod(tokens, src, filename, tokenize_mod)
  var errs = tree.errors
  var _idx = 0
  var err = null
  if (errs == null || length(errs) == 0) return fold_mod(tree)
  while (_idx < length(errs)) {
    err = errs[_idx]
    if (err.line == null) {
      print(`${filename}: error: ${err.message}`)
    } else {
      print(`${filename}:${text(err.line)}:${text(err.column)}: error: ${err.message}`)
    }
    _idx = _idx + 1
  }
  disrupt
}
// Compile a folded AST to mcode, streamline it, and evaluate the JSON-encoded
// result under `env`. Returns whatever the evaluated module returns.
function run_ast(name, ast, env) {
  var mcode = streamline_mod(mcode_mod(ast))
  return mach_eval_mcode(name, json.encode(mcode), env)
}
// Full `use` for seed-run programs. Resolution order:
//   1. the module cache
//   2. a C embed named js_<path with '/' replaced by '_'>_use
//   3. boot mcode at <core_path>/boot/<path>.cm.mcode
//   4. .cm source, looked up in the CWD first and then under core_path
// Results from every source are cached. Prints a message and disrupts when
// the module cannot be found.
function use_fn(path) {
  var mod = use_cache[path]
  var boot_path = null
  var src_path = null
  if (mod) return mod

  // Try C embed
  mod = load_internal("js_" + replace(path, '/', '_') + "_use")
  if (mod) {
    use_cache[path] = mod
    return mod
  }

  // Try boot mcode
  boot_path = core_path + '/boot/' + path + '.cm.mcode'
  if (fd.is_file(boot_path)) {
    mod = mach_eval_mcode(path, text(fd.slurp(boot_path)), {use: use_fn})
    use_cache[path] = mod
    return mod
  }

  // Try .cm source (CWD then core)
  src_path = path + '.cm'
  if (!fd.is_file(src_path))
    src_path = core_path + '/' + path + '.cm'
  if (!fd.is_file(src_path)) {
    print("seed: module not found: " + path + "\n")
    disrupt
  }
  mod = run_ast(path, analyze(text(fd.slurp(src_path)), src_path), {use: use_fn})
  use_cache[path] = mod
  return mod
}
// Run the program from args: args[0] names the program, the remaining
// entries are passed through to it as its own args.
var program = args[0]
var user_args = []
var _j = 1
var prog_path = null
var script = null
var ast = null
var candidates = null
var _c = 0
if (!program) {
  print("seed: no program specified\n")
  disrupt
}
while (_j < length(args)) {
  push(user_args, args[_j])
  _j = _j + 1
}
// Lookup order: .ce in CWD, .ce under core_path, .cm in CWD, .cm under
// core_path. The first existing file wins.
candidates = [
  program + '.ce',
  core_path + '/' + program + '.ce',
  program + '.cm',
  core_path + '/' + program + '.cm'
]
while (_c < length(candidates) && prog_path == null) {
  if (fd.is_file(candidates[_c]))
    prog_path = candidates[_c]
  _c = _c + 1
}
if (prog_path == null) {
  print("seed: program not found: " + program + "\n")
  disrupt
}
script = text(fd.slurp(prog_path))
ast = analyze(script, prog_path)
run_ast(program, ast, {use: use_fn, args: user_args})

8240
boot/seed_bootstrap.cm.mcode Normal file

File diff suppressed because it is too large Load Diff

62636
boot/streamline.cm.mcode Normal file

File diff suppressed because it is too large Load Diff

56709
boot/tokenize.cm.mcode Normal file

File diff suppressed because it is too large Load Diff

25758
boot/verify_ir.cm.mcode Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1,42 +0,0 @@
// bootstrap.ce — regenerate .mach bytecode files consumed by the mach engine
// usage: cell bootstrap.ce
var fd = use("fd")
var json = use("json")
var tokenize = use("tokenize")
var parse = use("parse")
var fold = use("fold")
// Sources to compile and the .mach file each one is written to.
var files = [
  {src: "tokenize.cm", name: "tokenize", out: "tokenize.mach"},
  {src: "parse.cm", name: "parse", out: "parse.mach"},
  {src: "fold.cm", name: "fold", out: "fold.mach"},
  {src: "mcode.cm", name: "mcode", out: "mcode.mach"},
  {src: "internal/bootstrap.cm", name: "bootstrap", out: "internal/bootstrap.mach"},
  {src: "internal/engine.cm", name: "engine", out: "internal/engine.mach"}
]
var idx = 0
var item = null
var source = null
var tokens = null
var folded = null
var bytecode = null
var out = null
// For each entry: tokenize -> parse -> fold -> JSON-encode -> compile to mach
// bytecode. The bytecode is produced before the output file is opened.
while (idx < length(files)) {
  item = files[idx]
  source = text(fd.slurp(item.src))
  tokens = tokenize(source, item.src).tokens
  folded = fold(parse(tokens, source, item.src, tokenize))
  bytecode = mach_compile_ast(item.name, json.encode(folded))
  out = fd.open(item.out, "w")
  fd.write(out, bytecode)
  fd.close(out)
  print(`wrote ${item.out}`)
  idx = idx + 1
}

365
build.cm
View File

@@ -214,36 +214,22 @@ function compute_link_key(objects, ldflags, target_ldflags, target, cc) {
return content_hash(text(parts, '\n'))
}
// Build a dynamic library for a package
// Output goes to .cell/lib/<package_name>.<ext>
// Dynamic libraries do NOT link against core; undefined symbols are resolved at dlopen time
// Uses content-addressed store + symlink for caching
Build.build_dynamic = function(pkg, target = Build.detect_host_target(), buildtype = 'release') {
var objects = Build.build_package(pkg, target, true, buildtype) // exclude main.c
// Build a per-module dynamic library for a single C file
// Returns the content-addressed dylib path in .cell/build/<hash>.<target>.dylib
Build.build_module_dylib = function(pkg, file, target, buildtype) {
var _target = target || Build.detect_host_target()
var _buildtype = buildtype || 'release'
var obj = Build.compile_file(pkg, file, _target, _buildtype)
if (length(objects) == 0) {
log.console('No C files in ' + pkg)
return null
}
var lib_dir = shop.get_lib_dir()
var store_dir = lib_dir + '/store'
ensure_dir(lib_dir)
ensure_dir(store_dir)
var lib_name = shop.lib_name_for_package(pkg)
var dylib_ext = toolchains[target].system == 'windows' ? '.dll' : (toolchains[target].system == 'darwin' ? '.dylib' : '.so')
var stable_path = lib_dir + '/' + lib_name + dylib_ext
// Get link flags (with sigil replacement)
var ldflags = replace_sigils_array(pkg_tools.get_flags(pkg, 'LDFLAGS', target))
var target_ldflags = toolchains[target].c_link_args || []
var cc = toolchains[target].cpp || toolchains[target].c
var pkg_dir = shop.get_package_dir(pkg)
var tc = toolchains[_target]
var dylib_ext = tc.system == 'windows' ? '.dll' : (tc.system == 'darwin' ? '.dylib' : '.so')
var cc = tc.cpp || tc.c
var local_dir = get_local_dir()
var tc = toolchains[target]
var pkg_dir = shop.get_package_dir(pkg)
// Resolve relative -L paths in ldflags for hash computation
// Get link flags
var ldflags = replace_sigils_array(pkg_tools.get_flags(pkg, 'LDFLAGS', _target))
var target_ldflags = tc.c_link_args || []
var resolved_ldflags = []
arrfor(ldflags, function(flag) {
if (starts_with(flag, '-L') && !starts_with(flag, '-L/')) {
@@ -252,36 +238,21 @@ Build.build_dynamic = function(pkg, target = Build.detect_host_target(), buildty
push(resolved_ldflags, flag)
})
// Compute link key
var link_key = compute_link_key(objects, resolved_ldflags, target_ldflags, target, cc)
var store_path = store_dir + '/' + lib_name + '-' + link_key + dylib_ext
// Content-addressed output: hash of (object + link flags + target)
var link_key = compute_link_key([obj], resolved_ldflags, target_ldflags, _target, cc)
var build_dir = get_build_dir()
ensure_dir(build_dir)
var dylib_path = build_dir + '/' + link_key + '.' + _target + dylib_ext
// Check if already linked in store
if (fd.is_file(store_path)) {
// Ensure symlink points to the store file
if (fd.is_link(stable_path)) {
var current_target = fd.readlink(stable_path)
if (current_target == store_path) {
// Already up to date
return stable_path
}
fd.unlink(stable_path)
} else if (fd.is_file(stable_path)) {
fd.unlink(stable_path)
}
fd.symlink(store_path, stable_path)
return stable_path
}
if (fd.is_file(dylib_path))
return dylib_path
// Build link command
var cmd_parts = [cc, '-shared', '-fPIC']
// Platform-specific flags for undefined symbols (resolved at dlopen) and size optimization
if (tc.system == 'darwin') {
cmd_parts = array(cmd_parts, [
'-undefined', 'dynamic_lookup',
'-Wl,-dead_strip',
'-Wl,-install_name,' + stable_path,
'-Wl,-rpath,@loader_path/../local',
'-Wl,-rpath,' + local_dir
])
@@ -293,41 +264,54 @@ Build.build_dynamic = function(pkg, target = Build.detect_host_target(), buildty
'-Wl,-rpath,' + local_dir
])
} else if (tc.system == 'windows') {
// Windows DLLs: use --allow-shlib-undefined for mingw
push(cmd_parts, '-Wl,--allow-shlib-undefined')
}
// Add .cell/local to library search path
push(cmd_parts, '-L"' + local_dir + '"')
arrfor(objects, function(obj) {
push(cmd_parts, '"' + obj + '"')
})
// Do NOT link against core library - symbols resolved at dlopen time
push(cmd_parts, '"' + obj + '"')
cmd_parts = array(cmd_parts, resolved_ldflags)
cmd_parts = array(cmd_parts, target_ldflags)
push(cmd_parts, '-o')
push(cmd_parts, '"' + store_path + '"')
push(cmd_parts, '"' + dylib_path + '"')
var cmd_str = text(cmd_parts, ' ')
log.console('Linking ' + lib_name + dylib_ext)
log.console('Linking module ' + file + ' -> ' + fd.basename(dylib_path))
var ret = os.system(cmd_str)
if (ret != 0) {
throw Error('Linking failed: ' + pkg)
print('Linking failed: ' + file); disrupt
}
// Update symlink to point to the new store file
if (fd.is_link(stable_path)) {
fd.unlink(stable_path)
} else if (fd.is_file(stable_path)) {
fd.unlink(stable_path)
}
fd.symlink(store_path, stable_path)
return dylib_path
}
return stable_path
// Build the dynamic libraries for a package, one dylib per C file.
// target defaults to the host target and buildtype to 'release'.
// Returns an array of {file, symbol, dylib}, one entry per module.
// When at least one module was built, also writes
// <lib_dir>/<lib_name>.manifest.json mapping each symbol to its dylib path so
// the loader can find per-module dylibs by symbol.
Build.build_dynamic = function(pkg, target, buildtype) {
  var _target = target || Build.detect_host_target()
  var _buildtype = buildtype || 'release'
  var sources = pkg_tools.get_c_files(pkg, _target, true)
  var results = []
  var manifest = {}

  arrfor(sources, function(file) {
    var symbol = shop.c_symbol_for_file(pkg, file)
    var dylib_path = Build.build_module_dylib(pkg, file, _target, _buildtype)
    push(results, {file: file, symbol: symbol, dylib: dylib_path})
    manifest[symbol] = dylib_path
  })

  // Nothing built: no manifest to write.
  if (length(results) == 0)
    return results

  var lib_dir = shop.get_lib_dir()
  ensure_dir(lib_dir)
  var manifest_path = lib_dir + '/' + shop.lib_name_for_package(pkg) + '.manifest.json'
  var json = use('json')
  fd.slurpwrite(manifest_path, stone(blob(json.encode(manifest))))
  return results
}
// ============================================================================
@@ -412,43 +396,236 @@ Build.build_static = function(packages, target = Build.detect_host_target(), out
return output
}
// ============================================================================
// Native .cm compilation (source → mcode → QBE IL → .o → .dylib)
// ============================================================================
// Post-process QBE IL: QBE requires every block to begin with a label, so an
// instruction that directly follows a block terminator needs one. This pass
// inserts a fresh "@_dead_<n>" label before any such instruction.
//
// Terminators recognised: `ret` (with or without a value), `jmp` and `hlt`.
// Lines starting with '@' (an existing label) or '}' (end of function) need
// no label. Blank lines are passed through and do not cancel a pending
// label, so "ret / blank / instruction" is still fixed up.
//
// il_text: QBE IL as a single newline-separated string.
// Returns the IL text with the dead labels inserted.
function qbe_insert_dead_labels(il_text) {
  var lines = array(il_text, "\n")
  var result = []
  var dead_id = 0
  var need_label = false
  var i = 0
  var line = null
  var trimmed = null
  var is_boundary = false
  while (i < length(lines)) {
    line = lines[i]
    trimmed = trim(line)
    if (length(trimmed) > 0) {
      is_boundary = starts_with(trimmed, '@') || starts_with(trimmed, '}')
      if (need_label && !is_boundary) {
        push(result, "@_dead_" + text(dead_id))
        dead_id = dead_id + 1
      }
      // After trimming, a value-less return is exactly 'ret', which the
      // 'ret ' prefix test alone would miss.
      need_label = trimmed == 'ret' || trimmed == 'hlt' ||
        starts_with(trimmed, 'ret ') || starts_with(trimmed, 'jmp ')
    }
    push(result, line)
    i = i + 1
  }
  return text(result, "\n")
}
// Compile a .cm source file to a native dynamic library via QBE
// (source -> mcode -> QBE IL -> assembly -> object -> dylib).
//
// src_path:  path of the .cm file to compile
// target:    toolchain key; defaults to the host target
// buildtype: accepted for signature parity with the other Build functions;
//            not currently used by this pipeline
//
// Returns the content-addressed dylib path in the build directory. The cache
// key is a hash of the source text and the target, so an existing dylib is
// returned before any compilation work is done. Prints a message and
// disrupts if the source is missing or any external tool fails.
Build.compile_native = function(src_path, target, buildtype) {
  var _target = target || Build.detect_host_target()

  if (!fd.is_file(src_path)) {
    print('Source file not found: ' + src_path); disrupt
  }

  var tc = toolchains[_target]
  var dylib_ext = tc.system == 'windows' ? '.dll' : (tc.system == 'darwin' ? '.dylib' : '.so')
  var cc = tc.c

  // Step 1: Read source and check the content-addressed cache first; the key
  // does not depend on the compiled output.
  var src = text(fd.slurp(src_path))
  var hash = content_hash(src + '\n' + _target + '\nnative')
  var build_dir = get_build_dir()
  ensure_dir(build_dir)
  var dylib_path = build_dir + '/' + hash + '.' + _target + dylib_ext
  if (fd.is_file(dylib_path))
    return dylib_path

  // Step 2: Compile through the pipeline
  var tokenize = use('tokenize')
  var parse = use('parse')
  var fold = use('fold')
  var mcode_mod = use('mcode')
  var streamline_mod = use('streamline')
  var qbe_macros = use('qbe')
  var qbe_emit = use('qbe_emit')

  var tok_result = tokenize(src, src_path)
  var ast = parse(tok_result.tokens, src, src_path, tokenize)
  var folded = fold(ast)
  var compiled = mcode_mod(folded)
  var optimized = streamline_mod(compiled)

  // Step 3: Generate QBE IL and post-process it (insert dead labels)
  var il = qbe_insert_dead_labels(qbe_emit(optimized, qbe_macros))

  // Step 4: Write QBE IL to temp file
  var tmp = '/tmp/cell_native_' + hash
  var ssa_path = tmp + '.ssa'
  var s_path = tmp + '.s'
  var o_path = tmp + '.o'
  // The runtime stub object is compiled by the target's compiler, so it is
  // kept per target rather than shared between targets.
  var rt_o_path = '/tmp/cell_qbe_rt.' + _target + '.o'
  fd.slurpwrite(ssa_path, stone(blob(il)))

  // File paths are double-quoted in every shell command, as in
  // Build.build_module_dylib, so paths containing spaces survive.

  // Step 5: QBE compile to assembly
  var rc = os.system('qbe -o "' + s_path + '" "' + ssa_path + '"')
  if (rc != 0) {
    print('QBE compilation failed for: ' + src_path); disrupt
  }

  // Step 6: Assemble
  rc = os.system(cc + ' -c "' + s_path + '" -o "' + o_path + '"')
  if (rc != 0) {
    print('Assembly failed for: ' + src_path); disrupt
  }

  // Step 7: Compile QBE runtime stubs if needed
  if (!fd.is_file(rt_o_path)) {
    var qbe_rt_path = shop.get_package_dir('core') + '/qbe_rt.c'
    rc = os.system(cc + ' -c "' + qbe_rt_path + '" -o "' + rt_o_path + '" -fPIC')
    if (rc != 0) {
      print('QBE runtime stubs compilation failed'); disrupt
    }
  }

  // Step 8: Link dylib; undefined symbols are left for load time
  var link_cmd = cc + ' -shared -fPIC'
  if (tc.system == 'darwin') {
    link_cmd = link_cmd + ' -undefined dynamic_lookup'
  } else if (tc.system == 'linux') {
    link_cmd = link_cmd + ' -Wl,--allow-shlib-undefined'
  }
  link_cmd = link_cmd + ' "' + o_path + '" "' + rt_o_path + '" -o "' + dylib_path + '"'

  rc = os.system(link_cmd)
  if (rc != 0) {
    print('Linking native dylib failed for: ' + src_path); disrupt
  }

  log.console('Built native: ' + fd.basename(dylib_path))
  return dylib_path
}
// ============================================================================
// Module table generation (for static builds)
// ============================================================================
// Compile a .cm module to mach bytecode.
// Runs tokenize -> parse -> fold -> mcode -> streamline and hands the
// JSON-encoded result to mach_compile_mcode_bin.
// Returns the raw mach bytes as a blob. Prints a message and disrupts when
// src_path is not a file.
Build.compile_cm_to_mach = function(src_path) {
  if (!fd.is_file(src_path)) {
    print('Source file not found: ' + src_path); disrupt
  }
  var source = text(fd.slurp(src_path))
  var tokenize = use('tokenize')
  var parse = use('parse')
  var fold = use('fold')
  var mcode_mod = use('mcode')
  var streamline_mod = use('streamline')
  var json = use('json')
  var tokens = tokenize(source, src_path).tokens
  var folded = fold(parse(tokens, source, src_path, tokenize))
  var optimized = streamline_mod(mcode_mod(folded))
  return mach_compile_mcode_bin(src_path, json.encode(optimized))
}
// Generate a module_table.c file that embeds mach bytecode for .cm modules.
//   modules: array of {name, src_path} — name is the module name, src_path is the .cm file
//   output:  path to write the generated .c file
// Returns `output`.
//
// The generated file holds one static byte array per module plus
// cell_embedded_module_lookup(name), which returns the matching entry or a
// null pointer when no embedded module has that name.
Build.generate_module_table = function(modules, output) {
  var lines = []
  var entries = []

  push(lines, '/* Generated module table — do not edit */')
  push(lines, '#include <stddef.h>')
  push(lines, '#include <string.h>')
  push(lines, '')
  push(lines, 'struct cell_embedded_entry {')
  push(lines, ' const char *name;')
  push(lines, ' const unsigned char *data;')
  push(lines, ' size_t size;')
  push(lines, '};')
  push(lines, '')

  arrfor(modules, function(mod, i) {
    // The sanitized name keeps the generated C readable; the index suffix keeps
    // identifiers unique when two names sanitize to the same text
    // (for example 'a/b' and 'a_b').
    var safe = replace(replace(replace(mod.name, '/', '_'), '.', '_'), '-', '_')
    var ident = 'mod_' + safe + '_' + text(i) + '_data'
    // The name is emitted inside a C string literal, so backslashes and double
    // quotes must be escaped (backslashes first, so the escapes added for
    // quotes are not doubled).
    var c_name = replace(replace(mod.name, '\\', '\\\\'), '"', '\\"')
    var mach = Build.compile_cm_to_mach(mod.src_path)
    var bytes = array(mach)
    var hex = []
    arrfor(bytes, function(b) {
      push(hex, '0x' + text(b, 'h2'))
    })
    push(lines, 'static const unsigned char ' + ident + '[] = {')
    push(lines, ' ' + text(hex, ', '))
    push(lines, '};')
    push(lines, '')
    push(entries, {ident: ident, c_name: c_name})
    log.console('Embedded: ' + mod.name + ' (' + text(length(bytes)) + ' bytes)')
  })

  // Lookup function: a linear chain of strcmp tests, one per embedded module.
  push(lines, 'const struct cell_embedded_entry *cell_embedded_module_lookup(const char *name) {')
  arrfor(entries, function(entry) {
    push(lines, ' if (strcmp(name, "' + entry.c_name + '") == 0) {')
    push(lines, ' static const struct cell_embedded_entry e = {"' + entry.c_name + '", ' + entry.ident + ', sizeof(' + entry.ident + ')};')
    push(lines, ' return &e;')
    push(lines, ' }')
  })
  push(lines, ' return (void *)0;')
  push(lines, '}')

  var c_text = text(lines, '\n')
  fd.slurpwrite(output, stone(blob(c_text)))
  log.console('Generated ' + output)
  return output
}
// ============================================================================
// Convenience functions
// ============================================================================
// Build dynamic libraries for all installed packages.
//   target:    build target; defaults to Build.detect_host_target()
//   buildtype: defaults to 'release'
// Returns an array of {package, modules} records, with 'core' first when it
// is installed. A failure inside Build.build_dynamic is not caught here.
Build.build_all_dynamic = function(target, buildtype) {
  var _target = target || Build.detect_host_target()
  var _buildtype = buildtype || 'release'
  var packages = shop.list_packages()
  var results = []

  // Build core first
  if (find(packages, function(p) { return p == 'core' }) != null) {
    var core_mods = Build.build_dynamic('core', _target, _buildtype)
    push(results, {package: 'core', modules: core_mods})
  }

  // Build other packages
  arrfor(packages, function(pkg) {
    if (pkg == 'core') return
    var pkg_mods = Build.build_dynamic(pkg, _target, _buildtype)
    push(results, {package: pkg, modules: pkg_mods})
  })

  return results
}

View File

@@ -1,7 +1,7 @@
// compile.ce — compile a .cm module to native .dylib via QBE
//
// Usage:
// cell --core . compile.ce <file.cm>
// cell --dev compile.ce <file.cm>
//
// Produces <file>.dylib in the current directory.
@@ -9,7 +9,7 @@ var fd = use('fd')
var os = use('os')
if (length(args) < 1) {
print('usage: cell --core . compile.ce <file.cm>')
print('usage: cell --dev compile.ce <file.cm>')
return
}
@@ -26,39 +26,22 @@ var ssa_path = tmp + '.ssa'
var s_path = tmp + '.s'
var o_path = tmp + '.o'
var rt_o_path = '/tmp/qbe_rt.o'
var dylib_path = base + '.dylib'
var dylib_path = file + '.dylib'
var cwd = fd.getcwd()
var rc = 0
// Step 1: emit QBE IL
print('emit qbe...')
rc = os.system('cd ' + cwd + ' && ./cell --core . --emit-qbe ' + file + ' > ' + ssa_path)
rc = os.system('cd ' + cwd + ' && ./cell --dev qbe.ce ' + file + ' > ' + ssa_path)
if (rc != 0) {
print('failed to emit qbe il')
return
}
// Step 2: post-process — insert dead labels after ret/jmp, append wrapper
// Use awk via shell to avoid blob/slurpwrite issues with long strings
print('post-process...')
var awk_cmd = `awk '
need_label && /^[[:space:]]*[^@}]/ && NF > 0 {
print "@_dead_" dead_id; dead_id++; need_label=0
}
/^@/ || /^}/ || NF==0 { need_label=0 }
/^[[:space:]]*ret / || /^[[:space:]]*jmp / { need_label=1; print; next }
{ print }
' ` + ssa_path + ` > ` + tmp + `_fixed.ssa`
rc = os.system(awk_cmd)
if (rc != 0) {
print('post-process failed')
return
}
// Append wrapper function — called as symbol(ctx) by os.dylib_symbol.
// Step 2: append wrapper function — called as symbol(ctx) by os.dylib_symbol.
// Delegates to cell_rt_module_entry which heap-allocates a frame
// (so closures survive) and calls cell_main.
var wrapper_cmd = `printf '\nexport function l $` + symbol + `(l %%ctx) {\n@entry\n %%result =l call $cell_rt_module_entry(l %%ctx)\n ret %%result\n}\n' >> ` + tmp + `_fixed.ssa`
var wrapper_cmd = `printf '\nexport function l $` + symbol + `(l %%ctx) {\n@entry\n %%result =l call $cell_rt_module_entry(l %%ctx)\n ret %%result\n}\n' >> ` + ssa_path
rc = os.system(wrapper_cmd)
if (rc != 0) {
print('wrapper append failed')
@@ -67,7 +50,7 @@ if (rc != 0) {
// Step 3: compile QBE IL to assembly
print('qbe compile...')
rc = os.system('~/.local/bin/qbe -o ' + s_path + ' ' + tmp + '_fixed.ssa')
rc = os.system('qbe -o ' + s_path + ' ' + ssa_path)
if (rc != 0) {
print('qbe compilation failed')
return

98
compile_seed.ce Normal file
View File

@@ -0,0 +1,98 @@
// compile_seed.ce — compile a .cm module to native .dylib via QBE (seed mode)
// Usage: ./cell --dev --seed compile_seed <file.cm>
//
// Steps: source -> QBE IL (.ssa) -> assembly (.s) -> object (.o) -> <file>.dylib.
// Intermediate files go to /tmp/qbe_<sanitized name>.*; the runtime stub
// object /tmp/qbe_rt.o is built once and reused on later runs.
var fd = use("fd")
var os = use("os")
var tokenize = use("tokenize")
var parse = use("parse")
var fold = use("fold")
var mcode = use("mcode")
var streamline = use("streamline")
var qbe_macros = use("qbe")
var qbe_emit = use("qbe_emit")

if (length(args) < 1) {
  print("usage: cell --dev --seed compile_seed <file.cm>")
  disrupt
}

var file = args[0]

// Fail early with a clear message instead of disrupting inside fd.slurp.
if (!fd.is_file(file)) {
  print("source file not found: " + file)
  disrupt
}

// Strip a trailing .cm / .ce so the symbol and temp names use the bare module path.
var base = file
if (ends_with(base, ".cm") || ends_with(base, ".ce")) {
  base = text(base, 0, length(base) - 3)
}

var safe = replace(replace(replace(base, "/", "_"), "-", "_"), ".", "_")
var symbol = "js_" + safe + "_use"
var tmp = "/tmp/qbe_" + safe
var ssa_path = tmp + ".ssa"
var s_path = tmp + ".s"
var o_path = tmp + ".o"
var rt_o_path = "/tmp/qbe_rt.o"
var dylib_path = file + ".dylib"
var rc = 0

// Step 1: compile to QBE IL
print("compiling " + file + " to QBE IL...")
var src = text(fd.slurp(file))
var result = tokenize(src, file)
var ast = parse(result.tokens, src, file, tokenize)
var folded = fold(ast)
var compiled = mcode(folded)
var optimized = streamline(compiled)
var il = qbe_emit(optimized, qbe_macros)

// Step 2: append wrapper function — the exported symbol forwards ctx to
// cell_rt_module_entry and returns its result.
var wrapper = `
export function l $${symbol}(l %ctx) {
@entry
%result =l call $cell_rt_module_entry(l %ctx)
ret %result
}
`
il = il + wrapper

// Write IL to file — remove old file first to avoid leftover content
if (fd.is_file(ssa_path)) fd.unlink(ssa_path)
// NOTE(review): 1537 (0x601) and 420 (0644) are raw open() flag/mode numbers;
// 0x601 looks like the macOS encoding of O_WRONLY|O_CREAT|O_TRUNC — confirm
// before relying on this on another platform.
var out_fd = fd.open(ssa_path, 1537, 420)
fd.write(out_fd, il)
fd.close(out_fd)
print("wrote " + ssa_path + " (" + text(length(il)) + " bytes)")

// Step 3: compile QBE IL to assembly
print("qbe compile...")
rc = os.system("qbe -o " + s_path + " " + ssa_path)
if (rc != 0) {
  print("qbe compilation failed")
  disrupt
}

// Step 4: assemble
print("assemble...")
rc = os.system("cc -c " + s_path + " -o " + o_path)
if (rc != 0) {
  print("assembly failed")
  disrupt
}

// Step 5: compile runtime stubs (skipped when the cached object already exists)
if (!fd.is_file(rt_o_path)) {
  print("compile runtime stubs...")
  rc = os.system("cc -c source/qbe_helpers.c -o " + rt_o_path + " -fPIC -Isource")
  if (rc != 0) {
    print("runtime stubs compilation failed")
    disrupt
  }
}

// Step 6: link dylib
// NOTE(review): "-undefined dynamic_lookup" is a Darwin linker option; other
// platforms presumably need a different flag — confirm if this must run there.
print("link...")
rc = os.system("cc -shared -fPIC -undefined dynamic_lookup " + o_path + " " + rt_o_path + " -o " + dylib_path)
if (rc != 0) {
  print("linking failed")
  disrupt
}
print("built: " + dylib_path)

View File

@@ -240,7 +240,8 @@ static const JSCFunctionListEntry js_crypto_funcs[] = {
/* Module entry point for crypto: creates the module object, attaches every
 * entry of js_crypto_funcs to it, and returns it.
 * The object is held through JS_FRAME/JS_ROOT while the function list is
 * attached (presumably so a collection triggered during that call cannot
 * invalidate it — confirm against the macro definitions in cell.h). */
JSValue js_crypto_use(JSContext *js)
{
  JS_FRAME(js);
  JS_ROOT(mod, JS_NewObject(js));
  JS_SetPropertyFunctionList(js, mod.val, js_crypto_funcs, sizeof(js_crypto_funcs)/sizeof(js_crypto_funcs[0]));
  JS_RETURN(mod.val);
}

View File

@@ -1,27 +1,15 @@
#include "cell.h"
// Return the current stack depth.
JSC_CCALL(debug_stack_depth, return number2js(js,js_debugger_stack_depth(js)))
// TODO: Reimplement stack depth for register VM
JSC_CCALL(debug_stack_depth, return number2js(js, 0))
// Return a backtrace of the current call stack.
JSC_CCALL(debug_build_backtrace, return js_debugger_build_backtrace(js))
// Return the closure variables for a given function.
JSC_CCALL(debug_closure_vars, return js_debugger_closure_variables(js,argv[0]))
JSC_CCALL(debug_set_closure_var,
js_debugger_set_closure_variable(js,argv[0],argv[1],argv[2]);
return JS_NULL;
)
// Return the local variables for a specific stack frame.
JSC_CCALL(debug_local_vars, return js_debugger_local_variables(js, js2number(js,argv[0])))
// Return metadata about a given function.
JSC_CCALL(debug_fn_info, return js_debugger_fn_info(js, argv[0]))
// Return an array of functions in the current backtrace.
JSC_CCALL(debug_backtrace_fns, return js_debugger_backtrace_fns(js))
// TODO: Reimplement debug introspection for register VM
JSC_CCALL(debug_build_backtrace, return JS_NewArray(js))
JSC_CCALL(debug_closure_vars, return JS_NewObject(js))
JSC_CCALL(debug_set_closure_var, return JS_NULL;)
JSC_CCALL(debug_local_vars, return JS_NewObject(js))
JSC_CCALL(debug_fn_info, return JS_NewObject(js))
JSC_CCALL(debug_backtrace_fns, return JS_NewArray(js))
static const JSCFunctionListEntry js_debug_funcs[] = {
MIST_FUNC_DEF(debug, stack_depth, 0),
@@ -34,7 +22,8 @@ static const JSCFunctionListEntry js_debug_funcs[] = {
};
/* Module entry point for debug: creates the module object, attaches every
 * entry of js_debug_funcs to it, and returns it.
 * The object is held through JS_FRAME/JS_ROOT while the function list is
 * attached (presumably to keep it alive across allocations — confirm against
 * the macro definitions in cell.h). */
JSValue js_debug_use(JSContext *js) {
  JS_FRAME(js);
  JS_ROOT(mod, JS_NewObject(js));
  JS_SetPropertyFunctionList(js, mod.val, js_debug_funcs, countof(js_debug_funcs));
  JS_RETURN(mod.val);
}

View File

@@ -1,48 +1,16 @@
#include "cell.h"
JSC_CCALL(os_mem_limit, JS_SetMemoryLimit(JS_GetRuntime(js), js2number(js,argv[0])))
JSC_CCALL(os_max_stacksize, JS_SetMaxStackSize(JS_GetRuntime(js), js2number(js,argv[0])))
JSC_CCALL(os_max_stacksize, JS_SetMaxStackSize(js, js2number(js,argv[0])))
// Compute the approximate size of a single JS value in memory.
// TODO: Reimplement memory usage reporting for new allocator
JSC_CCALL(os_calc_mem,
JSMemoryUsage mu;
JS_ComputeMemoryUsage(JS_GetRuntime(js),&mu);
ret = JS_NewObject(js);
JS_SetPropertyStr(js,ret,"malloc_size",number2js(js,mu.malloc_size));
JS_SetPropertyStr(js,ret,"malloc_limit",number2js(js,mu.malloc_limit));
JS_SetPropertyStr(js,ret,"memory_used_size",number2js(js,mu.memory_used_size));
JS_SetPropertyStr(js,ret,"malloc_count",number2js(js,mu.malloc_count));
JS_SetPropertyStr(js,ret,"memory_used_count",number2js(js,mu.memory_used_count));
JS_SetPropertyStr(js,ret,"str_count",number2js(js,mu.str_count));
JS_SetPropertyStr(js,ret,"str_size",number2js(js,mu.str_size));
JS_SetPropertyStr(js,ret,"obj_count",number2js(js,mu.obj_count));
JS_SetPropertyStr(js,ret,"obj_size",number2js(js,mu.obj_size));
JS_SetPropertyStr(js,ret,"prop_count",number2js(js,mu.prop_count));
JS_SetPropertyStr(js,ret,"prop_size",number2js(js,mu.prop_size));
JS_SetPropertyStr(js,ret,"shape_count",number2js(js,mu.shape_count));
JS_SetPropertyStr(js,ret,"shape_size",number2js(js,mu.shape_size));
JS_SetPropertyStr(js,ret,"js_func_count",number2js(js,mu.js_func_count));
JS_SetPropertyStr(js,ret,"js_func_size",number2js(js,mu.js_func_size));
JS_SetPropertyStr(js,ret,"js_func_code_size",number2js(js,mu.js_func_code_size));
JS_SetPropertyStr(js,ret,"js_func_pc2line_count",number2js(js,mu.js_func_pc2line_count));
JS_SetPropertyStr(js,ret,"js_func_pc2line_size",number2js(js,mu.js_func_pc2line_size));
JS_SetPropertyStr(js,ret,"c_func_count",number2js(js,mu.c_func_count));
JS_SetPropertyStr(js,ret,"array_count",number2js(js,mu.array_count));
JS_SetPropertyStr(js,ret,"fast_array_count",number2js(js,mu.fast_array_count));
JS_SetPropertyStr(js,ret,"fast_array_elements",number2js(js,mu.fast_array_elements));
JS_SetPropertyStr(js,ret,"binary_object_count",number2js(js,mu.binary_object_count));
JS_SetPropertyStr(js,ret,"binary_object_size",number2js(js,mu.binary_object_size));
)
// Disassemble a function object into a string.
JSC_CCALL(js_disassemble,
return js_debugger_fn_bytecode(js, argv[0]);
)
// Return metadata about a given function.
JSC_CCALL(js_fn_info,
return js_debugger_fn_info(js, argv[0]);
)
// TODO: Reimplement for register VM
JSC_CCALL(js_disassemble, return JS_NewArray(js);)
JSC_CCALL(js_fn_info, return JS_NewObject(js);)
static const JSCFunctionListEntry js_js_funcs[] = {
MIST_FUNC_DEF(os, calc_mem, 0),
@@ -53,7 +21,8 @@ static const JSCFunctionListEntry js_js_funcs[] = {
};
/* Module entry point for js: creates the module object, attaches every entry
 * of js_js_funcs to it, and returns it.
 * The object is held through JS_FRAME/JS_ROOT while the function list is
 * attached (presumably to keep it alive across allocations — confirm against
 * the macro definitions in cell.h). */
JSValue js_js_use(JSContext *js) {
  JS_FRAME(js);
  JS_ROOT(mod, JS_NewObject(js));
  JS_SetPropertyFunctionList(js, mod.val, js_js_funcs, countof(js_js_funcs));
  JS_RETURN(mod.val);
}

264
diff.ce Normal file
View File

@@ -0,0 +1,264 @@
// diff.ce — differential testing: run tests optimized vs unoptimized, compare results
//
// Usage:
// cell diff - diff all test files in current package
// cell diff suite - diff a specific test file (tests/suite.cm)
// cell diff tests/foo - diff a specific test file by path
var shop = use('internal/shop')
var pkg = use('package')
var fd = use('fd')
var time = use('time')
var _args = args == null ? [] : args
var analyze = use('os').analyze
var run_ast_fn = use('os').run_ast_fn
var run_ast_noopt_fn = use('os').run_ast_noopt_fn
// The unoptimized runner is required for a differential run; without it
// there is nothing to compare against, so report and stop.
if (!run_ast_noopt_fn) {
  log.console("error: run_ast_noopt_fn not available (rebuild bootstrap)")
  $stop()
  return
}

// Arguments: diff [test_path] — the first argument, when present, selects one test.
var target_test = length(_args) > 0 ? _args[0] : null
// A directory counts as a package when it contains a cell.toml file.
// A null `dir` means the current directory.
function is_valid_package(dir) {
  var root = '.'
  if (dir != null) root = dir
  return fd.is_file(root + '/cell.toml')
}
// Differential testing only runs from a package root: report and stop when
// the current directory has no cell.toml.
if (!is_valid_package('.')) {
log.console('No cell.toml found in current directory')
$stop()
return
}
// Collect test files: every package file under tests/ that ends in .cm.
//   specific_test: optional selector; may be given with or without the
//                  'tests/' prefix and with or without the '.cm' suffix.
//                  When set, only the file whose path (minus '.cm') matches is kept.
// Returns an array of file paths.
function collect_tests(specific_test) {
  var files = pkg.list_files(null)
  var test_files = []
  var i = 0
  var f = null
  var wanted = null

  // Normalize the selector once, up front, to the 'tests/<name>' form without extension.
  if (specific_test) {
    wanted = specific_test
    if (!starts_with(wanted, 'tests/')) wanted = 'tests/' + wanted
    if (ends_with(wanted, '.cm')) wanted = text(wanted, 0, -3)
  }

  for (i = 0; i < length(files); i++) {
    f = files[i]
    if (!starts_with(f, "tests/") || !ends_with(f, ".cm")) continue
    // text(f, 0, -3) drops the trailing '.cm' before comparing.
    if (wanted != null && text(f, 0, -3) != wanted) continue
    push(test_files, f)
  }
  return test_files
}
// Deep comparison of two values.
// Arrays are equal when they have the same length and equal elements in order.
// Records are equal when they have the same number of keys, every key of `a`
// maps to an equal value in `b`, and `b` has no non-null value under a key
// that is null/absent in `a`.
// Anything else is equal only when `a == b`.
function values_equal(a, b) {
  var i = 0
  var ka = null
  var kb = null
  if (a == b) return true
  if (is_null(a) && is_null(b)) return true
  if (is_null(a) || is_null(b)) return false
  if (is_array(a) && is_array(b)) {
    if (length(a) != length(b)) return false
    for (i = 0; i < length(a); i++) {
      if (!values_equal(a[i], b[i])) return false
    }
    return true
  }
  if (is_object(a) && is_object(b)) {
    ka = array(a)
    kb = array(b)
    if (length(ka) != length(kb)) return false
    for (i = 0; i < length(ka); i++) {
      if (!values_equal(a[ka[i]], b[ka[i]])) return false
    }
    // Equal key counts do not prove equal key sets: catch a key that only `b`
    // carries. Keys with a non-null value in `a` were already compared above,
    // so a cheap null check is enough here.
    for (i = 0; i < length(kb); i++) {
      if (is_null(a[kb[i]]) && !is_null(b[kb[i]])) return false
    }
    return true
  }
  return false
}
// Short human-readable label for a value, used in mismatch messages.
// Text is quoted, numbers and logicals are printed, containers are summarized
// by size, and anything unrecognized becomes "<unknown>".
function describe(val) {
  var label = "<unknown>"
  if (is_null(val)) {
    label = "null"
  } else if (is_text(val)) {
    label = `"${val}"`
  } else if (is_number(val)) {
    label = text(val)
  } else if (is_logical(val)) {
    label = text(val)
  } else if (is_function(val)) {
    label = "<function>"
  } else if (is_array(val)) {
    label = `[array length=${text(length(val))}]`
  } else if (is_object(val)) {
    label = `{record keys=${text(length(array(val)))}}`
  }
  return label
}
// Run a single test file through both paths
// Loads the module once with run_ast_fn and once with run_ast_noopt_fn from
// the same AST, then compares what the two loads produced.
//   file_path: package-relative path of a .cm test file
// Returns a record {file, tests, passed, failed, errors}:
//   tests  — array of {name, status} with status "passed" or "failed"
//   errors — human-readable mismatch descriptions
function diff_test_file(file_path) {
// Drop the last three characters of the path (the ".cm" extension for files
// produced by collect_tests).
var mod_path = text(file_path, 0, -3)
var src_path = fd.realpath('.') + '/' + file_path
var src = null
var ast = null
var mod_opt = null
var mod_noopt = null
var results = {file: file_path, tests: [], passed: 0, failed: 0, errors: []}
var use_pkg = fd.realpath('.')
var opt_error = null
var noopt_error = null
var keys = null
var i = 0
var k = null
var opt_result = null
var noopt_result = null
var opt_err = null
var noopt_err = null
var _run_one_opt = null
var _run_one_noopt = null
// Build env for module loading
// A fresh env is created for each load; its use() resolves through the shop
// relative to the current package directory.
var make_env = function() {
return {
use: function(path) {
return shop.use(path, use_pkg)
}
}
}
// Read and parse
// A disruption here is recorded in results.errors; the check after the call
// then returns early.
var _read = function() {
src = text(fd.slurp(src_path))
ast = analyze(src, src_path)
} disruption {
push(results.errors, `failed to parse ${file_path}`)
return results
}
_read()
if (length(results.errors) > 0) return results
// Run optimized
var _run_opt = function() {
mod_opt = run_ast_fn(mod_path, ast, make_env())
} disruption {
opt_error = "disrupted"
}
_run_opt()
// Run unoptimized
var _run_noopt = function() {
mod_noopt = run_ast_noopt_fn(mod_path, ast, make_env())
} disruption {
noopt_error = "disrupted"
}
_run_noopt()
// Compare module-level behavior
// Each flag is either null or "disrupted", so inequality means exactly one
// of the two loads disrupted.
if (opt_error != noopt_error) {
push(results.errors, `module load mismatch: opt=${opt_error != null ? opt_error : "ok"} noopt=${noopt_error != null ? noopt_error : "ok"}`)
results.failed = results.failed + 1
return results
}
if (opt_error != null) {
// Both disrupted during load — that's consistent
results.passed = results.passed + 1
push(results.tests, {name: "<module>", status: "passed"})
return results
}
// If module returns a record of functions, test each one
// Only the keys of mod_opt are walked, and an entry is exercised only when it
// is a function in both modules; other entries are skipped without a verdict.
if (is_object(mod_opt) && is_object(mod_noopt)) {
keys = array(mod_opt)
while (i < length(keys)) {
k = keys[i]
if (is_function(mod_opt[k]) && is_function(mod_noopt[k])) {
// Reset per-test state so a previous iteration cannot leak into this one.
opt_result = null
noopt_result = null
opt_err = null
noopt_err = null
_run_one_opt = function() {
opt_result = mod_opt[k]()
} disruption {
opt_err = "disrupted"
}
_run_one_opt()
_run_one_noopt = function() {
noopt_result = mod_noopt[k]()
} disruption {
noopt_err = "disrupted"
}
_run_one_noopt()
// Disruption behavior is compared first; return values are compared only
// when both runs agree on whether they disrupted.
if (opt_err != noopt_err) {
push(results.tests, {name: k, status: "failed"})
push(results.errors, `${k}: disruption mismatch opt=${opt_err != null ? opt_err : "ok"} noopt=${noopt_err != null ? noopt_err : "ok"}`)
results.failed = results.failed + 1
} else if (!values_equal(opt_result, noopt_result)) {
push(results.tests, {name: k, status: "failed"})
push(results.errors, `${k}: result mismatch opt=${describe(opt_result)} noopt=${describe(noopt_result)}`)
results.failed = results.failed + 1
} else {
push(results.tests, {name: k, status: "passed"})
results.passed = results.passed + 1
}
}
i = i + 1
}
} else {
// Compare direct return values
if (!values_equal(mod_opt, mod_noopt)) {
push(results.tests, {name: "<return>", status: "failed"})
push(results.errors, `return value mismatch: opt=${describe(mod_opt)} noopt=${describe(mod_noopt)}`)
results.failed = results.failed + 1
} else {
push(results.tests, {name: "<return>", status: "passed"})
results.passed = results.passed + 1
}
}
return results
}
// Main: diff every selected test file, print one summary line per file
// followed by its mismatches, then print the overall totals and stop.
var test_files = collect_tests(target_test)
log.console(`Differential testing: ${text(length(test_files))} file(s)`)

var total_passed = 0
var total_failed = 0
var i = 0
var result = null
var j = 0

for (i = 0; i < length(test_files); i++) {
  result = diff_test_file(test_files[i])
  log.console(` ${result.file}: ${text(result.passed)} passed, ${text(result.failed)} failed`)
  for (j = 0; j < length(result.errors); j++) {
    log.console(` MISMATCH: ${result.errors[j]}`)
  }
  total_passed = total_passed + result.passed
  total_failed = total_failed + result.failed
}

log.console(`----------------------------------------`)
log.console(`Diff: ${text(total_passed)} passed, ${text(total_failed)} failed, ${text(total_passed + total_failed)} total`)
if (total_failed > 0) {
  log.console(`DIFFERENTIAL FAILURES DETECTED`)
}
$stop()

View File

@@ -34,6 +34,7 @@ pit hello
- [**Actors and Modules**](/docs/actors/) — the execution model
- [**Requestors**](/docs/requestors/) — asynchronous composition
- [**Packages**](/docs/packages/) — code organization and sharing
- [**Shop Architecture**](/docs/shop/) — module resolution, compilation, and caching
## Reference
@@ -56,6 +57,7 @@ Modules loaded with `use()`:
## Tools
- [**Command Line**](/docs/cli/) — the `pit` tool
- [**Testing**](/docs/testing/) — writing and running tests
- [**Writing C Modules**](/docs/c-modules/) — native extensions
## Architecture

View File

@@ -78,12 +78,13 @@ pit build
### pit test
Run tests.
Run tests. See [Testing](/docs/testing/) for the full guide.
```bash
pit test # run tests in current package
pit test all # run all tests
pit test <package> # run tests in specific package
pit test suite --verify --diff # with IR verification and differential testing
```
### pit link

264
docs/compiler-tools.md Normal file
View File

@@ -0,0 +1,264 @@
---
title: "Compiler Inspection Tools"
description: "Tools for inspecting and debugging the compiler pipeline"
weight: 50
type: "docs"
---
ƿit includes a set of tools for inspecting the compiler pipeline at every stage. These are useful for debugging, testing optimizations, and understanding what the compiler does with your code.
## Pipeline Overview
The compiler runs in stages:
```
source → tokenize → parse → fold → mcode → streamline → output
```
Each stage has a corresponding dump tool that lets you see its output.
| Stage | Tool | What it shows |
|-------------|-------------------|----------------------------------------|
| fold | `dump_ast.cm` | Folded AST as JSON |
| mcode | `dump_mcode.cm` | Raw mcode IR before optimization |
| streamline | `dump_stream.cm` | Before/after instruction counts + IR |
| streamline | `dump_types.cm` | Optimized IR with type annotations |
| streamline | `streamline.ce` | Full optimized IR as JSON |
| all | `ir_report.ce` | Structured optimizer flight recorder |
All tools take a source file as input and run the pipeline up to the relevant stage.
## Quick Start
```bash
# see raw mcode IR
./cell --core . dump_mcode.cm myfile.ce
# see what the optimizer changed
./cell --core . dump_stream.cm myfile.ce
# full optimizer report with events
./cell --core . ir_report.ce --full myfile.ce
```
## dump_ast.cm
Prints the folded AST as JSON. This is the output of the parser and constant folder, before mcode generation.
```bash
./cell --core . dump_ast.cm <file.ce|file.cm>
```
## dump_mcode.cm
Prints the raw mcode IR before any optimization. Shows the instruction array as formatted text with opcode, operands, and program counter.
```bash
./cell --core . dump_mcode.cm <file.ce|file.cm>
```
## dump_stream.cm
Shows a before/after comparison of the optimizer. For each function, prints:
- Instruction count before and after
- Number of eliminated instructions
- The streamlined IR (nops hidden by default)
```bash
./cell --core . dump_stream.cm <file.ce|file.cm>
```
## dump_types.cm
Shows the optimized IR with type annotations. Each instruction is followed by the known types of its slot operands, inferred by walking the instruction stream.
```bash
./cell --core . dump_types.cm <file.ce|file.cm>
```
## streamline.ce
Runs the full pipeline (tokenize, parse, fold, mcode, streamline) and outputs the optimized IR as JSON. Useful for piping to `jq` or saving for comparison.
```bash
./cell --core . streamline.ce <file.ce|file.cm>
```
## ir_report.ce
The optimizer flight recorder. Runs the full pipeline with structured logging and outputs machine-readable, diff-friendly JSON. This is the most detailed tool for understanding what the optimizer did and why.
```bash
./cell --core . ir_report.ce [options] <file.ce|file.cm>
```
### Options
| Flag | Description |
|------|-------------|
| `--summary` | Per-pass JSON summaries with instruction counts and timing (default) |
| `--events` | Include rewrite events showing each optimization applied |
| `--types` | Include type delta records showing inferred slot types |
| `--ir-before=PASS` | Print canonical IR before a specific pass |
| `--ir-after=PASS` | Print canonical IR after a specific pass |
| `--ir-all` | Print canonical IR before and after all passes |
| `--full` | Everything: summary + events + types + ir-all |
With no flags, `--summary` is the default.
### Output Format
Output is line-delimited JSON. Each line is a self-contained JSON object with a `type` field:
**`type: "pass"`** — Per-pass summary with categorized instruction counts before and after:
```json
{
"type": "pass",
"pass": "eliminate_type_checks",
"fn": "fib",
"ms": 0.12,
"changed": true,
"before": {"instr": 77, "nop": 0, "guard": 16, "branch": 28, ...},
"after": {"instr": 77, "nop": 1, "guard": 15, "branch": 28, ...},
"changes": {"guards_removed": 1, "nops_added": 1}
}
```
**`type: "event"`** — Individual rewrite event with before/after instructions and reasoning:
```json
{
"type": "event",
"pass": "eliminate_type_checks",
"rule": "incompatible_type_forces_jump",
"at": 3,
"before": [["is_int", 5, 2, 4, 9], ["jump_false", 5, "rel_ni_2", 4, 9]],
"after": ["_nop_tc_1", ["jump", "rel_ni_2", 4, 9]],
"why": {"slot": 2, "known_type": "float", "checked_type": "int"}
}
```
**`type: "types"`** — Inferred type information for a function:
```json
{
"type": "types",
"fn": "fib",
"param_types": {},
"slot_types": {"25": "null"}
}
```
**`type: "ir"`** — Canonical IR text for a function at a specific point:
```json
{
"type": "ir",
"when": "before",
"pass": "all",
"fn": "fib",
"text": "fn fib (args=1, slots=26)\n @0 access s2, 2\n ..."
}
```
### Rewrite Rules
Each pass records events with named rules:
**eliminate_type_checks:**
- `known_type_eliminates_guard` — type already known, guard removed
- `incompatible_type_forces_jump` — type conflicts, conditional jump becomes unconditional
- `num_subsumes_int_float` — num check satisfied by int or float
- `dynamic_to_field` — load_dynamic/store_dynamic narrowed to field access
- `dynamic_to_index` — load_dynamic/store_dynamic narrowed to index access
**simplify_algebra:**
- `add_zero`, `sub_zero`, `mul_one`, `div_one` — identity operations become moves
- `mul_zero` — multiplication by zero becomes constant
- `self_eq`, `self_ne` — same-slot comparisons become constants
**simplify_booleans:**
- `not_jump_false_fusion` — not + jump_false fused into jump_true
- `not_jump_true_fusion` — not + jump_true fused into jump_false
- `double_not` — not + not collapsed to move
**eliminate_moves:**
- `self_move` — move to same slot becomes nop
**eliminate_dead_jumps:**
- `jump_to_next` — jump to immediately following label becomes nop
### Canonical IR Format
The `--ir-all`, `--ir-before`, and `--ir-after` flags produce a deterministic text representation of the IR:
```
fn fib (args=1, slots=26)
@0 access s2, 2
@1 is_int s4, s1 ; [guard]
@2 jump_false s4, "rel_ni_2" ; [branch]
@3 --- nop (tc) ---
@4 jump "rel_ni_2" ; [branch]
@5 lt_int s3, s1, s2
@6 jump "rel_done_4" ; [branch]
rel_ni_2:
@8 is_num s4, s1 ; [guard]
```
Properties:
- `@N` is the raw array index, stable across passes (passes replace, never insert or delete)
- `sN` prefix distinguishes slot operands from literal values
- String operands are quoted
- Labels appear as indented headers with a colon
- Category tags in brackets: `[guard]`, `[branch]`, `[load]`, `[store]`, `[call]`, `[arith]`, `[move]`, `[const]`
- Nops shown as `--- nop (reason) ---` with reason codes: `tc` (type check), `bl` (boolean), `mv` (move), `dj` (dead jump), `ur` (unreachable)
### Examples
```bash
# what passes changed something?
./cell --core . ir_report.ce --summary myfile.ce | jq 'select(.changed)'
# list all rewrite rules that fired
./cell --core . ir_report.ce --events myfile.ce | jq 'select(.type == "event") | .rule'
# diff IR before and after optimization
./cell --core . ir_report.ce --ir-all myfile.ce | jq -r 'select(.type == "ir") | .text'
# full report for analysis
./cell --core . ir_report.ce --full myfile.ce > report.json
```
## ir_stats.cm
A utility module used by `ir_report.ce` and available for custom tooling. Not a standalone tool.
```javascript
var ir_stats = use("ir_stats")
ir_stats.detailed_stats(func) // categorized instruction counts
ir_stats.ir_fingerprint(func) // djb2 hash of instruction array
ir_stats.canonical_ir(func, name, opts) // deterministic text representation
ir_stats.type_snapshot(slot_types) // frozen copy of type map
ir_stats.type_delta(before_types, after_types) // compute type changes
ir_stats.category_tag(op) // classify an opcode
```
### Instruction Categories
`detailed_stats` classifies each instruction into one of these categories:
| Category | Opcodes |
|----------|---------|
| load | `load_field`, `load_index`, `load_dynamic`, `get`, `access` (non-constant) |
| store | `store_field`, `store_index`, `store_dynamic`, `set_var`, `put`, `push` |
| branch | `jump`, `jump_true`, `jump_false`, `jump_not_null` |
| call | `invoke`, `goinvoke` |
| guard | `is_int`, `is_text`, `is_num`, `is_bool`, `is_null`, `is_array`, `is_func`, `is_record`, `is_stone` |
| arith | `add_int`, `sub_int`, ..., `add_float`, ..., `concat`, `neg_int`, `neg_float`, bitwise ops |
| move | `move` |
| const | `int`, `true`, `false`, `null`, `access` (with constant value) |
| label | string entries that are not nops |
| nop | strings starting with `_nop_` |
| other | everything else (`frame`, `setarg`, `array`, `record`, `function`, `return`, etc.) |

169
docs/shop.md Normal file
View File

@@ -0,0 +1,169 @@
---
title: "Shop Architecture"
description: "How the shop resolves, compiles, caches, and loads modules"
weight: 35
type: "docs"
---
The shop is the module resolution and loading engine behind `use()`. It handles finding modules, compiling them, caching the results, and loading C extensions. The shop lives in `internal/shop.cm`.
## Startup Pipeline
When `pit` runs a program, three layers bootstrap in sequence:
```
bootstrap.cm → engine.cm → shop.cm → user program
```
**bootstrap.cm** loads the compiler toolchain (tokenize, parse, fold, mcode, streamline) from pre-compiled bytecode. It defines `analyze()` (source to AST) and `compile_to_blob()` (AST to binary blob). It then loads engine.cm.
**engine.cm** creates the actor runtime (`$_`), defines `use_core()` for loading core modules, and populates the environment that shop receives. It then loads shop.cm via `use_core('internal/shop')`.
**shop.cm** receives its dependencies through the module environment — `analyze`, `run_ast_fn`, `use_cache`, `shop_path`, `runtime_env`, `content_hash`, `cache_path`, and others. It defines `Shop.use()`, which is the function behind every `use()` call in user code.
## Module Resolution
When `use('path')` is called from a package context, the shop resolves the module through a multi-layer search. Both the `.cm` script file and C symbol are resolved independently, and the one with the narrowest scope wins.
### Resolution Order
For a call like `use('sprite')` from package `myapp`:
1. **Own package** — `~/.pit/packages/myapp/sprite.cm` and C symbol `js_myapp_sprite_use`
2. **Aliased dependencies** — if `myapp/pit.toml` has `renderer = "gitea.pockle.world/john/renderer"`, checks `renderer/sprite.cm` and its C symbols
3. **Core** — built-in core modules and internal C symbols
For calls without a package context (from core modules), only core is searched.
### Private Modules
Paths starting with `internal/` are private to their package:
```javascript
use('internal/helpers') // OK from within the same package
// Cannot be accessed from other packages
```
### Explicit Package Imports
Paths containing a dot in the first component are treated as explicit package references:
```javascript
use('gitea.pockle.world/john/renderer/sprite')
// Resolves directly to the renderer package's sprite.cm
```
## Compilation and Caching
Every module goes through a content-addressed caching pipeline. The cache key is the BLAKE2 hash of the source content, so changing the source automatically invalidates the cache.
### Cache Hierarchy
When loading a module, the shop checks (in order):
1. **In-memory cache** — `use_cache[key]`, checked first on every `use()` call
2. **Native dylib** — pre-compiled platform-specific `.dylib` in the content-addressed store
3. **Cached .mach blob** — binary bytecode in `~/.pit/build/<hash>.mach`
4. **Cached .mcode IR** — JSON IR in `~/.pit/build/<hash>.mcode`
5. **Adjacent .mach/.mcode** — files alongside the source (e.g., `sprite.mach`)
6. **Source compilation** — full pipeline: analyze, mcode, streamline, serialize
Results from steps 4-6 are cached back to the content-addressed store for future loads.
### Content-Addressed Store
All cached artifacts live in `~/.pit/build/` named by the BLAKE2 hash of their source content:
```
~/.pit/build/
├── a1b2c3d4...mach # compiled bytecode blob
├── e5f6a7b8...mach # another compiled module
├── c9d0e1f2...mcode # cached JSON IR
└── f3a4b5c6...macos_arm64.dylib # native compiled module
```
This scheme provides automatic cache invalidation: when source changes, its hash changes, and the old cache entry is simply never looked up again.
### Core Module Caching
Core modules loaded via `use_core()` in engine.cm follow the same pattern. On first startup after a fresh install, core modules are compiled from `.cm.mcode` JSON IR and cached as `.mach` blobs. Subsequent startups load from cache, skipping the JSON parse and compile steps entirely.
User scripts (`.ce` files) are also cached. The first run compiles and caches; subsequent runs with unchanged source load from cache.
## C Extension Resolution
C extensions are resolved alongside script modules. A C module is identified by a symbol name derived from the package and file name:
```
package: gitea.pockle.world/john/prosperon
file: sprite.c
symbol: js_gitea_pockle_world_john_prosperon_sprite_use
```
### C Resolution Sources
1. **Internal symbols** — statically linked into the `pit` binary (core modules)
2. **Per-module dylibs** — loaded from `~/.pit/lib/` via a manifest file
### Manifest Files
Each package with C extensions has a manifest at `~/.pit/lib/<package>.manifest.json` mapping symbol names to dylib paths:
```json
{
"js_mypackage_render_use": "/Users/john/.pit/lib/mypackage_render.dylib",
"js_mypackage_audio_use": "/Users/john/.pit/lib/mypackage_audio.dylib"
}
```
The shop loads manifests lazily on first access and caches them.
### Combined Resolution
When both a `.cm` script and a C symbol exist for the same module name, both are resolved. The C module is loaded first (as the base), then the `.cm` script can extend it:
```javascript
// render.cm — extends the C render module
var c_render = use('internal/render_c')
// Add ƿit-level helpers on top of C functions
return record(c_render, {
draw_circle: function(x, y, r) { /* ... */ }
})
```
## Environment Injection
When a module is loaded, the shop builds an `env` object that becomes the module's set of free variables. This includes:
- **Runtime functions** — `logical`, `some`, `every`, `starts_with`, `ends_with`, `is_actor`, `log`, `send`, `fallback`, `parallel`, `race`, `sequence`
- **Capability injections** — actor intrinsics like `$self`, `$delay`, `$start`, `$receiver`, `$fd`, etc.
- **`use` function** — scoped to the module's package context
The set of injected capabilities is controlled by `script_inject_for()`, which can be tuned per package or file.
## Shop Directory Layout
```
~/.pit/
├── packages/ # installed packages (directories and symlinks)
│ └── core -> ... # symlink to the ƿit core
├── lib/ # compiled C extension dylibs + manifests
├── build/ # content-addressed compilation cache
│ ├── <hash>.mach # cached bytecode blobs
│ ├── <hash>.mcode # cached JSON IR
│ └── <hash>.<target>.dylib # native compiled modules
├── cache/ # downloaded package zip archives
├── lock.toml # installed package versions and commit hashes
└── link.toml # local development link overrides
```
## Key Files
| File | Role |
|------|------|
| `internal/bootstrap.cm` | Loads compiler, defines `analyze()` and `compile_to_blob()` |
| `internal/engine.cm` | Actor runtime, `use_core()`, environment setup |
| `internal/shop.cm` | Module resolution, compilation, caching, C extension loading |
| `internal/os.c` | OS intrinsics: dylib ops, internal symbol lookup, embedded modules |
| `package.cm` | Package directory detection, alias resolution, file listing |
| `link.cm` | Development link management (link.toml read/write) |

3
docs/spec/.pages Normal file
View File

@@ -0,0 +1,3 @@
nav:
- pipeline.md
- mcode.md

View File

@@ -1,11 +1,13 @@
---
title: "Register VM"
description: "Register-based virtual machine (Mach)"
description: "Binary encoding of the Mach bytecode interpreter"
---
## Overview
The Mach VM is a register-based virtual machine using 32-bit instructions. It is modeled after Lua's register VM — operands are register indices rather than stack positions, reducing instruction count and improving performance.
The Mach VM is a register-based virtual machine that directly interprets the [Mcode IR](mcode.md) instruction set as compact 32-bit binary bytecode. It is modeled after Lua's register VM — operands are register indices rather than stack positions, reducing instruction count and improving performance.
The Mach serializer (`mach.c`) converts streamlined mcode JSON into binary instructions. Since the Mach bytecode is a direct encoding of the mcode, the [Mcode IR](mcode.md) reference is the authoritative instruction set documentation.
## Instruction Formats
@@ -45,95 +47,12 @@ Used for unconditional jumps with a 24-bit signed offset.
## Registers
Each function frame has a fixed number of register slots, determined at compile time. Registers hold:
Each function frame has a fixed number of register slots, determined at compile time:
- **R(0)** — `this` binding
- **R(1)..R(arity)** — function arguments
- **R(arity+1)..** — local variables and temporaries
## Instruction Set
### Loading
| Opcode | Format | Description |
|--------|--------|-------------|
| `LOADK` | iABx | `R(A) = K(Bx)` — load from constant pool |
| `LOADI` | iAsBx | `R(A) = sBx` — load small integer |
| `LOADNULL` | iA | `R(A) = null` |
| `LOADTRUE` | iA | `R(A) = true` |
| `LOADFALSE` | iA | `R(A) = false` |
| `MOVE` | iABC | `R(A) = R(B)` — register copy |
### Arithmetic
| Opcode | Format | Description |
|--------|--------|-------------|
| `ADD` | iABC | `R(A) = R(B) + R(C)` |
| `SUB` | iABC | `R(A) = R(B) - R(C)` |
| `MUL` | iABC | `R(A) = R(B) * R(C)` |
| `DIV` | iABC | `R(A) = R(B) / R(C)` |
| `MOD` | iABC | `R(A) = R(B) % R(C)` |
| `POW` | iABC | `R(A) = R(B) ^ R(C)` |
| `NEG` | iABC | `R(A) = -R(B)` |
| `INC` | iABC | `R(A) = R(B) + 1` |
| `DEC` | iABC | `R(A) = R(B) - 1` |
### Comparison
| Opcode | Format | Description |
|--------|--------|-------------|
| `EQ` | iABC | `R(A) = R(B) == R(C)` |
| `NEQ` | iABC | `R(A) = R(B) != R(C)` |
| `LT` | iABC | `R(A) = R(B) < R(C)` |
| `LE` | iABC | `R(A) = R(B) <= R(C)` |
| `GT` | iABC | `R(A) = R(B) > R(C)` |
| `GE` | iABC | `R(A) = R(B) >= R(C)` |
### Property Access
| Opcode | Format | Description |
|--------|--------|-------------|
| `GETFIELD` | iABC | `R(A) = R(B)[K(C)]` — named property |
| `SETFIELD` | iABC | `R(A)[K(B)] = R(C)` — set named property |
| `GETINDEX` | iABC | `R(A) = R(B)[R(C)]` — computed property |
| `SETINDEX` | iABC | `R(A)[R(B)] = R(C)` — set computed property |
### Variable Resolution
| Opcode | Format | Description |
|--------|--------|-------------|
| `GETNAME` | iABx | Unresolved variable (compiler placeholder) |
| `GETINTRINSIC` | iABx | Global intrinsic / built-in |
| `GETENV` | iABx | Module environment variable |
| `GETUP` | iABC | `R(A) = UpFrame(B).slots[C]` — closure upvalue |
| `SETUP` | iABC | `UpFrame(A).slots[B] = R(C)` — set closure upvalue |
### Control Flow
| Opcode | Format | Description |
|--------|--------|-------------|
| `JMP` | isJ | Unconditional jump |
| `JMPTRUE` | iAsBx | Jump if `R(A)` is true |
| `JMPFALSE` | iAsBx | Jump if `R(A)` is false |
| `JMPNULL` | iAsBx | Jump if `R(A)` is null |
### Function Calls
| Opcode | Format | Description |
|--------|--------|-------------|
| `CALL` | iABC | Call `R(A)` with `B` args starting at `R(A+1)`, `C`=keep result |
| `RETURN` | iA | Return `R(A)` |
| `RETNIL` | — | Return null |
| `CLOSURE` | iABx | Create closure from function pool entry `Bx` |
### Object / Array
| Opcode | Format | Description |
|--------|--------|-------------|
| `NEWOBJECT` | iA | `R(A) = {}` |
| `NEWARRAY` | iABC | `R(A) = array(B)` |
| `PUSH` | iABC | Push `R(B)` to array `R(A)` |
## JSCodeRegister
The compiled output for a function:
@@ -149,7 +68,7 @@ struct JSCodeRegister {
uint32_t func_count; // nested function count
JSCodeRegister **functions; // nested function table
JSValue name; // function name
uint16_t disruption_pc; // exception handler offset
uint16_t disruption_pc; // disruption handler offset
};
```

View File

@@ -1,23 +1,287 @@
---
title: "Mcode IR"
description: "JSON-based intermediate representation"
description: "Instruction set reference for the JSON-based intermediate representation"
---
## Overview
Mcode is a JSON-based intermediate representation that can be interpreted directly. It represents the same operations as the Mach register VM but uses string-based instruction dispatch rather than binary opcodes. Mcode is intended as an intermediate step toward native code compilation.
## Pipeline
Mcode is the intermediate representation at the center of the ƿit compilation pipeline. All source code is lowered to mcode before execution or native compilation. The mcode instruction set is the **authoritative reference** for the operations supported by the ƿit runtime — the Mach VM bytecode is a direct binary encoding of these same instructions.
```
Source → Tokenize → Parse (AST) → Fold → Mcode (JSON) → Streamline → Mach VM (default)
→ Mcode Interpreter
→ QBE → Native
Source → Tokenize → Parse → Fold → Mcode → Streamline → Machine
```
Mcode is produced by `mcode.cm`, which lowers the folded AST to JSON instruction arrays. The streamline optimizer (`streamline.cm`) then eliminates redundant operations. The result is serialized to binary bytecode by the Mach compiler (`mach.c`), interpreted directly by `mcode.c`, or lowered to QBE IL by `qbe_emit.cm` for native compilation. See [Compilation Pipeline](pipeline.md) for the full overview.
Mcode is produced by `mcode.cm`, optimized by `streamline.cm`, then either serialized to 32-bit bytecode for the Mach VM (`mach.c`), or lowered to QBE/LLVM IL for native compilation (`qbe_emit.cm`). See [Compilation Pipeline](pipeline.md) for the full overview.
### Function Proxy Decomposition
## Module Structure
An `.mcode` file is a JSON object representing a compiled module:
| Field | Type | Description |
|-------|------|-------------|
| `name` | string | Module name (typically the source filename) |
| `filename` | string | Source filename |
| `data` | object | Constant pool — string and number literals used by instructions |
| `main` | function | The top-level function (module body) |
| `functions` | array | Nested function definitions (referenced by `function dest, id`) |
### Function Record
Each function (both `main` and entries in `functions`) has:
| Field | Type | Description |
|-------|------|-------------|
| `name` | string | Function name (`"<anonymous>"` for lambdas) |
| `filename` | string | Source filename |
| `nr_args` | integer | Number of parameters |
| `nr_slots` | integer | Total register slots needed (args + locals + temporaries) |
| `nr_close_slots` | integer | Number of closure slots captured from parent scope |
| `disruption_pc` | integer | Instruction index of the disruption handler (0 if none) |
| `instructions` | array | Instruction arrays and label strings |
Slot 0 is reserved for the `this` binding. Slots 1 through `nr_args` hold parameters. Remaining slots up to `nr_slots - 1` are locals and temporaries.
## Instruction Format
Each instruction is a JSON array. The first element is the instruction name (string), followed by operands. The last two elements are line and column numbers for source mapping:
```json
["add_int", dest, a, b, line, col]
["load_field", dest, obj, "key", line, col]
["jump", "label_name"]
```
Operands are register slot numbers (integers), constant values (strings, numbers), or label names (strings).
## Instruction Reference
### Loading and Constants
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `access` | `dest, name` | Load variable by name (intrinsic or environment) |
| `int` | `dest, value` | Load integer constant |
| `true` | `dest` | Load boolean `true` |
| `false` | `dest` | Load boolean `false` |
| `null` | `dest` | Load `null` |
| `move` | `dest, src` | Copy register value |
| `function` | `dest, id` | Load nested function by index |
| `regexp` | `dest, pattern` | Create regexp object |
### Arithmetic — Integer
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `add_int` | `dest, a, b` | `dest = a + b` (integer) |
| `sub_int` | `dest, a, b` | `dest = a - b` (integer) |
| `mul_int` | `dest, a, b` | `dest = a * b` (integer) |
| `div_int` | `dest, a, b` | `dest = a / b` (integer) |
| `mod_int` | `dest, a, b` | `dest = a % b` (integer) |
| `neg_int` | `dest, src` | `dest = -src` (integer) |
### Arithmetic — Float
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `add_float` | `dest, a, b` | `dest = a + b` (float) |
| `sub_float` | `dest, a, b` | `dest = a - b` (float) |
| `mul_float` | `dest, a, b` | `dest = a * b` (float) |
| `div_float` | `dest, a, b` | `dest = a / b` (float) |
| `mod_float` | `dest, a, b` | `dest = a % b` (float) |
| `neg_float` | `dest, src` | `dest = -src` (float) |
### Arithmetic — Generic
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `pow` | `dest, a, b` | `dest = a ^ b` (exponentiation) |
### Text
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `concat` | `dest, a, b` | `dest = a ~ b` (text concatenation) |
### Comparison — Integer
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `eq_int` | `dest, a, b` | `dest = a == b` (integer) |
| `ne_int` | `dest, a, b` | `dest = a != b` (integer) |
| `lt_int` | `dest, a, b` | `dest = a < b` (integer) |
| `le_int` | `dest, a, b` | `dest = a <= b` (integer) |
| `gt_int` | `dest, a, b` | `dest = a > b` (integer) |
| `ge_int` | `dest, a, b` | `dest = a >= b` (integer) |
### Comparison — Float
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `eq_float` | `dest, a, b` | `dest = a == b` (float) |
| `ne_float` | `dest, a, b` | `dest = a != b` (float) |
| `lt_float` | `dest, a, b` | `dest = a < b` (float) |
| `le_float` | `dest, a, b` | `dest = a <= b` (float) |
| `gt_float` | `dest, a, b` | `dest = a > b` (float) |
| `ge_float` | `dest, a, b` | `dest = a >= b` (float) |
### Comparison — Text
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `eq_text` | `dest, a, b` | `dest = a == b` (text) |
| `ne_text` | `dest, a, b` | `dest = a != b` (text) |
| `lt_text` | `dest, a, b` | `dest = a < b` (lexicographic) |
| `le_text` | `dest, a, b` | `dest = a <= b` (lexicographic) |
| `gt_text` | `dest, a, b` | `dest = a > b` (lexicographic) |
| `ge_text` | `dest, a, b` | `dest = a >= b` (lexicographic) |
### Comparison — Boolean
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `eq_bool` | `dest, a, b` | `dest = a == b` (boolean) |
| `ne_bool` | `dest, a, b` | `dest = a != b` (boolean) |
### Comparison — Special
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `is_identical` | `dest, a, b` | Object identity check (same reference) |
| `eq_tol` | `dest, a, b` | Equality with tolerance |
| `ne_tol` | `dest, a, b` | Inequality with tolerance |
### Type Checks
Inlined from intrinsic function calls. Each sets `dest` to `true` or `false`.
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `is_int` | `dest, src` | Check if integer |
| `is_num` | `dest, src` | Check if number (integer or float) |
| `is_text` | `dest, src` | Check if text |
| `is_bool` | `dest, src` | Check if logical |
| `is_null` | `dest, src` | Check if null |
| `is_array` | `dest, src` | Check if array |
| `is_func` | `dest, src` | Check if function |
| `is_record` | `dest, src` | Check if record (object) |
| `is_stone` | `dest, src` | Check if stone (immutable) |
| `is_proxy` | `dest, src` | Check if function proxy (arity 2) |
### Logical
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `not` | `dest, src` | Logical NOT |
| `and` | `dest, a, b` | Logical AND |
| `or` | `dest, a, b` | Logical OR |
### Bitwise
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `bitand` | `dest, a, b` | Bitwise AND |
| `bitor` | `dest, a, b` | Bitwise OR |
| `bitxor` | `dest, a, b` | Bitwise XOR |
| `bitnot` | `dest, src` | Bitwise NOT |
| `shl` | `dest, a, b` | Shift left |
| `shr` | `dest, a, b` | Arithmetic shift right |
| `ushr` | `dest, a, b` | Unsigned shift right |
### Property Access
Memory operations come in typed variants. The compiler selects the appropriate variant based on `type_tag` and `access_kind` annotations from parse and fold.
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `load_field` | `dest, obj, key` | Load record property by string key |
| `store_field` | `obj, val, key` | Store record property by string key |
| `load_index` | `dest, obj, idx` | Load array element by integer index |
| `store_index` | `obj, val, idx` | Store array element by integer index |
| `load_dynamic` | `dest, obj, key` | Load property (dispatches at runtime) |
| `store_dynamic` | `obj, val, key` | Store property (dispatches at runtime) |
| `delete` | `obj, key` | Delete property |
| `in` | `dest, obj, key` | Check if property exists |
| `length` | `dest, src` | Get length of array or text |
### Object and Array Construction
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `record` | `dest` | Create empty record `{}` |
| `array` | `dest, n` | Create empty array (elements added via `push`) |
| `push` | `arr, val` | Push value to array |
| `pop` | `dest, arr` | Pop value from array |
### Function Calls
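Function calls are decomposed into three instructions — `frame`, `setarg`, and `invoke`. The `goframe` and `goinvoke` variants play the roles of `frame` and `invoke` for async/concurrent calls: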
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `frame` | `dest, fn, argc` | Allocate call frame for `fn` with `argc` arguments |
| `setarg` | `frame, idx, val` | Set argument `idx` in call frame |
| `invoke` | `frame, result` | Execute the call, store result |
| `goframe` | `dest, fn, argc` | Allocate frame for async/concurrent call |
| `goinvoke` | `frame, result` | Invoke async/concurrent call |
### Variable Resolution
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `access` | `dest, name` | Load variable (intrinsic or module environment) |
| `set_var` | `name, src` | Set top-level variable by name |
| `get` | `dest, level, slot` | Get closure variable from parent scope |
| `put` | `level, slot, src` | Set closure variable in parent scope |
### Control Flow
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `LABEL` | `name` | Define a named label (not executed) |
| `jump` | `label` | Unconditional jump |
| `jump_true` | `cond, label` | Jump if `cond` is true |
| `jump_false` | `cond, label` | Jump if `cond` is false |
| `jump_not_null` | `val, label` | Jump if `val` is not null |
| `return` | `src` | Return value from function |
| `disrupt` | — | Trigger disruption (error) |
## Typed Instruction Design
A key design principle of mcode is that **every type check is an explicit instruction**. Arithmetic and comparison operations come in type-specialized variants (`add_int`, `add_float`, `eq_text`, etc.) rather than a single polymorphic instruction.
When type information is available from the fold stage, the compiler emits the typed variant directly. When the type is unknown, the compiler emits a type-check/dispatch pattern:
```json
["is_int", check, a]
["jump_false", check, "float_path"]
["add_int", dest, a, b]
["jump", "done"]
["LABEL", "float_path"]
["add_float", dest, a, b]
["LABEL", "done"]
```
The [Streamline Optimizer](streamline.md) eliminates dead branches when types are statically known, collapsing the dispatch to a single typed instruction.
## Intrinsic Inlining
The mcode compiler recognizes calls to built-in intrinsic functions and emits direct opcodes instead of the generic frame/setarg/invoke call sequence:
| Source call | Emitted instruction |
|-------------|-------------------|
| `is_array(x)` | `is_array dest, src` |
| `is_function(x)` | `is_func dest, src` |
| `is_object(x)` | `is_record dest, src` |
| `is_stone(x)` | `is_stone dest, src` |
| `is_integer(x)` | `is_int dest, src` |
| `is_text(x)` | `is_text dest, src` |
| `is_number(x)` | `is_num dest, src` |
| `is_logical(x)` | `is_bool dest, src` |
| `is_null(x)` | `is_null dest, src` |
| `length(x)` | `length dest, src` |
| `push(arr, val)` | `push arr, val` |
## Function Proxy Decomposition
When the compiler encounters a method call `obj.method(args)`, it emits a branching pattern to handle ƿit's function proxy protocol. An arity-2 function used as a proxy target receives the method name and argument array instead of a normal method call:
@@ -25,9 +289,8 @@ When the compiler encounters a method call `obj.method(args)`, it emits a branch
["is_proxy", check, obj]
["jump_false", check, "record_path"]
// Proxy path: call obj(name, [args...]) with this=null
["access", name_slot, "method"]
["array", args_arr, N, arg0, arg1, ...]
["array", args_arr, N, arg0, arg1]
["null", null_slot]
["frame", f, obj, 2]
["setarg", f, 0, null_slot]
@@ -41,21 +304,38 @@ When the compiler encounters a method call `obj.method(args)`, it emits a branch
["frame", f2, method, N]
["setarg", f2, 0, obj]
["setarg", f2, 1, arg0]
...
["invoke", f2, dest]
["LABEL", "done"]
```
The streamline optimizer can eliminate the dead branch when the type of `obj` is statically known.
## Labels and Control Flow
## JSMCode Structure
Control flow uses named labels instead of numeric offsets:
```json
["LABEL", "loop_start"]
["add_int", 1, 1, 2]
["jump_false", 3, "loop_end"]
["jump", "loop_start"]
["LABEL", "loop_end"]
```
Labels are collected into a name-to-index map during loading, enabling O(1) jump resolution. The Mach serializer converts label names to numeric offsets in the binary bytecode.
## Nop Convention
The streamline optimizer replaces eliminated instructions with nop strings (e.g., `_nop_tc_1`, `_nop_bl_2`). Nop strings are skipped during interpretation and native code emission but preserved in the instruction array to maintain positional stability for jump targets.
## Internal Structures
### JSMCode (Mcode Interpreter)
```c
struct JSMCode {
uint16_t nr_args; // argument count
uint16_t nr_slots; // register count
cJSON **instrs; // pre-flattened instruction array
cJSON **instrs; // instruction array
uint32_t instr_count; // number of instructions
struct {
@@ -70,74 +350,25 @@ struct JSMCode {
cJSON *json_root; // keeps JSON alive
const char *name; // function name
const char *filename; // source file
uint16_t disruption_pc; // exception handler offset
uint16_t disruption_pc; // disruption handler offset
};
```
## Instruction Format
### JSCodeRegister (Mach VM Bytecode)
Each instruction is a JSON array. The first element is the instruction name (string), followed by operands (typically `[op, dest, ...args, line, col]`):
```json
["access", 3, 5, 1, 9]
["load_index", 10, 4, 9, 5, 11]
["store_dynamic", 4, 11, 12, 6, 3]
["frame", 15, 14, 1, 7, 7]
["setarg", 15, 0, 16, 7, 7]
["invoke", 15, 13, 7, 7]
```c
struct JSCodeRegister {
uint16_t arity; // argument count
uint16_t nr_slots; // total register count
uint32_t cpool_count; // constant pool size
JSValue *cpool; // constant pool
uint32_t instr_count; // instruction count
MachInstr32 *instructions; // 32-bit instruction array
uint32_t func_count; // nested function count
JSCodeRegister **functions; // nested function table
JSValue name; // function name
uint16_t disruption_pc; // disruption handler offset
};
```
### Typed Load/Store
Memory operations come in typed variants for optimization:
- `load_index dest, obj, idx` — array element by integer index
- `load_field dest, obj, key` — record property by string key
- `load_dynamic dest, obj, key` — unknown; dispatches at runtime
- `store_index obj, val, idx` — array element store
- `store_field obj, val, key` — record property store
- `store_dynamic obj, val, key` — unknown; dispatches at runtime
The compiler selects the appropriate variant based on `type_tag` and `access_kind` annotations from parse and fold.
### Decomposed Calls
Function calls are split into separate instructions:
- `frame dest, fn, argc` — allocate call frame
- `setarg frame, idx, val` — set argument
- `invoke frame, result` — execute the call
## Labels
Control flow uses named labels instead of numeric offsets:
```json
["LABEL", "loop_start"]
["ADD", 1, 1, 2]
["JMPFALSE", 3, "loop_end"]
["JMP", "loop_start"]
["LABEL", "loop_end"]
```
Labels are collected into a name-to-index map during loading, enabling O(1) jump resolution.
## Differences from Mach
| Property | Mcode | Mach |
|----------|-------|------|
| Instructions | cJSON arrays | 32-bit binary |
| Dispatch | String comparison | Switch on opcode byte |
| Constants | Inline in JSON | Separate constant pool |
| Jump targets | Named labels | Numeric offsets |
| Memory | Heap (cJSON nodes) | Off-heap (malloc) |
## Purpose
Mcode serves as an inspectable, debuggable intermediate format:
- **Human-readable** — the JSON representation can be printed and examined
- **Language-independent** — any tool that produces the correct JSON can target the ƿit runtime
- **Compilation target** — the Mach compiler can consume mcode as input, and future native code generators can work from the same representation
The cost of string-based dispatch makes mcode slower than the binary Mach VM, so it is primarily useful during development and as a compilation intermediate rather than for production execution.
The Mach serializer (`mach.c`) converts the JSON mcode into compact 32-bit instructions with a constant pool. See [Register VM](mach.md) for the binary encoding formats.

View File

@@ -5,14 +5,17 @@ description: "Overview of the compilation stages and optimizations"
## Overview
The compilation pipeline transforms source code through several stages, each adding information or lowering the representation toward execution. All backends share the same path through mcode and streamline. There are three execution backends: the Mach register VM (default), the Mcode interpreter (debug), and native code via QBE (experimental).
The compilation pipeline transforms source code through several stages, each adding information or lowering the representation toward execution. All backends share the same path through mcode and streamline.
```
Source → Tokenize → Parse → Fold → Mcode → Streamline → Mach VM (default)
→ Mcode Interpreter
→ QBE → Native
Source → Tokenize → Parse → Fold → Mcode → Streamline → Machine
```
The final **machine** stage has two targets:
- **Mach VM** — a register-based bytecode interpreter that directly executes the mcode instruction set as compact 32-bit binary
- **Native code** — lowers mcode to QBE or LLVM intermediate language, then compiles to machine code for the target CPU architecture
## Stages
### Tokenize (`tokenize.cm`)
@@ -24,7 +27,8 @@ Splits source text into tokens. Handles string interpolation by re-tokenizing te
Converts tokens into an AST. Also performs semantic analysis:
- **Scope records**: For each scope (global, function), builds a record mapping variable names to their metadata: `make` (var/def/function/input), `function_nr`, `nr_uses`, `closure` flag, and `level`.
- **Type tags**: When the right-hand side of a `def` is a syntactically obvious type, stamps `type_tag` on the scope record entry. Derivable types: `"integer"`, `"number"`, `"text"`, `"array"`, `"record"`, `"function"`, `"logical"`, `"null"`.
- **Type tags**: When the right-hand side of a `def` is a syntactically obvious type, stamps `type_tag` on the scope record entry. Derivable types: `"integer"`, `"number"`, `"text"`, `"array"`, `"record"`, `"function"`, `"logical"`. For `def` variables, type tags are also inferred from usage patterns: push (`x[] = v`) implies array, property access (`x.foo = v`) implies record, integer key implies array, text key implies record.
- **Type error detection**: For `def` variables with known type tags, provably wrong operations are reported as compile errors: property access on arrays, push on non-arrays, text keys on arrays, integer keys on records. Only `def` variables are checked because `var` can be reassigned.
- **Intrinsic resolution**: Names used but not locally bound are recorded in `ast.intrinsics`. Name nodes referencing intrinsics get `intrinsic: true`.
- **Access kind**: Subscript (`[`) nodes get `access_kind`: `"index"` for numeric subscripts, `"field"` for string subscripts, omitted otherwise.
- **Tail position**: Return statements where the expression is a call get `tail: true`.
@@ -37,8 +41,8 @@ Operates on the AST. Performs constant folding and type analysis:
- **Constant propagation**: Tracks `def` bindings whose values are known constants.
- **Type propagation**: Extends `type_tag` through operations. When both operands of an arithmetic op have known types, the result type is known. Propagates type tags to reference sites.
- **Intrinsic specialization**: When an intrinsic call's argument types are known, stamps a `hint` on the call node. For example, `length(x)` where x is a known array gets `hint: "array_length"`. Type checks like `is_array(known_array)` are folded to `true`.
- **Purity marking**: Stamps `pure: true` on expressions with no side effects (literals, name references, arithmetic on pure operands).
- **Dead code elimination**: Removes unreachable branches when conditions are known constants.
- **Purity analysis**: Expressions with no side effects are marked pure (literals, name references, arithmetic on pure operands, calls to pure intrinsics). The pure intrinsic set contains only `is_*` sensory functions — they are the only intrinsics guaranteed to never disrupt regardless of argument types. Other intrinsics like `text`, `number`, and `length` can disrupt on wrong argument types and are excluded.
- **Dead code elimination**: Removes unreachable branches when conditions are known constants. Removes unused `var`/`def` declarations with pure initializers. Removes standalone calls to pure intrinsics where the result is discarded.
### Mcode (`mcode.cm`)
@@ -47,57 +51,46 @@ Lowers the AST to a JSON-based intermediate representation with explicit operati
- **Typed load/store**: Emits `load_index` (array by integer), `load_field` (record by string), or `load_dynamic` (unknown) based on type information from fold.
- **Decomposed calls**: Function calls are split into `frame` (create call frame) + `setarg` (set arguments) + `invoke` (execute call).
- **Intrinsic access**: Intrinsic functions are loaded via `access` with an intrinsic marker rather than global lookup.
- **Intrinsic inlining**: Type-check intrinsics (`is_array`, `is_text`, `is_number`, `is_integer`, `is_logical`, `is_null`, `is_function`, `is_object`, `is_stone`), `length`, and `push` are emitted as direct opcodes instead of frame/setarg/invoke call sequences.
- **Disruption handler labels**: When a function has a disruption handler, a label is emitted before the handler code. This allows the streamline optimizer's unreachable code elimination to safely nop dead code after `return` without accidentally eliminating the handler.
- **Tail call marking**: When a return statement's expression is a call and the function has no disruption handler, the final `invoke` is renamed to `tail_invoke`. This marks the call site for future tail call optimization. Functions with disruption handlers cannot use TCO because the handler frame must remain on the stack.
See [Mcode IR](mcode.md) for instruction format details.
See [Mcode IR](mcode.md) for the instruction format and complete instruction reference.
### Streamline (`streamline.cm`)
Optimizes the Mcode IR. Operates per-function:
Optimizes the Mcode IR through a series of independent passes. Operates per-function:
- **Redundant instruction elimination**: Removes no-op patterns and redundant moves.
- **Dead code removal**: Eliminates instructions whose results are never used.
- **Type-based narrowing**: When type information is available, narrows `load_dynamic`/`store_dynamic` to typed variants.
1. **Backward type inference**: Infers parameter types from how they are used in typed operators and generic arithmetic (`subtract`, `eq_int`, `store_index`, `load_field`, `push`, `pop`, etc.). Immutable `def` parameters keep their inferred type across label join points.
2. **Type-check elimination**: When a slot's type is known, eliminates `is_<type>` + conditional jump pairs. Narrows `load_dynamic`/`store_dynamic` to typed variants.
3. **Algebraic simplification**: Rewrites identity operations (add 0, multiply 1, divide 1) and folds same-slot comparisons.
4. **Boolean simplification**: Fuses `not` + conditional jump into a single jump with inverted condition.
5. **Move elimination**: Removes self-moves (`move a, a`).
6. **Unreachable elimination**: Nops dead code after `return` until the next label.
7. **Dead jump elimination**: Removes jumps to the immediately following label.
### QBE Emit (`qbe_emit.cm`)
See [Streamline Optimizer](streamline.md) for detailed pass descriptions.
Lowers optimized Mcode IR to QBE intermediate language for native code compilation. Each Mcode function becomes a QBE function that calls into the cell runtime (`cell_rt_*` functions) for operations that require the runtime (allocation, intrinsic dispatch, etc.).
### Machine
String constants are interned in a data section. Integer constants are NaN-boxed inline.
The streamlined mcode is lowered to a machine target for execution.
### QBE Macros (`qbe.cm`)
#### Mach VM (default)
Provides operation implementations as QBE IL templates. Each arithmetic, comparison, and type operation is defined as a function that emits the corresponding QBE instructions, handling type dispatch (integer, float, text paths) with proper guard checks.
## Execution Backends
### Mach VM (default)
Binary 32-bit register VM. The Mach serializer (`mach.c`) converts streamlined mcode JSON into compact 32-bit bytecode with a constant pool. Used for production execution and bootstrapping.
The Mach VM is a register-based virtual machine that directly interprets the mcode instruction set as 32-bit binary bytecode. The Mach serializer (`mach.c`) converts streamlined mcode JSON into compact 32-bit instructions with a constant pool. Since the mach bytecode is a direct encoding of the mcode, the [Mcode IR](mcode.md) reference serves as the authoritative instruction set documentation.
```
./cell script.ce
pit script.ce
```
Debug the mach bytecode output:
#### Native Code (QBE / LLVM)
Lowers the streamlined mcode to QBE or LLVM intermediate language for compilation to native machine code. Each mcode function becomes a native function that calls into the ƿit runtime (`cell_rt_*` functions) for operations that require the runtime (allocation, intrinsic dispatch, etc.).
String constants are interned in a data section. Integer constants are encoded inline.
```
./cell --core . --dump-mach script.ce
```
### Mcode Interpreter
JSON-based interpreter. Used for debugging the compilation pipeline.
```
./cell --mcode script.ce
```
### QBE Native (experimental)
Generates QBE IL that can be compiled to native code.
```
./cell --emit-qbe script.ce > output.ssa
pit --emit-qbe script.ce > output.ssa
```
## Files
@@ -113,6 +106,14 @@ Generates QBE IL that can be compiled to native code.
| `qbe.cm` | QBE IL operation templates |
| `internal/bootstrap.cm` | Pipeline orchestrator |
## Debug Tools
| File | Purpose |
|------|---------|
| `dump_mcode.cm` | Print raw Mcode IR before streamlining |
| `dump_stream.cm` | Print IR after streamlining with before/after stats |
| `dump_types.cm` | Print streamlined IR with type annotations |
## Test Files
| File | Tests |
@@ -122,3 +123,5 @@ Generates QBE IL that can be compiled to native code.
| `mcode_test.ce` | Typed load/store, decomposed calls |
| `streamline_test.ce` | Optimization counts, IR before/after |
| `qbe_test.ce` | End-to-end QBE IL generation |
| `test_intrinsics.cm` | Inlined intrinsic opcodes (is_array, length, push, etc.) |
| `test_backward.cm` | Backward type propagation for parameters |

361
docs/spec/streamline.md Normal file
View File

@@ -0,0 +1,361 @@
---
title: "Streamline Optimizer"
description: "Mcode IR optimization passes"
---
## Overview
The streamline optimizer (`streamline.cm`) runs a series of independent passes over the Mcode IR to eliminate redundant operations. Each pass is a standalone function that can be enabled, disabled, or reordered. Passes communicate only through the instruction array they mutate in place, replacing eliminated instructions with nop strings (e.g., `_nop_tc_1`).
The optimizer runs after `mcode.cm` generates the IR and before the result is lowered to the Mach VM or emitted as QBE IL.
```
Fold (AST) → Mcode (JSON IR) → Streamline → Mach VM / QBE
```
## Type Lattice
The optimizer tracks a type for each slot in the register file:
| Type | Meaning |
|------|---------|
| `unknown` | No type information |
| `int` | Integer |
| `float` | Floating-point |
| `num` | Number (subsumes int and float) |
| `text` | String |
| `bool` | Logical (true/false) |
| `null` | Null value |
| `array` | Array |
| `record` | Record (object) |
| `function` | Function |
| `blob` | Binary blob |
Subsumption: `int` and `float` both satisfy a `num` check.
## Passes
### 1. infer_param_types (backward type inference)
Scans typed operators and generic arithmetic to determine what types their operands must be. For example, `subtract dest, a, b` implies both `a` and `b` are numbers.
When a parameter slot (1..nr_args) is consistently inferred as a single type, that type is recorded. Since parameters are immutable (`def`), the inferred type holds for the entire function and persists across label join points (loop headers, branch targets).
Backward inference rules:
| Operator class | Operand type inferred |
|---|---|
| `subtract`, `multiply`, `divide`, `modulo`, `pow`, `negate` | T_NUM |
| `eq_int`, `ne_int`, `lt_int`, `gt_int`, `le_int`, `ge_int`, bitwise ops | T_INT |
| `eq_float`, `ne_float`, `lt_float`, `gt_float`, `le_float`, `ge_float` | T_FLOAT |
| `concat`, text comparisons | T_TEXT |
| `eq_bool`, `ne_bool`, `not`, `and`, `or` | T_BOOL |
| `store_index` (object operand) | T_ARRAY |
| `store_index` (index operand) | T_INT |
| `store_field` (object operand) | T_RECORD |
| `push` (array operand) | T_ARRAY |
| `load_index` (object operand) | T_ARRAY |
| `load_index` (index operand) | T_INT |
| `load_field` (object operand) | T_RECORD |
| `pop` (array operand) | T_ARRAY |
Note: `add` is excluded from backward inference because it is polymorphic — it handles both numeric addition and text concatenation. Only operators that are unambiguously numeric can infer T_NUM.
When a slot appears with conflicting type inferences, the result is `unknown`. INT + FLOAT conflicts produce `num`.
**Nop prefix:** none (analysis only, does not modify instructions)
### 2. infer_slot_write_types (slot write-type invariance)
Scans all instructions to determine which non-parameter slots have a consistent write type. If every instruction that writes to a given slot produces the same type, that type is globally invariant and can safely persist across label join points.
This analysis is sound because:
- `alloc_slot()` in mcode.cm is monotonically increasing — temp slots are never reused
- All local variable declarations must be at function body level and initialized — slots are written before any backward jumps to loop headers
- `move` is conservatively treated as T_UNKNOWN, avoiding unsound transitive assumptions
Write type mapping:
| Instruction class | Write type |
|---|---|
| `int` | T_INT |
| `true`, `false` | T_BOOL |
| `null` | T_NULL |
| `access` | type of literal value |
| `array` | T_ARRAY |
| `record` | T_RECORD |
| `function` | T_FUNCTION |
| `length` | T_INT |
| bitwise ops | T_INT |
| `concat` | T_TEXT |
| bool ops, comparisons, `in` | T_BOOL |
| generic arithmetic (`add`, `subtract`, `negate`, etc.) | T_UNKNOWN |
| `move`, `load_field`, `load_index`, `load_dynamic`, `pop`, `get` | T_UNKNOWN |
| `invoke`, `tail_invoke` | T_UNKNOWN |
The result is a map of slot→type for slots where all writes agree on a single known type. Parameter slots (1..nr_args) and slot 0 are excluded.
Common patterns this enables:
- **Length variables** (`var len = length(arr)`): written by `length` (T_INT) only → invariant T_INT
- **Boolean flags** (`var found = false; ... found = true`): written by `false` and `true` → invariant T_BOOL
- **Locally-created containers** (`var arr = []`): written by `array` only → invariant T_ARRAY
Note: Loop counters (`var i = 0; i = i + 1`) are NOT invariant because `add` produces T_UNKNOWN. However, if `i` is a function parameter used in arithmetic, backward inference from `subtract`/`multiply`/etc. will infer T_NUM for it, which persists across labels.
**Nop prefix:** none (analysis only, does not modify instructions)
### 3. eliminate_type_checks (type-check + jump elimination)
Forward pass that tracks the known type of each slot. When a type check (`is_int`, `is_text`, `is_num`, etc.) is followed by a conditional jump, and the slot's type is already known, the check and jump can be eliminated or converted to an unconditional jump.
Three cases:
- **Known match** (e.g., `is_int` on a slot known to be `int`): both the check and the conditional jump are eliminated (nop'd).
- **Known mismatch** (e.g., `is_text` on a slot known to be `int`): the check is nop'd and the conditional jump is rewritten to an unconditional `jump`.
- **Unknown**: the check remains, but on fallthrough, the slot's type is narrowed to the checked type (enabling downstream eliminations).
This pass also reduces `load_dynamic`/`store_dynamic` to `load_field`/`store_field` or `load_index`/`store_index` when the key slot's type is known.
At label join points, all type information is reset except for parameter types from backward inference and write-invariant types from slot write-type analysis.
**Nop prefix:** `_nop_tc_`
### 4. simplify_algebra (same-slot comparison folding)
Tracks known constant values. Folds same-slot comparisons:
| Pattern | Rewrite |
|---------|---------|
| `eq_* dest, x, x` | `true dest` |
| `le_* dest, x, x` | `true dest` |
| `ge_* dest, x, x` | `true dest` |
| `is_identical dest, x, x` | `true dest` |
| `ne_* dest, x, x` | `false dest` |
| `lt_* dest, x, x` | `false dest` |
| `gt_* dest, x, x` | `false dest` |
**Nop prefix:** none (rewrites in place, does not create nops)
### 5. simplify_booleans (not + jump fusion)
Peephole pass that eliminates unnecessary `not` instructions:
| Pattern | Rewrite |
|---------|---------|
| `not d, x; jump_false d, L` | nop; `jump_true x, L` |
| `not d, x; jump_true d, L` | nop; `jump_false x, L` |
| `not d1, x; not d2, d1` | nop; `move d2, x` |
This is particularly effective on `if (!cond)` patterns, which the compiler generates as `not; jump_false`. After this pass, they become a single `jump_true`.
**Nop prefix:** `_nop_bl_`
### 6. eliminate_moves (self-move elimination)
Removes `move a, a` instructions where the source and destination are the same slot. These can arise from earlier passes rewriting binary operations into moves.
**Nop prefix:** `_nop_mv_`
### 7. eliminate_unreachable (dead code after return)
Nops instructions after `return` until the next real label. Only `return` is treated as a terminal instruction; `disrupt` is not, because the disruption handler code immediately follows `disrupt` and must remain reachable.
The mcode compiler emits a label at disruption handler entry points (see `emit_label(gen_label("disruption"))` in mcode.cm), which provides the label boundary that stops this pass from eliminating handler code.
**Nop prefix:** `_nop_ur_`
### 8. eliminate_dead_jumps (jump-to-next-label elimination)
Removes `jump L` instructions where `L` is the immediately following label (skipping over any intervening nop strings). These are common after other passes eliminate conditional branches, leaving behind jumps that fall through naturally.
**Nop prefix:** `_nop_dj_`
## Pass Composition
All passes run in sequence in `optimize_function`:
```
infer_param_types → returns param_types map
infer_slot_write_types → returns write_types map
eliminate_type_checks → uses param_types + write_types
simplify_algebra
simplify_booleans
eliminate_moves
eliminate_unreachable
eliminate_dead_jumps
```
Each pass is independent and can be commented out for testing or benchmarking.
## Intrinsic Inlining
Before streamlining, `mcode.cm` recognizes calls to built-in intrinsic functions and emits direct opcodes instead of the generic frame/setarg/invoke call sequence. This reduces a 6-instruction call pattern to a single instruction:
| Call | Emitted opcode |
|------|---------------|
| `is_array(x)` | `is_array dest, src` |
| `is_function(x)` | `is_func dest, src` |
| `is_object(x)` | `is_record dest, src` |
| `is_stone(x)` | `is_stone dest, src` |
| `is_integer(x)` | `is_int dest, src` |
| `is_text(x)` | `is_text dest, src` |
| `is_number(x)` | `is_num dest, src` |
| `is_logical(x)` | `is_bool dest, src` |
| `is_null(x)` | `is_null dest, src` |
| `length(x)` | `length dest, src` |
| `push(arr, val)` | `push arr, val` |
These inlined opcodes have corresponding Mach VM implementations in `mach.c`.
## Unified Arithmetic
Arithmetic operations use generic opcodes: `add`, `subtract`, `multiply`, `divide`, `modulo`, `pow`, `negate`. There are no type-dispatched variants (e.g., no `add_int`/`add_float`).
The Mach VM dispatches at runtime with an int-first fast path via `reg_vm_binop()`: it checks `JS_VALUE_IS_BOTH_INT` first for fast integer arithmetic, then falls back to float conversion, text concatenation (for `add` only), or type error.
Bitwise operations (`shl`, `shr`, `ushr`, `bitand`, `bitor`, `bitxor`, `bitnot`) remain integer-only and disrupt if operands are not integers.
The QBE/native backend maps generic arithmetic to helper calls (`qbe.add`, `qbe.sub`, etc.). The vision for the native path is that with sufficient type inference, the backend can unbox proven-numeric values to raw registers, operate directly, and only rebox at boundaries (returns, calls, stores).
## Debugging Tools
Three dump tools inspect the IR at different stages:
- **`dump_mcode.cm`** — prints the raw Mcode IR after `mcode.cm`, before streamlining
- **`dump_stream.cm`** — prints the IR after streamlining, with before/after instruction counts
- **`dump_types.cm`** — prints the streamlined IR with type annotations on each instruction
Usage:
```
./cell --core . dump_mcode.cm <file.ce|file.cm>
./cell --core . dump_stream.cm <file.ce|file.cm>
./cell --core . dump_types.cm <file.ce|file.cm>
```
## Tail Call Marking
When a function's return expression is a call (`stmt.tail == true` from the parser) and the function has no disruption handler, mcode.cm renames the final `invoke` instruction to `tail_invoke`. This is semantically identical to `invoke` in the current Mach VM, but marks the call site for future tail call optimization.
The disruption handler restriction exists because TCO would discard the current frame, but the handler must remain on the stack to catch disruptions from the callee.
`tail_invoke` is handled by the same passes as `invoke` in streamline (type tracking, algebraic simplification) and executes identically in the VM.
## Type Propagation Architecture
Type information flows through three compilation stages, each building on the previous:
### Stage 1: Parse-time type tags (parse.cm)
The parser assigns `type_tag` strings to scope variable entries when the type is syntactically obvious:
- **From initializers**: `def a = []` → `type_tag: "array"`, `def n = 42` → `type_tag: "integer"`, `def r = {}` → `type_tag: "record"`
- **From usage patterns** (def only): `def x = null; x[] = v` infers `type_tag: "array"` from the push. `def x = null; x.foo = v` infers `type_tag: "record"` from property access.
- **Type error detection** (def only): When a `def` variable has a known type_tag, provably wrong operations are compile errors:
  - Property access (`.`) on array
  - Push (`[]`) on non-array
  - Text key on array
  - Integer key on record
Only `def` (constant) variables participate in type inference and error detection. `var` variables can be reassigned, making their initializer type unreliable.
### Stage 2: Fold-time type propagation (fold.cm)
The fold pass extends type information through the AST:
- **Intrinsic folding**: `is_array(known_array)` folds to `true`. `length(known_array)` gets `hint: "array_length"`.
- **Purity analysis**: Expressions involving only `is_*` intrinsic calls with pure arguments are considered pure. This enables dead code elimination for unused `var`/`def` bindings with pure initializers, and elimination of standalone pure call statements.
- **Dead code**: Unused pure `var`/`def` declarations are removed. Standalone calls to pure intrinsics (where the result is discarded) are removed. Unreachable branches with constant conditions are removed.
The `pure_intrinsics` set currently contains only `is_*` sensory functions (`is_array`, `is_text`, `is_number`, `is_integer`, `is_function`, `is_logical`, `is_null`, `is_object`, `is_stone`). Other intrinsics like `text`, `number`, and `length` can disrupt on wrong argument types, so they are excluded — removing a call that would disrupt changes observable behavior.
### Stage 3: Streamline-time type tracking (streamline.cm)
The streamline optimizer uses a numeric type lattice (`T_INT`, `T_FLOAT`, `T_TEXT`, etc.) for fine-grained per-instruction tracking:
- **Backward inference** (pass 1): Scans typed operators to infer parameter types. Since parameters are `def` (immutable), inferred types persist across label boundaries.
- **Write-type invariance** (pass 2): Scans all instructions to find local slots where every write produces the same type. These invariant types persist across label boundaries alongside parameter types.
- **Forward tracking** (pass 3): `track_types` follows instruction execution order, tracking the type of each slot. Known-type operations set their destination type (e.g., `concat` → T_TEXT, `length` → T_INT). Generic arithmetic produces T_UNKNOWN. Type checks on unknown slots narrow the type on fallthrough.
- **Type check elimination** (pass 3): When a slot's type is already known, `is_<type>` + conditional jump pairs are eliminated or converted to unconditional jumps.
- **Dynamic access narrowing** (pass 3): `load_dynamic`/`store_dynamic` are narrowed to `load_field`/`store_field` or `load_index`/`store_index` when the key type is known.
Type information resets at label join points (since control flow merges could bring different types), except for parameter types from backward inference and write-invariant types from slot write-type analysis.
## Future Work
### Copy Propagation
A basic-block-local copy propagation pass would replace uses of a copied variable with its source, enabling further move elimination. An implementation was attempted but encountered an unsolved bug where 2-position instruction operand replacement produces incorrect code during self-hosting (the replacement logic for 3-position instructions works correctly). The root cause is not yet understood. See the project memory files for detailed notes.
### Expanded Purity Analysis
The current purity set is conservative (only `is_*`). It could be expanded by:
- **Argument-type-aware purity**: If all arguments to an intrinsic are known to be the correct types (via type_tag or slot_types), the call cannot disrupt and is safe to eliminate. For example, `length(known_array)` is pure but `length(unknown)` is not.
- **User function purity**: Analyze user-defined function bodies during pre_scan. A function is pure if its body contains only pure expressions and calls to known-pure functions. This requires fixpoint iteration for mutual recursion.
- **Callback-aware purity**: Intrinsics like `filter`, `find`, `reduce`, `some`, `every` are pure if their callback argument is pure.
### Forward Type Narrowing from Typed Operations
With unified arithmetic (generic `add`/`subtract`/`multiply`/`divide`/`modulo`/`negate` instead of typed variants), this approach is no longer applicable. Typed comparisons (`eq_int`, `lt_float`, etc.) still exist and their operands have known types, but these are already handled by backward inference.
### Guard Hoisting for Parameters
When a type check on a parameter passes (falls through), the parameter's type could be promoted to `param_types` so it persists across label boundaries. This would allow the first type check on a parameter to prove its type for the entire function. However, this is unsound for polymorphic parameters — if a function is called with different argument types, the first check would wrongly eliminate checks for subsequent types.
A safe version would require proving that a parameter is monomorphic (called with only one type across all call sites), which requires interprocedural analysis.
**Note:** For local variables (non-parameters), the write-type invariance analysis (pass 2) achieves a similar effect safely — if every write to a slot produces the same type, that type persists across labels without needing to hoist any guard.
### Tail Call Optimization
`tail_invoke` instructions are currently marked but execute identically to `invoke`. Actual TCO would reuse the current call frame instead of creating a new one. This requires:
- Ensuring argument count matches (or the frame can be resized)
- No live locals needed after the call (guaranteed by tail position)
- No disruption handler on the current function (already enforced by the marking)
- VM support in mach.c to rewrite the frame in place
### Interprocedural Type Inference
Currently all type inference is intraprocedural (within a single function). Cross-function analysis could:
- Infer return types from function bodies
- Propagate argument types from call sites to callees
- Specialize functions for known argument types (cloning)
### Strength Reduction
Common patterns that could be lowered to cheaper operations when operand types are known:
- `multiply x, 2` with proven-int operands → shift left
- `divide x, 2` with proven-int → arithmetic shift right
- `modulo x, power_of_2` with proven-int → bitwise and
### Numeric Unboxing (QBE/native path)
With unified arithmetic and backward type inference, the native backend can identify regions where numeric values remain in registers without boxing/unboxing:
1. **Guard once**: When backward inference proves a parameter is T_NUM, emit a single type guard at function entry.
2. **Unbox**: Convert the tagged JSValue to a raw double register.
3. **Operate**: Use native FP/int instructions directly (no function calls, no tag checks).
4. **Rebox**: Convert back to tagged JSValue only at rebox points (function returns, calls, stores to arrays/records).
This requires inserting `unbox`/`rebox` IR annotations (no-ops in the Mach VM, meaningful only to QBE).
### Loop-Invariant Code Motion
Type checks that are invariant across loop iterations (checking a variable that doesn't change in the loop body) could be hoisted above the loop. This would require identifying loop boundaries and proving invariance.
### Algebraic Identity Optimization
With unified arithmetic, algebraic identities (x+0→x, x*1→x, x*0→0, x/1→x) require knowing operand values at compile time. Since generic `add`/`multiply` operate on any numeric type, the constant-tracking logic in `simplify_algebra` could be extended to handle these for known-constant slots.
## Nop Convention
Eliminated instructions are replaced with strings matching `_nop_<prefix>_<counter>`. The prefix identifies which pass created the nop. Nop strings are:
- Skipped during interpretation (the VM ignores them)
- Skipped during QBE emission
- Not counted in instruction statistics
- Preserved in the instruction array to maintain positional stability for jump targets

170
docs/testing.md Normal file
View File

@@ -0,0 +1,170 @@
---
title: "Testing"
description: "Writing and running tests in ƿit"
weight: 45
type: "docs"
---
ƿit has built-in support for writing and running tests. Tests live in the `tests/` directory of a package and are `.cm` modules that return a record of test functions.
## Writing Tests
A test file returns a record where each key starting with `test_` is a test function. A test passes if it returns `null` (or nothing). It fails if it returns a text string describing the failure.
```javascript
// tests/math.cm
return {
test_addition: function() {
if (1 + 2 != 3) return "expected 3"
},
test_division: function() {
if (10 / 3 != 3.333333333333333333) return "unexpected result"
}
}
```
Test functions take no arguments. Use early returns with a failure message to report errors:
```javascript
test_array_push: function() {
var a = [1, 2]
a[] = 3
if (length(a) != 3) return "expected length 3, got " + text(length(a))
if (a[2] != 3) return "expected a[2] to be 3"
}
```
## Running Tests
```bash
pit test # run all tests in current package
pit test suite # run a specific test file (tests/suite.cm)
pit test tests/math # same, with explicit path
pit test all # run all tests in current package
pit test package <name> # run all tests in a named package
pit test package <name> <test> # run a specific test in a named package
pit test package all # run tests from all installed packages
```
### Flags
```bash
pit test suite -g # run GC after each test (useful for detecting leaks)
pit test suite --verify # enable IR verification during compilation
pit test suite --diff # run each test optimized and unoptimized, compare results
```
`--verify` and `--diff` can be combined:
```bash
pit test suite --verify --diff
```
## IR Verification
The `--verify` flag enables structural validation of the compiler's intermediate representation after each optimizer pass. This catches bugs like invalid slot references, broken jump targets, and malformed instructions.
When verification fails, errors are printed with the pass name that introduced them:
```
[verify_ir] slot_bounds: slot 12 out of range 0..9 in instruction add_int
[verify_ir] 1 errors after dead_code_elimination
```
IR verification adds overhead and is intended for development, not production use.
## Differential Testing
Differential testing runs each test through two paths — with the optimizer enabled and with it disabled — and compares results. Any mismatch between the two indicates an optimizer bug.
### Inline Mode
The `--diff` flag on `pit test` runs each test module through both paths during a normal test run:
```bash
pit test suite --diff
```
Output includes a mismatch count at the end:
```
Tests: 493 passed, 0 failed, 493 total
Diff mismatches: 0
```
### Standalone Mode
`pit diff` is a dedicated differential testing tool with detailed mismatch reporting:
```bash
pit diff # diff all test files in current package
pit diff suite # diff a specific test file
pit diff tests/math # same, with explicit path
```
For each test function, it reports whether the optimized and unoptimized results match:
```
tests/suite.cm: 493 passed, 0 failed
----------------------------------------
Diff: 493 passed, 0 failed, 493 total
```
When a mismatch is found:
```
tests/suite.cm: 492 passed, 1 failed
MISMATCH: test_foo: result mismatch opt=42 noopt=43
```
## Fuzz Testing
The fuzzer generates random self-checking programs, compiles them, and runs them through both optimized and unoptimized paths. Each generated program contains test functions that validate their own expected results, so failures catch both correctness bugs and optimizer mismatches.
```bash
pit fuzz # 100 iterations, random seed
pit fuzz 500 # 500 iterations, random seed
pit fuzz --seed 42 # 100 iterations, deterministic seed
pit fuzz 1000 --seed 42 # 1000 iterations, deterministic seed
```
The fuzzer generates programs that exercise:
- Integer and float arithmetic with known expected results
- Control flow (if/else, while loops)
- Closures and captured variable mutation
- Records and property access
- Arrays and iteration
- Higher-order functions
- Disruption handling
- Text concatenation
On failure, the generated source is saved to `tests/fuzz_failures/` for reproduction:
```
Fuzzing: 1000 iterations, starting seed=42
FAIL seed=57: diff fuzz_3: opt=10 noopt=11
saved to tests/fuzz_failures/seed_57.cm
----------------------------------------
Fuzz: 999 passed, 1 failed, 1000 total
Failures saved to tests/fuzz_failures/
```
Saved failure files are valid `.cm` modules that can be run directly or added to the test suite.
## Test File Organization
Tests live in the `tests/` directory of a package:
```
mypackage/
├── pit.toml
├── math.cm
└── tests/
├── suite.cm # main test suite
├── math.cm # math-specific tests
└── disrupt.cm # disruption tests
```
All `.cm` files under `tests/` are discovered automatically by `pit test`.

16
dump_ast.cm Normal file
View File

@@ -0,0 +1,16 @@
// dump_ast.cm — pretty-print the folded AST as JSON
//
// Usage: ./cell --core . dump_ast.cm <file.ce|file.cm>
var fd = use("fd")
var json = use("json")
var tokenize = use("tokenize")
var parse = use("parse")
var fold = use("fold")

// A source file argument is required: without it there is no path to read.
// Print a usage line and stop, matching dump_mcode.cm.
if (length(args) < 1) {
  print("usage: cell --core . dump_ast.cm <file>")
  return
}

// Front end only: tokenize -> parse -> fold, then print the folded AST as JSON.
var filename = args[0]
var src = text(fd.slurp(filename))
var tok = tokenize(src, filename)
var ast = parse(tok.tokens, src, filename, tokenize)
var folded = fold(ast)
print(json.encode(folded))

View File

@@ -1,20 +1,117 @@
// dump_mcode.cm — pretty-print mcode IR (before streamlining)
//
// Usage: ./cell --core . dump_mcode.cm <file.ce|file.cm>
// Compiler pipeline modules used to produce the IR that gets dumped.
var fd = use("fd")
var json = use("json")
var tokenize = use("tokenize")
var parse = use("parse")
var fold = use("fold")
var mcode = use("mcode")
var streamline = use("streamline")
// NOTE(review): the `name`-based declarations below and the `filename`-based
// ones further down declare the same `src`/`tok`/`ast` — this looks like two
// revisions of the script interleaved; confirm which set is live.
var name = args[0]
var src = text(fd.slurp(name))
var tok = tokenize(src, name)
var ast = parse(tok.tokens, src, name, tokenize)
// Print a usage line and stop when no source file argument was given.
if (length(args) < 1) {
print("usage: cell --core . dump_mcode.cm <file>")
return
}
// Read the source file and run it through tokenize -> parse -> fold -> mcode.
var filename = args[0]
var src = text(fd.slurp(filename))
var tok = tokenize(src, filename)
var ast = parse(tok.tokens, src, filename, tokenize)
var folded = fold(ast)
var compiled = mcode(folded)
// Streamline the mcode, encode the result as JSON and write it to
// /tmp/mcode_dump.json.
var optimized = streamline(compiled)
var out = json.encode(optimized)
var f = fd.open("/tmp/mcode_dump.json", "w")
fd.write(f, out)
fd.close(f)
print("wrote /tmp/mcode_dump.json")
// Right-pad `s` with spaces until it is at least `w` characters long.
// Text already `w` or longer is returned unchanged.
var pad_right = function(s, w) {
  var padded = s
  var missing = w - length(s)
  while (missing > 0) {
    padded = padded + " "
    missing = missing - 1
  }
  return padded
}
// Render one instruction operand for display: null as "null", numbers via
// text(), text wrapped in double quotes, objects as JSON, logicals as
// "true"/"false"; anything else falls back to text().
var fmt_val = function(v) {
  if (is_null(v)) return "null"
  if (is_number(v)) return text(v)
  if (is_text(v)) return `"${v}"`
  if (is_object(v)) return json.encode(v)
  if (is_logical(v)) {
    if (v) return "true"
    return "false"
  }
  return text(v)
}
// Print a listing of one function's IR.
//   func - function record; reads nr_args, nr_slots, nr_close_slots and
//          instructions (missing counts are shown as 0)
//   name - label used in the header line
// Text entries are printed as labels unless they start with "_nop_"; array
// entries are printed as numbered instructions. The last two elements of an
// instruction array are not shown. Always returns null.
var dump_function = function(func, name) {
  var arg_count = 0
  var slot_count = 0
  var closure_count = 0
  var code = func.instructions
  var idx = 0
  var pc = 0
  var entry = null
  var shown = null
  var k = 0
  if (func.nr_args != null) {
    arg_count = func.nr_args
  }
  if (func.nr_slots != null) {
    slot_count = func.nr_slots
  }
  if (func.nr_close_slots != null) {
    closure_count = func.nr_close_slots
  }
  print(`\n=== ${name} (args=${text(arg_count)}, slots=${text(slot_count)}, closures=${text(closure_count)}) ===`)
  if (code == null || length(code) == 0) {
    print(" (empty)")
    return null
  }
  while (idx < length(code)) {
    entry = code[idx]
    idx = idx + 1
    if (is_text(entry)) {
      // Labels are printed; nop placeholders are silently skipped.
      if (!starts_with(entry, "_nop_")) {
        print(`${entry}:`)
      }
      continue
    }
    if (!is_array(entry)) {
      continue
    }
    // Collect operands: everything after the opcode except the last two slots.
    shown = []
    k = 1
    while (k < length(entry) - 2) {
      push(shown, fmt_val(entry[k]))
      k = k + 1
    }
    print(` ${pad_right(text(pc), 5)} ${pad_right(entry[0], 14)} ${text(shown, ", ")}`)
    pc = pc + 1
  }
  return null
}
var main_name = null
var fi = 0
var func = null
var fname = null
// Dump main: use the compiled unit's name when present, else "<main>".
if (compiled.main != null) {
  main_name = "<main>"
  if (compiled.name != null) {
    main_name = compiled.name
  }
  dump_function(compiled.main, main_name)
}
// Dump sub-functions, each prefixed with its index in compiled.functions.
if (compiled.functions != null) {
  while (fi < length(compiled.functions)) {
    func = compiled.functions[fi]
    fname = `<func_${text(fi)}>`
    if (func.name != null) {
      fname = func.name
    }
    dump_function(func, `[${text(fi)}] ${fname}`)
    fi = fi + 1
  }
}

166
dump_stream.cm Normal file
View File

@@ -0,0 +1,166 @@
// dump_stream.cm — show mcode IR before and after streamlining
//
// Usage: ./cell --core . dump_stream.cm <file.ce|file.cm>
var fd = use("fd")
var json = use("json")
var tokenize = use("tokenize")
var parse = use("parse")
var fold = use("fold")
var mcode = use("mcode")
var streamline = use("streamline")
// Require an input file; print a usage hint and stop otherwise.
if (length(args) < 1) {
print("usage: cell --core . dump_stream.cm <file>")
return
}
var filename = args[0]
// Front end: read the source text, tokenize, parse, fold, then lower to mcode.
var src = text(fd.slurp(filename))
var tok = tokenize(src, filename)
var ast = parse(tok.tokens, src, filename, tokenize)
var folded = fold(ast)
var compiled = mcode(folded)
// Deep copy IR for before snapshot
// (JSON round-trip; presumably needed because streamline may modify
// `compiled` in place — TODO confirm against streamline)
var before = json.decode(json.encode(compiled))
var optimized = streamline(compiled)
// Append spaces to `s` until its length reaches `w`; longer text is
// returned as-is.
var pad_right = function(s, w) {
  var padded = s
  var missing = w - length(s)
  while (missing > 0) {
    padded = padded + " "
    missing = missing - 1
  }
  return padded
}
// Format a single operand for the listing: "null", a number via text(),
// quoted text, JSON for objects, "true"/"false" for logicals, and text()
// for anything else.
var fmt_val = function(v) {
  if (is_null(v)) return "null"
  if (is_number(v)) return text(v)
  if (is_text(v)) return `"${v}"`
  if (is_object(v)) return json.encode(v)
  if (is_logical(v)) {
    if (v) return "true"
    return "false"
  }
  return text(v)
}
// Tally instruction statistics for one function's IR.
//   func - function record; only func.instructions is read
// Returns a record:
//   total - number of array (real instruction) entries
//   nops  - number of text entries starting with "_nop_"
//   real  - number of instructions that are not nops
//   calls - number of instructions whose opcode is "invoke"
// A missing instruction list yields all zeros.
var count_stats = function(func) {
  var instrs = func.instructions
  var total = 0
  var nops = 0
  var calls = 0
  var i = 0
  var instr = null
  if (instrs == null) {
    return {total: 0, nops: 0, real: 0, calls: 0}
  }
  while (i < length(instrs)) {
    instr = instrs[i]
    if (is_text(instr)) {
      // Text entries are labels or nop placeholders; only nops are counted.
      if (starts_with(instr, "_nop_")) {
        nops = nops + 1
      }
    } else if (is_array(instr)) {
      total = total + 1
      if (instr[0] == "invoke") {
        calls = calls + 1
      }
    }
    i = i + 1
  }
  // `total` only ever counts array entries, so nop placeholders are already
  // excluded from it. Subtracting `nops` again would remove the eliminated
  // instructions twice and under-report what remains.
  return {total: total, nops: nops, real: total, calls: calls}
}
// Print the instruction listing of one function (no header line).
//   func      - function record; only func.instructions is read
//   show_nops - when truthy, "_nop_" placeholders are printed as numbered
//               "--- nop ---" rows; otherwise they are skipped
// Other text entries are printed as labels; array entries as numbered
// instructions with their last two elements omitted. Always returns null.
var dump_function = function(func, show_nops) {
  var code = func.instructions
  var idx = 0
  var pc = 0
  var entry = null
  var shown = null
  var k = 0
  if (code == null || length(code) == 0) {
    return null
  }
  while (idx < length(code)) {
    entry = code[idx]
    idx = idx + 1
    if (is_text(entry)) {
      if (!starts_with(entry, "_nop_")) {
        print(`${entry}:`)
      } else if (show_nops) {
        // A displayed nop still occupies a pc slot.
        print(` ${pad_right(text(pc), 5)} --- nop ---`)
        pc = pc + 1
      }
      continue
    }
    if (!is_array(entry)) {
      continue
    }
    // Operands are everything after the opcode except the last two elements.
    shown = []
    k = 1
    while (k < length(entry) - 2) {
      push(shown, fmt_val(entry[k]))
      k = k + 1
    }
    print(` ${pad_right(text(pc), 5)} ${pad_right(entry[0], 14)} ${text(shown, ", ")}`)
    pc = pc + 1
  }
  return null
}
// Print a before/after summary for one function, followed by its
// streamlined listing.
//   before_func - snapshot of the function taken before streamlining
//   after_func  - the same function after streamlining
//   name        - label used in the header line
// Always returns null.
var dump_pair = function(before_func, after_func, name) {
  var arg_count = 0
  var slot_count = 0
  var stats_before = null
  var stats_after = null
  if (after_func.nr_args != null) {
    arg_count = after_func.nr_args
  }
  if (after_func.nr_slots != null) {
    slot_count = after_func.nr_slots
  }
  stats_before = count_stats(before_func)
  stats_after = count_stats(after_func)
  print(`\n=== ${name} (args=${text(arg_count)}, slots=${text(slot_count)}) ===`)
  print(` before: ${text(stats_before.total)} instructions, ${text(stats_before.calls)} invokes`)
  // The nop count of the streamlined function is reported as "eliminated".
  print(` after: ${text(stats_after.real)} instructions (${text(stats_after.nops)} eliminated), ${text(stats_after.calls)} invokes`)
  print("\n -- streamlined --")
  dump_function(after_func, false)
  return null
}
var main_name = null
var fi = 0
var func = null
var bfunc = null
var fname = null
// Dump main: only when both the snapshot and the streamlined unit have one.
if (optimized.main != null && before.main != null) {
  main_name = "<main>"
  if (optimized.name != null) {
    main_name = optimized.name
  }
  dump_pair(before.main, optimized.main, main_name)
}
// Dump sub-functions, pairing each streamlined function with the snapshot
// entry at the same index.
if (optimized.functions != null && before.functions != null) {
  while (fi < length(optimized.functions)) {
    func = optimized.functions[fi]
    bfunc = before.functions[fi]
    fname = `<func_${text(fi)}>`
    if (func.name != null) {
      fname = func.name
    }
    dump_pair(bfunc, func, `[${text(fi)}] ${fname}`)
    fi = fi + 1
  }
}

237
dump_types.cm Normal file
View File

@@ -0,0 +1,237 @@
// dump_types.cm — show streamlined IR with type annotations
//
// Usage: ./cell --core . dump_types.cm <file.ce|file.cm>
var fd = use("fd")
var json = use("json")
var tokenize = use("tokenize")
var parse = use("parse")
var fold = use("fold")
var mcode = use("mcode")
var streamline = use("streamline")
// Require an input file; print a usage hint and stop otherwise.
if (length(args) < 1) {
print("usage: cell --core . dump_types.cm <file>")
return
}
var filename = args[0]
// Front end: read the source text, tokenize, parse, fold, lower to mcode,
// then streamline; the dump below works on the streamlined result.
var src = text(fd.slurp(filename))
var tok = tokenize(src, filename)
var ast = parse(tok.tokens, src, filename, tokenize)
var folded = fold(ast)
var compiled = mcode(folded)
var optimized = streamline(compiled)
// Type constants
// Names shown in the type annotations printed next to each instruction.
def T_UNKNOWN = "unknown"
def T_INT = "int"
def T_FLOAT = "float"
def T_NUM = "num"
def T_TEXT = "text"
def T_BOOL = "bool"
def T_NULL = "null"
def T_ARRAY = "array"
def T_RECORD = "record"
def T_FUNCTION = "function"
// Opcodes whose destination slot is recorded as T_INT by track_types.
def int_result_ops = {
bitnot: true, bitand: true, bitor: true,
bitxor: true, shl: true, shr: true, ushr: true
}
// Opcodes whose destination slot is recorded as T_BOOL by track_types:
// typed comparisons, logical operators and type predicates.
def bool_result_ops = {
eq_int: true, ne_int: true, lt_int: true, gt_int: true,
le_int: true, ge_int: true,
eq_float: true, ne_float: true, lt_float: true, gt_float: true,
le_float: true, ge_float: true,
eq_text: true, ne_text: true, lt_text: true, gt_text: true,
le_text: true, ge_text: true,
eq_bool: true, ne_bool: true,
not: true, and: true, or: true,
is_int: true, is_text: true, is_num: true,
is_bool: true, is_null: true, is_identical: true,
is_array: true, is_func: true, is_record: true, is_stone: true
}
// Classify the literal carried by an "access" instruction: text gives T_TEXT,
// integers give T_INT, other numbers give T_FLOAT, anything else T_UNKNOWN.
var access_value_type = function(val) {
  if (is_text(val)) return T_TEXT
  if (!is_number(val)) return T_UNKNOWN
  if (is_integer(val)) return T_INT
  return T_FLOAT
}
// Update `slot_types` (keyed by slot number as text) with the type that
// `instr` leaves in its destination slot.
//   slot_types - record mutated in place
//   instr      - instruction array; instr[0] is the opcode
// The destination is instr[1], except for invoke/tail_invoke where it is
// instr[2]. Opcodes not listed here leave slot_types untouched.
// Always returns null.
var track_types = function(slot_types, instr) {
  var op = instr[0]
  var dest_index = 1
  var result = null
  var moved = null
  if (op == "move") {
    // A move propagates the source slot's known type, if any.
    moved = slot_types[text(instr[2])]
    result = moved != null ? moved : T_UNKNOWN
  } else if (op == "access") {
    result = access_value_type(instr[2])
  } else if (op == "invoke" || op == "tail_invoke") {
    dest_index = 2
    result = T_UNKNOWN
  } else if (op == "int" || op == "length" || int_result_ops[op] == true) {
    result = T_INT
  } else if (op == "true" || op == "false" || bool_result_ops[op] == true) {
    result = T_BOOL
  } else if (op == "concat" || op == "typeof") {
    result = T_TEXT
  } else if (op == "null") {
    result = T_NULL
  } else if (op == "array") {
    result = T_ARRAY
  } else if (op == "record") {
    result = T_RECORD
  } else if (op == "function") {
    result = T_FUNCTION
  } else if (op == "load_field" || op == "load_index" || op == "load_dynamic" ||
             op == "pop" || op == "get") {
    result = T_UNKNOWN
  } else if (op == "add" || op == "subtract" || op == "multiply" ||
             op == "divide" || op == "modulo" || op == "pow" || op == "negate") {
    result = T_UNKNOWN
  }
  if (result != null) {
    slot_types[text(instr[dest_index])] = result
  }
  return null
}
// Pad `s` on the right with spaces to a minimum length of `w`.
var pad_right = function(s, w) {
  var padded = s
  var missing = w - length(s)
  while (missing > 0) {
    padded = padded + " "
    missing = missing - 1
  }
  return padded
}
// Turn an operand into display text: "null", numbers via text(), text in
// double quotes, objects as JSON, logicals as "true"/"false", and text()
// as the fallback.
var fmt_val = function(v) {
  if (is_null(v)) return "null"
  if (is_number(v)) return text(v)
  if (is_text(v)) return `"${v}"`
  if (is_object(v)) return json.encode(v)
  if (is_logical(v)) {
    if (v) return "true"
    return "false"
  }
  return text(v)
}
// Build type annotation string for an instruction.
// Every numeric operand (excluding the opcode and the last two elements) is
// looked up in `slot_types` as a slot number; operands with a known,
// non-"unknown" type contribute "s<slot>:<type>". Entries are joined with a
// single space; returns "" when nothing is known.
var type_annotation = function(slot_types, instr) {
  var limit = length(instr) - 2
  var notes = []
  var idx = 1
  var operand = null
  var known = null
  while (idx < limit) {
    operand = instr[idx]
    idx = idx + 1
    if (is_number(operand)) {
      known = slot_types[text(operand)]
      if (known != null && known != T_UNKNOWN) {
        push(notes, `s${text(operand)}:${known}`)
      }
    }
  }
  if (length(notes) == 0) return ""
  return text(notes, " ")
}
// Print one function's IR with per-instruction type annotations.
//   func - function record; reads nr_args, nr_slots and instructions
//   name - label used in the header line
// Known slot types are tracked as instructions are printed and are cleared
// at every label. "_nop_" placeholders are skipped entirely.
// Always returns null.
var dump_function_typed = function(func, name) {
  var arg_count = 0
  var slot_count = 0
  var code = func.instructions
  var slot_types = {}
  var idx = 0
  var pc = 0
  var entry = null
  var note = null
  var shown = null
  var k = 0
  var row = null
  if (func.nr_args != null) {
    arg_count = func.nr_args
  }
  if (func.nr_slots != null) {
    slot_count = func.nr_slots
  }
  print(`\n=== ${name} (args=${text(arg_count)}, slots=${text(slot_count)}) ===`)
  if (code == null || length(code) == 0) {
    print(" (empty)")
    return null
  }
  while (idx < length(code)) {
    entry = code[idx]
    idx = idx + 1
    if (is_text(entry)) {
      if (!starts_with(entry, "_nop_")) {
        // A label starts a fresh region: forget everything known so far.
        slot_types = {}
        print(`${entry}:`)
      }
      continue
    }
    if (!is_array(entry)) {
      continue
    }
    // Annotate with the types known before this instruction runs.
    note = type_annotation(slot_types, entry)
    shown = []
    k = 1
    while (k < length(entry) - 2) {
      push(shown, fmt_val(entry[k]))
      k = k + 1
    }
    row = pad_right(` ${pad_right(text(pc), 5)} ${pad_right(entry[0], 14)} ${text(shown, ", ")}`, 50)
    if (length(note) > 0) {
      row = `${row} ; ${note}`
    }
    print(row)
    track_types(slot_types, entry)
    pc = pc + 1
  }
  return null
}
var main_name = null
var fi = 0
var func = null
var fname = null
// Dump main: use the unit's name when present, else "<main>".
if (optimized.main != null) {
  main_name = "<main>"
  if (optimized.name != null) {
    main_name = optimized.name
  }
  dump_function_typed(optimized.main, main_name)
}
// Dump sub-functions, each prefixed with its index in optimized.functions.
if (optimized.functions != null) {
  while (fi < length(optimized.functions)) {
    func = optimized.functions[fi]
    fname = `<func_${text(fi)}>`
    if (func.name != null) {
      fname = func.name
    }
    dump_function_typed(func, `[${text(fi)}] ${fname}`)
    fi = fi + 1
  }
}

153
fd.c
View File

@@ -412,117 +412,117 @@ JSC_CCALL(fd_close,
JSC_CCALL(fd_fstat,
int fd = js2fd(js, argv[0]);
if (fd < 0) return JS_EXCEPTION;
struct stat st;
if (fstat(fd, &st) != 0)
return JS_ThrowInternalError(js, "fstat failed: %s", strerror(errno));
JSValue obj = JS_NewObject(js);
JS_SetPropertyStr(js, obj, "size", JS_NewInt64(js, st.st_size));
JS_SetPropertyStr(js, obj, "mode", JS_NewInt32(js, st.st_mode));
JS_SetPropertyStr(js, obj, "uid", JS_NewInt32(js, st.st_uid));
JS_SetPropertyStr(js, obj, "gid", JS_NewInt32(js, st.st_gid));
JS_SetPropertyStr(js, obj, "atime", JS_NewInt64(js, st.st_atime));
JS_SetPropertyStr(js, obj, "mtime", JS_NewInt64(js, st.st_mtime));
JS_SetPropertyStr(js, obj, "ctime", JS_NewInt64(js, st.st_ctime));
JS_SetPropertyStr(js, obj, "nlink", JS_NewInt32(js, st.st_nlink));
JS_SetPropertyStr(js, obj, "ino", JS_NewInt64(js, st.st_ino));
JS_SetPropertyStr(js, obj, "dev", JS_NewInt32(js, st.st_dev));
JS_SetPropertyStr(js, obj, "rdev", JS_NewInt32(js, st.st_rdev));
JS_FRAME(js);
JS_ROOT(obj, JS_NewObject(js));
JS_SetPropertyStr(js, obj.val, "size", JS_NewInt64(js, st.st_size));
JS_SetPropertyStr(js, obj.val, "mode", JS_NewInt32(js, st.st_mode));
JS_SetPropertyStr(js, obj.val, "uid", JS_NewInt32(js, st.st_uid));
JS_SetPropertyStr(js, obj.val, "gid", JS_NewInt32(js, st.st_gid));
JS_SetPropertyStr(js, obj.val, "atime", JS_NewInt64(js, st.st_atime));
JS_SetPropertyStr(js, obj.val, "mtime", JS_NewInt64(js, st.st_mtime));
JS_SetPropertyStr(js, obj.val, "ctime", JS_NewInt64(js, st.st_ctime));
JS_SetPropertyStr(js, obj.val, "nlink", JS_NewInt32(js, st.st_nlink));
JS_SetPropertyStr(js, obj.val, "ino", JS_NewInt64(js, st.st_ino));
JS_SetPropertyStr(js, obj.val, "dev", JS_NewInt32(js, st.st_dev));
JS_SetPropertyStr(js, obj.val, "rdev", JS_NewInt32(js, st.st_rdev));
#ifndef _WIN32
JS_SetPropertyStr(js, obj, "blksize", JS_NewInt32(js, st.st_blksize));
JS_SetPropertyStr(js, obj, "blocks", JS_NewInt64(js, st.st_blocks));
JS_SetPropertyStr(js, obj.val, "blksize", JS_NewInt32(js, st.st_blksize));
JS_SetPropertyStr(js, obj.val, "blocks", JS_NewInt64(js, st.st_blocks));
#else
JS_SetPropertyStr(js, obj, "blksize", JS_NewInt32(js, 4096));
JS_SetPropertyStr(js, obj, "blocks", JS_NewInt64(js, st.st_size / 512));
JS_SetPropertyStr(js, obj.val, "blksize", JS_NewInt32(js, 4096));
JS_SetPropertyStr(js, obj.val, "blocks", JS_NewInt64(js, st.st_size / 512));
#endif
// Add boolean properties for file type
JS_SetPropertyStr(js, obj, "isFile", JS_NewBool(js, S_ISREG(st.st_mode)));
JS_SetPropertyStr(js, obj, "isDirectory", JS_NewBool(js, S_ISDIR(st.st_mode)));
JS_SetPropertyStr(js, obj, "isSymlink", JS_NewBool(js, S_ISLNK(st.st_mode)));
JS_SetPropertyStr(js, obj, "isFIFO", JS_NewBool(js, S_ISFIFO(st.st_mode)));
JS_SetPropertyStr(js, obj, "isSocket", JS_NewBool(js, S_ISSOCK(st.st_mode)));
JS_SetPropertyStr(js, obj, "isCharDevice", JS_NewBool(js, S_ISCHR(st.st_mode)));
JS_SetPropertyStr(js, obj, "isBlockDevice", JS_NewBool(js, S_ISBLK(st.st_mode)));
return obj;
JS_SetPropertyStr(js, obj.val, "isFile", JS_NewBool(js, S_ISREG(st.st_mode)));
JS_SetPropertyStr(js, obj.val, "isDirectory", JS_NewBool(js, S_ISDIR(st.st_mode)));
JS_SetPropertyStr(js, obj.val, "isSymlink", JS_NewBool(js, S_ISLNK(st.st_mode)));
JS_SetPropertyStr(js, obj.val, "isFIFO", JS_NewBool(js, S_ISFIFO(st.st_mode)));
JS_SetPropertyStr(js, obj.val, "isSocket", JS_NewBool(js, S_ISSOCK(st.st_mode)));
JS_SetPropertyStr(js, obj.val, "isCharDevice", JS_NewBool(js, S_ISCHR(st.st_mode)));
JS_SetPropertyStr(js, obj.val, "isBlockDevice", JS_NewBool(js, S_ISBLK(st.st_mode)));
JS_RETURN(obj.val);
)
JSC_CCALL(fd_stat,
const char *path = JS_ToCString(js, argv[0]);
if (!path) return JS_EXCEPTION;
struct stat st;
if (stat(path, &st) != 0) {
JS_FreeCString(js, path);
return JS_NewObject(js);
}
JSValue obj = JS_NewObject(js);
JS_SetPropertyStr(js, obj, "size", JS_NewInt64(js, st.st_size));
JS_SetPropertyStr(js, obj, "mode", JS_NewInt32(js, st.st_mode));
JS_SetPropertyStr(js, obj, "uid", JS_NewInt32(js, st.st_uid));
JS_SetPropertyStr(js, obj, "gid", JS_NewInt32(js, st.st_gid));
JS_SetPropertyStr(js, obj, "atime", JS_NewInt64(js, st.st_atime));
JS_SetPropertyStr(js, obj, "mtime", JS_NewInt64(js, st.st_mtime));
JS_SetPropertyStr(js, obj, "ctime", JS_NewInt64(js, st.st_ctime));
JS_SetPropertyStr(js, obj, "nlink", JS_NewInt32(js, st.st_nlink));
JS_SetPropertyStr(js, obj, "ino", JS_NewInt64(js, st.st_ino));
JS_SetPropertyStr(js, obj, "dev", JS_NewInt32(js, st.st_dev));
JS_SetPropertyStr(js, obj, "rdev", JS_NewInt32(js, st.st_rdev));
JS_FRAME(js);
JS_ROOT(obj, JS_NewObject(js));
JS_SetPropertyStr(js, obj.val, "size", JS_NewInt64(js, st.st_size));
JS_SetPropertyStr(js, obj.val, "mode", JS_NewInt32(js, st.st_mode));
JS_SetPropertyStr(js, obj.val, "uid", JS_NewInt32(js, st.st_uid));
JS_SetPropertyStr(js, obj.val, "gid", JS_NewInt32(js, st.st_gid));
JS_SetPropertyStr(js, obj.val, "atime", JS_NewInt64(js, st.st_atime));
JS_SetPropertyStr(js, obj.val, "mtime", JS_NewInt64(js, st.st_mtime));
JS_SetPropertyStr(js, obj.val, "ctime", JS_NewInt64(js, st.st_ctime));
JS_SetPropertyStr(js, obj.val, "nlink", JS_NewInt32(js, st.st_nlink));
JS_SetPropertyStr(js, obj.val, "ino", JS_NewInt64(js, st.st_ino));
JS_SetPropertyStr(js, obj.val, "dev", JS_NewInt32(js, st.st_dev));
JS_SetPropertyStr(js, obj.val, "rdev", JS_NewInt32(js, st.st_rdev));
#ifndef _WIN32
JS_SetPropertyStr(js, obj, "blksize", JS_NewInt32(js, st.st_blksize));
JS_SetPropertyStr(js, obj, "blocks", JS_NewInt64(js, st.st_blocks));
JS_SetPropertyStr(js, obj.val, "blksize", JS_NewInt32(js, st.st_blksize));
JS_SetPropertyStr(js, obj.val, "blocks", JS_NewInt64(js, st.st_blocks));
#else
JS_SetPropertyStr(js, obj, "blksize", JS_NewInt32(js, 4096));
JS_SetPropertyStr(js, obj, "blocks", JS_NewInt64(js, st.st_size / 512));
JS_SetPropertyStr(js, obj.val, "blksize", JS_NewInt32(js, 4096));
JS_SetPropertyStr(js, obj.val, "blocks", JS_NewInt64(js, st.st_size / 512));
#endif
// Add boolean properties for file type
JS_SetPropertyStr(js, obj, "isFile", JS_NewBool(js, S_ISREG(st.st_mode)));
JS_SetPropertyStr(js, obj, "isDirectory", JS_NewBool(js, S_ISDIR(st.st_mode)));
JS_SetPropertyStr(js, obj, "isSymlink", JS_NewBool(js, S_ISLNK(st.st_mode)));
JS_SetPropertyStr(js, obj, "isFIFO", JS_NewBool(js, S_ISFIFO(st.st_mode)));
JS_SetPropertyStr(js, obj, "isSocket", JS_NewBool(js, S_ISSOCK(st.st_mode)));
JS_SetPropertyStr(js, obj, "isCharDevice", JS_NewBool(js, S_ISCHR(st.st_mode)));
JS_SetPropertyStr(js, obj, "isBlockDevice", JS_NewBool(js, S_ISBLK(st.st_mode)));
JS_SetPropertyStr(js, obj.val, "isFile", JS_NewBool(js, S_ISREG(st.st_mode)));
JS_SetPropertyStr(js, obj.val, "isDirectory", JS_NewBool(js, S_ISDIR(st.st_mode)));
JS_SetPropertyStr(js, obj.val, "isSymlink", JS_NewBool(js, S_ISLNK(st.st_mode)));
JS_SetPropertyStr(js, obj.val, "isFIFO", JS_NewBool(js, S_ISFIFO(st.st_mode)));
JS_SetPropertyStr(js, obj.val, "isSocket", JS_NewBool(js, S_ISSOCK(st.st_mode)));
JS_SetPropertyStr(js, obj.val, "isCharDevice", JS_NewBool(js, S_ISCHR(st.st_mode)));
JS_SetPropertyStr(js, obj.val, "isBlockDevice", JS_NewBool(js, S_ISBLK(st.st_mode)));
JS_FreeCString(js, path);
return obj;
JS_RETURN(obj.val);
)
JSC_SCALL(fd_readdir,
JS_FRAME(js);
#ifdef _WIN32
WIN32_FIND_DATA ffd;
char path[PATH_MAX];
snprintf(path, sizeof(path), "%s\\*", str);
HANDLE hFind = FindFirstFile(path, &ffd);
if (hFind == INVALID_HANDLE_VALUE) {
ret = JS_ThrowInternalError(js, "FindFirstFile failed for %s", path);
ret = JS_ThrowInternalError(js, "FindFirstFile failed for %s", path);
} else {
ret = JS_NewArray(js);
do {
if (strcmp(ffd.cFileName, ".") == 0 || strcmp(ffd.cFileName, "..") == 0) continue;
JS_ArrayPush(js, &ret, JS_NewString(js, ffd.cFileName));
} while (FindNextFile(hFind, &ffd) != 0);
FindClose(hFind);
JS_ROOT(arr, JS_NewArray(js));
do {
if (strcmp(ffd.cFileName, ".") == 0 || strcmp(ffd.cFileName, "..") == 0) continue;
JS_ArrayPush(js, &arr.val, JS_NewString(js, ffd.cFileName));
} while (FindNextFile(hFind, &ffd) != 0);
FindClose(hFind);
ret = arr.val;
}
#else
DIR *d;
struct dirent *dir;
d = opendir(str);
if (d) {
ret = JS_NewArray(js);
JS_ROOT(arr, JS_NewArray(js));
while ((dir = readdir(d)) != NULL) {
if (strcmp(dir->d_name, ".") == 0 || strcmp(dir->d_name, "..") == 0) continue;
JS_ArrayPush(js, &ret, JS_NewString(js, dir->d_name));
JS_ArrayPush(js, &arr.val, JS_NewString(js, dir->d_name));
}
closedir(d);
ret = arr.val;
} else {
ret = JS_ThrowInternalError(js, "opendir failed for %s: %s", str, strerror(errno));
}
#endif
JS_RestoreFrame(_js_ctx, _js_gc_frame, _js_local_frame);
)
JSC_CCALL(fd_is_file,
@@ -585,9 +585,9 @@ JSC_CCALL(fd_slurpwrite,
)
// Helper function for recursive enumeration
static void visit_directory(JSContext *js, JSValue results, int *result_count, const char *curr_path, const char *rel_prefix, int recurse) {
static void visit_directory(JSContext *js, JSValue *results, int *result_count, const char *curr_path, const char *rel_prefix, int recurse) {
if (!curr_path) return;
#ifdef _WIN32
WIN32_FIND_DATA ffd;
char search_path[PATH_MAX];
@@ -602,7 +602,7 @@ static void visit_directory(JSContext *js, JSValue results, int *result_count, c
} else {
strcpy(item_rel, ffd.cFileName);
}
JS_SetPropertyNumber(js, results, (*result_count)++, JS_NewString(js, item_rel));
JS_SetPropertyNumber(js, *results, (*result_count)++, JS_NewString(js, item_rel));
if (recurse) {
struct stat st;
@@ -627,7 +627,7 @@ static void visit_directory(JSContext *js, JSValue results, int *result_count, c
} else {
strcpy(item_rel, dir->d_name);
}
JS_SetPropertyNumber(js, results, (*result_count)++, JS_NewString(js, item_rel));
JS_SetPropertyNumber(js, *results, (*result_count)++, JS_NewString(js, item_rel));
if (recurse) {
struct stat st;
@@ -651,14 +651,16 @@ JSC_SCALL(fd_enumerate,
if (argc > 1)
recurse = JS_ToBool(js, argv[1]);
JSValue results = JS_NewArray(js);
JS_FRAME(js);
JS_ROOT(arr, JS_NewArray(js));
int result_count = 0;
struct stat st;
if (stat(path, &st) == 0 && S_ISDIR(st.st_mode))
visit_directory(js, results, &result_count, path, "", recurse);
visit_directory(js, &arr.val, &result_count, path, "", recurse);
ret = results;
ret = arr.val;
JS_RestoreFrame(_js_ctx, _js_gc_frame, _js_local_frame);
)
JSC_CCALL(fd_realpath,
@@ -753,7 +755,8 @@ static const JSCFunctionListEntry js_fd_funcs[] = {
};
JSValue js_fd_use(JSContext *js) {
JSValue mod = JS_NewObject(js);
JS_SetPropertyFunctionList(js, mod, js_fd_funcs, countof(js_fd_funcs));
return mod;
JS_FRAME(js);
JS_ROOT(mod, JS_NewObject(js));
JS_SetPropertyFunctionList(js, mod.val, js_fd_funcs, countof(js_fd_funcs));
JS_RETURN(mod.val);
}

4
fd.cm
View File

@@ -1,4 +1,4 @@
var fd = native
var fd = use('internal/fd_c')
var wildstar = use('wildstar')
function last_pos(str, sep) {
@@ -97,4 +97,4 @@ fd.globfs = function(globs, dir) {
return results
}
return fd
return fd

7
fit.c
View File

@@ -250,7 +250,8 @@ static const JSCFunctionListEntry js_fit_funcs[] = {
JSValue js_fit_use(JSContext *js)
{
JSValue mod = JS_NewObject(js);
JS_SetPropertyFunctionList(js, mod, js_fit_funcs, countof(js_fit_funcs));
return mod;
JS_FRAME(js);
JS_ROOT(mod, JS_NewObject(js));
JS_SetPropertyFunctionList(js, mod.val, js_fit_funcs, countof(js_fit_funcs));
JS_RETURN(mod.val);
}

82
fold.cm
View File

@@ -5,6 +5,34 @@ var fold = function(ast) {
var scopes = ast.scopes
var nr_scopes = length(scopes)
var type_tag_map = {
array: "array", record: "record", text: "text",
number: "number", blob: "blob"
}
var binary_ops = {
"+": true, "-": true, "*": true, "/": true, "%": true,
"**": true, "==": true, "!=": true, "<": true, ">": true,
"<=": true, ">=": true, "&": true, "|": true, "^": true,
"<<": true, ">>": true, ">>>": true, "&&": true, "||": true,
",": true, in: true
}
var unary_ops = {
"!": true, "~": true, "-unary": true, "+unary": true, delete: true
}
var assign_ops = {
assign: true, "+=": true, "-=": true, "*=": true,
"/=": true, "%=": true, "<<=": true, ">>=": true,
">>>=": true, "&=": true, "^=": true, "|=": true,
"**=": true, "&&=": true, "||=": true
}
var arith_ops = {
"+": true, "-": true, "*": true, "/": true, "%": true, "**": true
}
var comparison_ops = {
"==": true, "!=": true, "<": true, ">": true, "<=": true, ">=": true
}
// ============================================================
// Helpers
// ============================================================
@@ -15,10 +43,18 @@ var fold = function(ast) {
return k == "number" || k == "text" || k == "true" || k == "false" || k == "null"
}
// Only intrinsics that can NEVER disrupt regardless of argument types
var pure_intrinsics = {
is_array: true, is_text: true, is_number: true, is_integer: true,
is_function: true, is_logical: true, is_null: true, is_object: true,
is_stone: true
}
var is_pure = function(expr) {
if (expr == null) return true
var k = expr.kind
var i = 0
var target = null
if (k == "number" || k == "text" || k == "true" || k == "false" ||
k == "null" || k == "name" || k == "this") return true
if (k == "function") return true
@@ -47,6 +83,17 @@ var fold = function(ast) {
if (k == "==" || k == "!=" || k == "&&" || k == "||") {
return is_pure(expr.left) && is_pure(expr.right)
}
if (k == "(") {
target = expr.expression
if (target != null && target.intrinsic == true && pure_intrinsics[target.name] == true) {
i = 0
while (i < length(expr.list)) {
if (!is_pure(expr.list[i])) return false
i = i + 1
}
return true
}
}
return false
}
@@ -175,11 +222,7 @@ var fold = function(ast) {
if (rhs_target != null && rhs_target.intrinsic == true) {
sv = scope_var(fn_nr, name)
if (sv != null && sv.type_tag == null) {
if (rhs_target.name == "array") sv.type_tag = "array"
else if (rhs_target.name == "record") sv.type_tag = "record"
else if (rhs_target.name == "text") sv.type_tag = "text"
else if (rhs_target.name == "number") sv.type_tag = "number"
else if (rhs_target.name == "blob") sv.type_tag = "blob"
if (type_tag_map[rhs_target.name] != null) sv.type_tag = type_tag_map[rhs_target.name]
}
}
}
@@ -338,17 +381,13 @@ var fold = function(ast) {
var arg = null
// Recurse into children first (bottom-up)
if (k == "+" || k == "-" || k == "*" || k == "/" || k == "%" ||
k == "**" || k == "==" || k == "!=" || k == "<" || k == ">" ||
k == "<=" || k == ">=" || k == "&" || k == "|" || k == "^" ||
k == "<<" || k == ">>" || k == ">>>" || k == "&&" || k == "||" ||
k == "," || k == "in") {
if (binary_ops[k] == true) {
expr.left = fold_expr(expr.left, fn_nr)
expr.right = fold_expr(expr.right, fn_nr)
} else if (k == "." || k == "[") {
expr.left = fold_expr(expr.left, fn_nr)
if (k == "[" && expr.right != null) expr.right = fold_expr(expr.right, fn_nr)
} else if (k == "!" || k == "~" || k == "-unary" || k == "+unary" || k == "delete") {
} else if (unary_ops[k] == true) {
expr.expression = fold_expr(expr.expression, fn_nr)
} else if (k == "++" || k == "--") {
return expr
@@ -363,7 +402,7 @@ var fold = function(ast) {
expr.list[i] = fold_expr(expr.list[i], fn_nr)
i = i + 1
}
} else if (k == "array") {
} else if (k == "array" || k == "text literal") {
i = 0
while (i < length(expr.list)) {
expr.list[i] = fold_expr(expr.list[i], fn_nr)
@@ -375,19 +414,10 @@ var fold = function(ast) {
expr.list[i].right = fold_expr(expr.list[i].right, fn_nr)
i = i + 1
}
} else if (k == "text literal") {
i = 0
while (i < length(expr.list)) {
expr.list[i] = fold_expr(expr.list[i], fn_nr)
i = i + 1
}
} else if (k == "function") {
fold_fn(expr)
return expr
} else if (k == "assign" || k == "+=" || k == "-=" || k == "*=" ||
k == "/=" || k == "%=" || k == "<<=" || k == ">>=" ||
k == ">>>=" || k == "&=" || k == "^=" || k == "|=" ||
k == "**=" || k == "&&=" || k == "||=") {
} else if (assign_ops[k] == true) {
expr.right = fold_expr(expr.right, fn_nr)
return expr
}
@@ -409,7 +439,7 @@ var fold = function(ast) {
}
// Binary constant folding
if (k == "+" || k == "-" || k == "*" || k == "/" || k == "%" || k == "**") {
if (arith_ops[k] == true) {
left = expr.left
right = expr.right
if (left != null && right != null && left.kind == "number" && right.kind == "number") {
@@ -441,7 +471,7 @@ var fold = function(ast) {
}
// Comparison folding
if (k == "==" || k == "!=" || k == "<" || k == ">" || k == "<=" || k == ">=") {
if (comparison_ops[k] == true) {
left = expr.left
right = expr.right
if (left != null && right != null) {
@@ -676,6 +706,10 @@ var fold = function(ast) {
}
}
}
// Dead pure call elimination: standalone pure calls with no result
if (stmt.kind == "call" && is_pure(stmt.expression)) {
stmt.dead = true
}
// Dead function elimination
if (stmt.kind == "function" && stmt.name != null) {
sv = scope_var(fn_nr, stmt.name)

BIN
fold.mach

Binary file not shown.

278
fuzz.ce Normal file
View File

@@ -0,0 +1,278 @@
// fuzz.ce — fuzzer driver: generates random programs, runs differential, saves failures
//
// Usage:
// cell fuzz - run 100 iterations with a random seed
// cell fuzz 500 - run 500 iterations with a random seed
// cell fuzz --seed 42 - run 100 iterations starting at seed 42
// cell fuzz 500 --seed 42 - run 500 iterations starting at seed 42
//
// Each iteration generates a random self-checking program, compiles it,
// runs it through both optimized and unoptimized paths, and compares results.
// Failures are saved to tests/fuzz_failures/ for reproduction.
var fd = use('fd')
var time = use('time')
var json = use('json')
var os_ref = use('os')
var analyze = os_ref.analyze
var run_ast_fn = os_ref.run_ast_fn
var run_ast_noopt_fn = os_ref.run_ast_noopt_fn
var fuzzgen = use('fuzzgen')
var _args = []
if (args != null) {
  _args = args
}
// Parse arguments: fuzz [iterations] [--seed N]
var iterations = 100
var start_seed = null
var i = 0
var n = null
var run_err = null
var _run_one = null
while (i < length(_args)) {
  if (_args[i] == '--seed' && i + 1 < length(_args)) {
    // "--seed N" consumes two arguments.
    start_seed = number(_args[i + 1])
    i = i + 2
    continue
  }
  // Any other argument is an iteration count if it parses to a positive number.
  n = number(_args[i])
  i = i + 1
  if (n != null && n > 0) {
    iterations = n
  }
}
// No usable seed given: derive one from the current time.
if (start_seed == null) {
  start_seed = floor(time.number() * 1000) % 1000000
}
// The differential run needs the unoptimized runner.
if (!run_ast_noopt_fn) {
  log.console("error: run_ast_noopt_fn not available (rebuild bootstrap)")
  $stop()
  return
}
// Ensure failures directory exists
var failures_dir = "tests/fuzz_failures"
// Create `path` and any missing parent directories.
// Does nothing when `path` already is a directory. The path is split on '/'
// and each prefix is created in turn; empty segments are ignored.
function ensure_dir(path) {
  if (fd.is_dir(path)) return
  var parts = array(path, '/')
  // Keep the leading '/' of an absolute path. Its empty first segment is
  // skipped below, so without this the directories would be created relative
  // to the current directory instead.
  var current = starts_with(path, '/') ? '/' : ''
  var j = 0
  while (j < length(parts)) {
    if (parts[j] != '') {
      current = current + parts[j] + '/'
      if (!fd.is_dir(current)) {
        fd.mkdir(current)
      }
    }
    j = j + 1
  }
}
// Deep comparison.
// Values that are `==` are equal; two nulls are equal; two arrays are equal
// when they have the same length and their elements are pairwise equal.
// Every other pair compares unequal.
function values_equal(a, b) {
  var idx = 0
  var count = 0
  if (a == b) return true
  if (is_null(a) && is_null(b)) return true
  if (is_null(a) || is_null(b)) return false
  if (!(is_array(a) && is_array(b))) return false
  count = length(a)
  if (count != length(b)) return false
  while (idx < count) {
    if (!values_equal(a[idx], b[idx])) return false
    idx = idx + 1
  }
  return true
}
// Short display form of a result value for failure messages: "null", quoted
// text, text() of numbers and logicals, "<function>", otherwise "<other>".
function describe(val) {
  if (is_null(val)) return "null"
  if (is_text(val)) return `"${val}"`
  if (is_number(val) || is_logical(val)) return text(val)
  return is_function(val) ? "<function>" : "<other>"
}
// Run a single fuzz iteration
// Generates a program from `seed_val`, loads it through both the optimized
// and the unoptimized runner, and compares their behavior.
// Returns {seed, errors, src}; `errors` is empty when nothing was flagged.
function run_fuzz(seed_val) {
var src = fuzzgen.generate(seed_val)
var name = "fuzz_" + text(seed_val)
var ast = null
var mod_opt = null
var mod_noopt = null
var opt_err = null
var noopt_err = null
var errors = []
var keys = null
var k = 0
var key = null
var ret = null
var _run = null
var run_err = null
var keys2 = null
var k2 = 0
var key2 = null
var opt_result = null
var noopt_result = null
var opt_fn_err = null
var noopt_fn_err = null
var _run_opt = null
var _run_noopt = null
// Parse
// Each risky step is wrapped in a function with a `disruption` clause so a
// disruption is recorded in a local instead of aborting the iteration.
var _parse = function() {
ast = analyze(src, name + ".cm")
} disruption {
push(errors, "parse error")
}
_parse()
if (length(errors) > 0) return {seed: seed_val, errors: errors, src: src}
// Run optimized
var _opt = function() {
mod_opt = run_ast_fn(name, ast, {use: function(p) { return use(p) }})
} disruption {
opt_err = "disrupted"
}
_opt()
// Run unoptimized
var _noopt = function() {
mod_noopt = run_ast_noopt_fn(name + "_noopt", ast, {use: function(p) { return use(p) }})
} disruption {
noopt_err = "disrupted"
}
_noopt()
// Check module-level behavior
// A mismatch means exactly one of the two paths disrupted while loading.
if (opt_err != noopt_err) {
push(errors, `module load: opt=${opt_err != null ? opt_err : "ok"} noopt=${noopt_err != null ? noopt_err : "ok"}`)
return {seed: seed_val, errors: errors, src: src}
}
if (opt_err != null) {
// Both failed to load — consistent
return {seed: seed_val, errors: errors, src: src}
}
// Run self-checks (optimized module)
// Every function in the module is called with no arguments; a text return
// value or a disruption is recorded as a failure.
if (is_object(mod_opt)) {
keys = array(mod_opt)
k = 0
while (k < length(keys)) {
key = keys[k]
if (is_function(mod_opt[key])) {
ret = null
run_err = null
_run = function() {
ret = mod_opt[key]()
} disruption {
run_err = "disrupted"
}
_run()
if (is_text(ret)) {
push(errors, `self-check ${key}: ${ret}`)
}
if (run_err != null) {
push(errors, `self-check ${key}: unexpected disruption`)
}
}
k = k + 1
}
}
// Differential check on each function
// Functions present in both modules are called again in each; they must
// agree on whether they disrupt and, if not, on their results (values_equal).
if (is_object(mod_opt) && is_object(mod_noopt)) {
keys2 = array(mod_opt)
k2 = 0
while (k2 < length(keys2)) {
key2 = keys2[k2]
if (is_function(mod_opt[key2]) && is_function(mod_noopt[key2])) {
opt_result = null
noopt_result = null
opt_fn_err = null
noopt_fn_err = null
_run_opt = function() {
opt_result = mod_opt[key2]()
} disruption {
opt_fn_err = "disrupted"
}
_run_opt()
_run_noopt = function() {
noopt_result = mod_noopt[key2]()
} disruption {
noopt_fn_err = "disrupted"
}
_run_noopt()
if (opt_fn_err != noopt_fn_err) {
push(errors, `diff ${key2}: opt=${opt_fn_err != null ? opt_fn_err : "ok"} noopt=${noopt_fn_err != null ? noopt_fn_err : "ok"}`)
} else if (!values_equal(opt_result, noopt_result)) {
push(errors, `diff ${key2}: opt=${describe(opt_result)} noopt=${describe(noopt_result)}`)
}
}
k2 = k2 + 1
}
}
return {seed: seed_val, errors: errors, src: src}
}
// Main loop
// Runs `iterations` fuzz iterations using consecutive seeds starting at
// `start_seed`, tallies passes/failures, and writes the generated source of
// every failing seed to `failures_dir` so it can be reproduced later.
// NOTE(review): `i`, `run_err`, `_run_one`, `iterations`, `start_seed` and
// `failures_dir` are not declared in this section — presumably declared
// earlier in the file; confirm.
log.console(`Fuzzing: ${text(iterations)} iterations, starting seed=${text(start_seed)}`)
var total_pass = 0
var total_fail = 0
var result = null
// NOTE(review): `j` is not used anywhere in the visible remainder of the file.
var j = 0
var current_seed = 0
var fail_path = null
i = 0
while (i < iterations) {
current_seed = start_seed + i
run_err = null
// Any disruption that escapes run_fuzz is recorded as a failure for this
// seed instead of aborting the whole fuzz run.
_run_one = function() {
result = run_fuzz(current_seed)
} disruption {
run_err = "generator crashed"
}
_run_one()
if (run_err != null) {
// No source is available in this case, so a placeholder is saved instead.
result = {seed: current_seed, errors: [run_err], src: "// generator crashed"}
}
if (length(result.errors) > 0) {
total_fail = total_fail + 1
// Only the first error is printed; the saved source reproduces the rest.
log.console(` FAIL seed=${text(current_seed)}: ${result.errors[0]}`)
// Save failure source for reproduction
ensure_dir(failures_dir)
fail_path = failures_dir + "/seed_" + text(current_seed) + ".cm"
fd.slurpwrite(fail_path, stone(blob(result.src)))
log.console(` saved to ${fail_path}`)
} else {
total_pass = total_pass + 1
}
// Progress report every 100 iterations
if ((i + 1) % 100 == 0) {
log.console(` progress: ${text(i + 1)}/${text(iterations)} (${text(total_pass)} passed, ${text(total_fail)} failed)`)
}
i = i + 1
}
// Final summary
log.console(`----------------------------------------`)
log.console(`Fuzz: ${text(total_pass)} passed, ${text(total_fail)} failed, ${text(iterations)} total`)
if (total_fail > 0) {
log.console(`Failures saved to ${failures_dir}/`)
}
$stop()

348
fuzzgen.cm Normal file
View File

@@ -0,0 +1,348 @@
// fuzzgen.cm — generates self-checking .cm programs for fuzz testing
// Each generated program returns a record of test functions that
// validate their own expected results.
// Newline constant — backtick strings don't interpret \n as escape
var NL = "\n"
// Simple seeded PRNG (xorshift32)
var _seed = 1
// Reset the PRNG state to `s`. A seed of 0 is replaced by 1 so the
// generator never starts from the zero state.
function seed(s) {
  if (s != 0) {
    _seed = s
  } else {
    _seed = 1
  }
}
// Advance the xorshift state (shifts 13 / 17 / 5) and return it as a
// non-negative number.
function rand() {
  _seed = _seed ^ (_seed << 13)
  _seed = _seed ^ (_seed >> 17)
  _seed = _seed ^ (_seed << 5)
  if (_seed < 0) _seed = -_seed
  // Zero is a fixed point of the xor/shift steps: once the state is 0 every
  // later draw is 0 as well. seed() already forbids a zero state; keep the
  // same invariant here in case the signed arithmetic above ever lands on it
  // (e.g. -1 ^ (-1 >> 17) is 0 when >> propagates the sign bit).
  if (_seed == 0) _seed = 1
  return _seed
}
// Draw an integer in the inclusive range [lo, hi] by reducing one rand()
// draw modulo the size of the range.
function rand_int(lo, hi) {
  var span = hi - lo + 1
  var offset = rand() % span
  return lo + offset
}
// Draw a number in [-100, 100] in steps of 0.01 (an integer number of
// hundredths divided by 100).
function rand_float() {
  var hundredths = rand_int(-10000, 10000)
  return hundredths / 100
}
// Draw a logical: true when the next rand() draw is even.
function rand_bool() {
  var draw = rand()
  return (draw % 2) == 0
}
// Return one element of `arr`, indexed by the next rand() draw modulo the
// array length.
function pick(arr) {
  var idx = rand() % length(arr)
  return arr[idx]
}
// Expression generators — each returns {src: "code", val: expected_value}
// depth is decremented to prevent infinite recursion
// Integer literal in [-10000, 10000]; returns {src, val} where src is the
// literal's source text and val its value.
function gen_int_literal() {
  var n = rand_int(-10000, 10000)
  var lit = {src: text(n), val: n}
  return lit
}
// Number literal drawn from rand_float(); returns {src, val}.
function gen_float_literal() {
  var x = rand_float()
  var lit = {src: text(x), val: x}
  return lit
}
// Logical literal; returns {src, val} with src "true" or "false".
function gen_bool_literal() {
  var flag = rand_bool()
  return {src: flag ? "true" : "false", val: flag}
}
// Text literal chosen from a fixed word list; src is the word wrapped in
// double quotes, val is the bare word.
function gen_text_literal() {
  var chosen = pick(["alpha", "beta", "gamma", "delta", "epsilon"])
  return {src: "\"" + chosen + "\"", val: chosen}
}
// The null literal; returns {src: "null", val: null}. Consumes no random draws.
function gen_null_literal() {
  var lit = {src: "null", val: null}
  return lit
}
// Build a random integer expression tree of the given depth using +, - and *.
// Returns {src, val}: the parenthesized source text and its expected value.
// At depth <= 0 (or when the result leaves the safe-integer range) a plain
// integer literal is returned instead.
function gen_int_expr(depth) {
  var lhs = null
  var rhs = null
  var op = null
  var value = null
  var limit = 9007199254740991
  if (depth <= 0) return gen_int_literal()
  lhs = gen_int_expr(depth - 1)
  rhs = gen_int_expr(depth - 1)
  // A zero right operand is replaced by the literal 1. No division is
  // generated here; the substitution is kept so output per seed is unchanged.
  if (rhs.val == 0) rhs = {src: "1", val: 1}
  op = pick(["+", "-", "*"])
  if (op == "*") {
    value = lhs.val * rhs.val
  } else if (op == "-") {
    value = lhs.val - rhs.val
  } else {
    value = lhs.val + rhs.val
  }
  // Fall back to a fresh literal if the value is outside the safe integer range
  if (value > limit || value < -limit) {
    return gen_int_literal()
  }
  return {src: "(" + lhs.src + " " + op + " " + rhs.src + ")", val: value}
}
// Build a random expression tree over rand_float() literals using +, - and *.
// Returns {src, val}; at depth <= 0 a plain literal is returned.
function gen_float_expr(depth) {
  var lhs = null
  var rhs = null
  var op = null
  var value = null
  if (depth <= 0) return gen_float_literal()
  lhs = gen_float_expr(depth - 1)
  rhs = gen_float_expr(depth - 1)
  // A zero right operand is replaced by the literal 1.0
  if (rhs.val == 0) rhs = {src: "1.0", val: 1.0}
  op = pick(["+", "-", "*"])
  if (op == "*") {
    value = lhs.val * rhs.val
  } else if (op == "-") {
    value = lhs.val - rhs.val
  } else {
    value = lhs.val + rhs.val
  }
  return {src: "(" + lhs.src + " " + op + " " + rhs.src + ")", val: value}
}
// Text expression: a single literal at depth <= 0, otherwise the
// concatenation of two literals. Returns {src, val}.
function gen_text_expr(depth) {
  var left = null
  var right = null
  if (depth > 0) {
    left = gen_text_literal()
    right = gen_text_literal()
    return {src: "(" + left.src + " + " + right.src + ")", val: left.val + right.val}
  }
  return gen_text_literal()
}
// Comparison of two integer expressions with a random relational operator.
// Both operands are generated one level shallower than `depth` (never
// below 0). Returns {src, val} where val is the expected logical result.
function gen_comparison_expr(depth) {
  var sub_depth = 0
  var lhs = null
  var rhs = null
  var op = null
  var outcome = null
  if (depth > 0) sub_depth = depth - 1
  lhs = gen_int_expr(sub_depth)
  rhs = gen_int_expr(sub_depth)
  op = pick(["==", "!=", "<", ">", "<=", ">="])
  if (op == "<") {
    outcome = lhs.val < rhs.val
  } else if (op == "<=") {
    outcome = lhs.val <= rhs.val
  } else if (op == ">") {
    outcome = lhs.val > rhs.val
  } else if (op == ">=") {
    outcome = lhs.val >= rhs.val
  } else if (op == "==") {
    outcome = lhs.val == rhs.val
  } else {
    outcome = lhs.val != rhs.val
  }
  return {src: "(" + lhs.src + " " + op + " " + rhs.src + ")", val: outcome}
}
// Generate an if-else expression test
// Emit the body of a test that assigns through an if/else on a generated
// comparison and checks which branch was taken.
function gen_if_else_test() {
  var cond = gen_comparison_expr(1)
  var then_val = gen_int_literal()
  var else_val = gen_int_literal()
  var expected = null
  var want = null
  if (cond.val) {
    expected = then_val.val
  } else {
    expected = else_val.val
  }
  want = text(expected)
  return text([
    "var result = null",
    " if (" + cond.src + ") {",
    " result = " + then_val.src,
    " } else {",
    " result = " + else_val.src,
    " }",
    " if (result != " + want + ") return \"if_else: expected " + want + " got \" + text(result)"
  ], NL)
}
// Generate a loop accumulator test
// Emit the body of a test that adds `step` to an accumulator `count` times
// in a while loop and checks the final total.
function gen_loop_test() {
  var count = rand_int(1, 50)
  var step = rand_int(1, 10)
  // Adding step count times is count * step
  var want = text(count * step)
  return text([
    "var acc = 0",
    " var i = 0",
    " while (i < " + text(count) + ") {",
    " acc = acc + " + text(step),
    " i = i + 1",
    " }",
    " if (acc != " + want + ") return \"loop: expected " + want + " got \" + text(acc)"
  ], NL)
}
// Generate a closure test
// Emit the body of a test in which a closure mutates a captured counter a
// fixed number of times; the final counter value is checked.
function gen_closure_test() {
  var start = rand_int(1, 100)
  var delta = rand_int(1, 10)
  var reps = rand_int(1, 10)
  var want = text(start + (delta * reps))
  return text([
    "var counter = " + text(start),
    " var inc = function() { counter = counter + " + text(delta) + " }",
    " var i = 0",
    " while (i < " + text(reps) + ") {",
    " inc()",
    " i = i + 1",
    " }",
    " if (counter != " + want + ") return \"closure: expected " + want + " got \" + text(counter)"
  ], NL)
}
// Generate a record property test
// Emit the body of a test that builds a two-field record and checks the sum
// of its fields.
function gen_record_test() {
  var first = gen_int_literal()
  var second = gen_int_literal()
  var want = text(first.val + second.val)
  return text([
    "var r = {a: " + first.src + ", b: " + second.src + "}",
    " var result = r.a + r.b",
    " if (result != " + want + ") return \"record: expected " + want + " got \" + text(result)"
  ], NL)
}
// Generate an array test
// Emit the body of a test that sums an array literal of 2..10 integers in
// [-100, 100] with a while loop and checks the total.
function gen_array_test() {
  var n = rand_int(2, 10)
  var elems = []
  var total = 0
  var idx = 0
  var v = 0
  var want = null
  // One pass: draw each element, record its source text, accumulate the sum
  while (idx < n) {
    v = rand_int(-100, 100)
    push(elems, text(v))
    total = total + v
    idx = idx + 1
  }
  want = text(total)
  return text([
    "var a = [" + text(elems, ", ") + "]",
    " var _sum = 0",
    " var i = 0",
    " while (i < length(a)) {",
    " _sum = _sum + a[i]",
    " i = i + 1",
    " }",
    " if (_sum != " + want + ") return \"array_sum: expected " + want + " got \" + text(_sum)"
  ], NL)
}
// Generate a nested function / higher-order test
// Emit the body of a test that builds a multiplier via a function-returning
// function and checks the product of one call.
function gen_higher_order_test() {
  var factor = rand_int(2, 10)
  var arg = rand_int(1, 100)
  var want = text(arg * factor)
  return text([
    "var make_mul = function(m) {",
    " return function(x) { return x * m }",
    " }",
    " var fn = make_mul(" + text(factor) + ")",
    " var result = fn(" + text(arg) + ")",
    " if (result != " + want + ") return \"higher_order: expected " + want + " got \" + text(result)"
  ], NL)
}
// Generate a disruption handling test
// Emit the body of a test that disrupts inside a function and checks that
// its disruption clause ran. Consumes no random draws.
function gen_disrupt_test() {
  return text([
    "var caught = false",
    " var _fn = function() { disrupt } disruption { caught = true }",
    " _fn()",
    " if (!caught) return \"disrupt: expected to catch disruption\""
  ], NL)
}
// Generate a text operation test
// Emit the body of a test that concatenates two words from a fixed list and
// checks the resulting text.
function gen_text_op_test() {
  var vocab = ["hello", "world", "foo", "bar", "baz"]
  var first = pick(vocab)
  var second = pick(vocab)
  var joined = first + second
  return text([
    "var a = \"" + first + "\"",
    " var b = \"" + second + "\"",
    " var c = a + b",
    " if (c != \"" + joined + "\") return \"text_op: expected " + joined + " got \" + c"
  ], NL)
}
// All generators
// Table of test-body generators that generate() picks from at random.
// pick() indexes into this array, so adding, removing or reordering entries
// changes which program a given seed produces.
var generators = [
gen_if_else_test,
gen_loop_test,
gen_closure_test,
gen_record_test,
gen_array_test,
gen_higher_order_test,
gen_disrupt_test,
gen_text_op_test
]
// Generate a complete self-checking .cm program
// Generate a complete self-checking .cm program for seed `s`.
// The program returns a record of 5..15 functions named fuzz_0, fuzz_1, ...;
// each body comes from a randomly picked entry of `generators`.
// Re-seeds the PRNG, so the same seed always yields the same source text.
function generate(s) {
  var count = 0
  var entries = []
  var idx = 0
  var make_body = null
  var body = null
  seed(s)
  count = rand_int(5, 15)
  while (idx < count) {
    make_body = pick(generators)
    body = make_body()
    push(entries, " fuzz_" + text(idx) + ": function() {\n" + " " + body + "\n" + " }")
    idx = idx + 1
  }
  return "// Auto-generated fuzz test (seed=" + text(s) + ")\nreturn {\n" + text(entries, ",\n") + "\n}\n"
}
return {
generate: generate,
seed: seed
}

View File

@@ -1,5 +1,5 @@
// Hidden vars come from env:
// CLI mode (cell_init): os, args, core_path, shop_path, emit_qbe, dump_mach
// CLI mode (cell_init): os, args, core_path, shop_path
// Actor spawn (script_startup): os, json, nota, wota, actorsym, init, core_path, shop_path
// args[0] = script name, args[1..] = user args
var load_internal = os.load_internal
@@ -9,11 +9,31 @@ function use_embed(name) {
var fd = use_embed('fd')
var json = use_embed('json')
var crypto = use_embed('crypto')
var use_cache = {}
use_cache['fd'] = fd
use_cache['os'] = os
use_cache['json'] = json
use_cache['crypto'] = crypto
// Return the blake2 digest of `content` rendered via text(..., 'h').
function content_hash(content) {
  var digest = crypto.blake2(content)
  return text(digest, 'h')
}
// Path of the cached .mach file for `hash` under <shop_path>/build,
// or null when no shop path is configured.
function cache_path(hash) {
  if (shop_path) {
    return shop_path + '/build/' + hash + '.mach'
  }
  return null
}
// Create <shop_path>/build if it is not already a directory and return its
// path; returns null when no shop path is configured.
function ensure_build_dir() {
  var build_dir = null
  if (!shop_path) return null
  build_dir = shop_path + '/build'
  if (fd.is_dir(build_dir)) return build_dir
  fd.mkdir(build_dir)
  return build_dir
}
// Bootstrap: load tokenize.cm, parse.cm, fold.cm from pre-compiled mach bytecode
function use_basic(path) {
@@ -24,15 +44,30 @@ function use_basic(path) {
return result
}
// Load a module from .mach bytecode (bootstrap modules have no source fallback)
// Load a module from cached .mach or .mcode bytecode
function boot_load(name, env) {
var mach_path = core_path + '/' + name + ".mach"
var data = null
if (fd.is_file(mach_path)) {
data = fd.slurp(mach_path)
return mach_load(data, env)
var mcode_path = core_path + '/boot/' + name + ".cm.mcode"
var mcode_blob = null
var hash = null
var cached = null
var mcode_json = null
var mach_blob = null
if (fd.is_file(mcode_path)) {
mcode_blob = fd.slurp(mcode_path)
hash = content_hash(mcode_blob)
cached = cache_path(hash)
if (cached && fd.is_file(cached)) {
return mach_load(fd.slurp(cached), env)
}
mcode_json = text(mcode_blob)
mach_blob = mach_compile_mcode_bin(name, mcode_json)
if (cached) {
ensure_build_dir()
fd.slurpwrite(cached, mach_blob)
}
return mach_load(mach_blob, env)
}
print("error: missing bootstrap bytecode: " + mach_path + "\n")
print("error: missing bootstrap bytecode: " + name + "\n")
disrupt
}
@@ -48,42 +83,44 @@ use_cache['fold'] = fold_mod
var mcode_mod = boot_load("mcode", boot_env)
use_cache['mcode'] = mcode_mod
var streamline_mod = null
var qbe_emit_mod = null
// Warn if any .cm source is newer than its .mach bytecode
// Warn if any .cm source is newer than its compiled bytecode
function check_mach_stale() {
var pairs = [
["tokenize.cm", "tokenize.mach"],
["parse.cm", "parse.mach"],
["fold.cm", "fold.mach"],
["mcode.cm", "mcode.mach"],
["streamline.cm", "streamline.mach"],
["qbe.cm", "qbe.mach"],
["qbe_emit.cm", "qbe_emit.mach"],
["internal/bootstrap.cm", "internal/bootstrap.mach"],
["internal/engine.cm", "internal/engine.mach"]
var sources = [
{src: "tokenize.cm", mcode: "boot/tokenize.cm.mcode"},
{src: "parse.cm", mcode: "boot/parse.cm.mcode"},
{src: "fold.cm", mcode: "boot/fold.cm.mcode"},
{src: "mcode.cm", mcode: "boot/mcode.cm.mcode"},
{src: "streamline.cm", mcode: "boot/streamline.cm.mcode"},
{src: "qbe.cm", mcode: "boot/qbe.cm.mcode"},
{src: "qbe_emit.cm", mcode: "boot/qbe_emit.cm.mcode"},
{src: "verify_ir.cm", mcode: "boot/verify_ir.cm.mcode"},
{src: "internal/bootstrap.cm", mcode: "boot/bootstrap.cm.mcode"},
{src: "internal/engine.cm", mcode: "boot/engine.cm.mcode"}
]
var stale = []
var _i = 0
var cm_path = null
var mach_path = null
var mcode_path = null
var cm_stat = null
var mach_stat = null
while (_i < length(pairs)) {
cm_path = core_path + '/' + pairs[_i][0]
mach_path = core_path + '/' + pairs[_i][1]
if (fd.is_file(cm_path) && fd.is_file(mach_path)) {
var compiled_stat = null
var entry = null
while (_i < length(sources)) {
entry = sources[_i]
cm_path = core_path + '/' + entry.src
mcode_path = core_path + '/' + entry.mcode
if (fd.is_file(mcode_path) && fd.is_file(cm_path)) {
compiled_stat = fd.stat(mcode_path)
cm_stat = fd.stat(cm_path)
mach_stat = fd.stat(mach_path)
if (cm_stat.mtime > mach_stat.mtime) {
push(stale, pairs[_i][0])
if (cm_stat.mtime > compiled_stat.mtime) {
push(stale, entry.src)
}
}
_i = _i + 1
}
if (length(stale) > 0) {
print("warning: bytecode is stale for: " + text(stale, ", ") + "\n")
print("run 'make regen' or './cell --core . regen.cm' to update\n")
print("run 'make regen' to update\n")
}
}
check_mach_stale()
@@ -123,109 +160,78 @@ function analyze(src, filename) {
return ast
}
// Load a module from .mach bytecode, falling back to source compilation
function load_module(name, env) {
var mach_path = core_path + '/' + name + ".mach"
var data = null
var src_path = null
var src = null
var ast = null
var compiled = null
var optimized = null
if (fd.is_file(mach_path)) {
data = fd.slurp(mach_path)
return mach_load(data, env)
}
src_path = core_path + '/' + name + ".cm"
src = text(fd.slurp(src_path))
ast = analyze(src, src_path)
compiled = mcode_mod(ast)
optimized = streamline_mod(compiled)
return mach_eval_mcode(name, json.encode(optimized), env)
}
// Load optimization pipeline modules (needs analyze to be defined)
var qbe_macros = null
streamline_mod = load_module("streamline", boot_env)
streamline_mod = boot_load("streamline", boot_env)
use_cache['streamline'] = streamline_mod
if (emit_qbe) {
qbe_macros = load_module("qbe", boot_env)
qbe_emit_mod = load_module("qbe_emit", boot_env)
use_cache['qbe'] = qbe_macros
use_cache['qbe_emit'] = qbe_emit_mod
}
// Lazy-loaded verify_ir module (loaded on first use)
var _verify_ir_mod = null
// Run AST through mcode pipeline → register VM
function run_ast(name, ast, env) {
var compiled = mcode_mod(ast)
if (os._verify_ir) {
if (_verify_ir_mod == null) {
_verify_ir_mod = boot_load('verify_ir', boot_env)
}
compiled._verify = true
compiled._verify_mod = _verify_ir_mod
}
var optimized = streamline_mod(compiled)
var qbe_il = null
if (emit_qbe) {
qbe_il = qbe_emit_mod(optimized, qbe_macros)
print(qbe_il)
return null
// Clean up verify properties before JSON encoding
if (optimized._verify) {
delete optimized._verify
delete optimized._verify_mod
}
if (dump_mach) {
mach_dump_mcode(name, json.encode(optimized), env)
return null
}
return mach_eval_mcode(name, json.encode(optimized), env)
var mcode_json = json.encode(optimized)
var mach_blob = mach_compile_mcode_bin(name, mcode_json)
return mach_load(mach_blob, env)
}
// use() with ƿit pipeline for .cm modules
function use_fn(path) {
var file_path = null
var mach_path = null
var data = null
var script = null
var ast = null
var result = null
if (use_cache[path])
return use_cache[path]
// Compile `ast` to mcode and load it without running the streamline
// optimization pass. Mirrors run_ast minus the optimizer, so callers can
// compare optimized and unoptimized behavior.
function run_ast_noopt(name, ast, env) {
  var raw_mcode = mcode_mod(ast)
  var encoded = json.encode(raw_mcode)
  return mach_load(mach_compile_mcode_bin(name, encoded), env)
}
// Try .mach bytecode first (CWD then core_path)
mach_path = path + '.mach'
if (!fd.is_file(mach_path))
mach_path = core_path + '/' + path + '.mach'
if (fd.is_file(mach_path)) {
data = fd.slurp(mach_path)
result = mach_load(data, {use: use_fn})
use_cache[path] = result
return result
}
// Try .cm source (CWD then core_path)
file_path = path + '.cm'
if (!fd.is_file(file_path))
file_path = core_path + '/' + path + '.cm'
if (fd.is_file(file_path)) {
script = text(fd.slurp(file_path))
ast = analyze(script, file_path)
result = run_ast(path, ast, {use: use_fn})
use_cache[path] = result
return result
}
// Fallback to embedded C module
result = use_embed(replace(path, '/', '_'))
use_cache[path] = result
return result
// Compile `ast` through mcode and streamline into a mach blob without
// loading it, so the caller can write the blob to the build cache.
function compile_to_blob(name, ast) {
  var streamlined = streamline_mod(mcode_mod(ast))
  var encoded = json.encode(streamlined)
  return mach_compile_mcode_bin(name, encoded)
}
// Helper to load engine.cm and run it with given env
function load_engine(env) {
var engine_path = core_path + '/internal/engine.mach'
var data = null
var mcode_path = core_path + '/boot/engine.cm.mcode'
var mcode_blob = null
var hash = null
var cached = null
var mcode_json = null
var mach_blob = null
var engine_src = null
var engine_ast = null
if (fd.is_file(engine_path)) {
data = fd.slurp(engine_path)
return mach_load(data, env)
if (fd.is_file(mcode_path)) {
mcode_blob = fd.slurp(mcode_path)
hash = content_hash(mcode_blob)
cached = cache_path(hash)
if (cached && fd.is_file(cached)) {
return mach_load(fd.slurp(cached), env)
}
mcode_json = text(mcode_blob)
mach_blob = mach_compile_mcode_bin('engine', mcode_json)
if (cached) {
ensure_build_dir()
fd.slurpwrite(cached, mach_blob)
}
return mach_load(mach_blob, env)
}
engine_path = core_path + '/internal/engine.cm'
engine_src = text(fd.slurp(engine_path))
engine_ast = analyze(engine_src, engine_path)
// Fallback: compile from source
var engine_cm = core_path + '/internal/engine.cm'
engine_src = text(fd.slurp(engine_cm))
engine_ast = analyze(engine_src, engine_cm)
return run_ast('engine', engine_ast, env)
}
@@ -234,50 +240,37 @@ function load_engine(env) {
var program = null
var user_args = []
var _j = 0
var script_file = null
var script = null
var ast = null
if (args != null) {
// CLI mode — parse args
// CLI mode — always run as actor program (.ce)
program = args[0]
if (!program) {
print("error: no program specified\n")
disrupt
}
_j = 1
while (_j < length(args)) {
push(user_args, args[_j])
_j = _j + 1
}
// Resolve script file: try .cm then .ce in CWD then core_path
script_file = program
if (!ends_with(script_file, '.ce') && !ends_with(script_file, '.cm'))
script_file = program + '.cm'
if (!fd.is_file(script_file))
script_file = core_path + '/' + program + '.cm'
if (!fd.is_file(script_file))
script_file = program + '.ce'
if (!fd.is_file(script_file))
script_file = core_path + '/' + program + '.ce'
if (ends_with(script_file, '.ce')) {
// Actor script — delegate to engine
load_engine({
os: os, actorsym: actorsym,
init: {program: program, arg: user_args},
core_path: core_path, shop_path: shop_path, json: json,
analyze: analyze, run_ast_fn: run_ast
})
} else {
// Module script — run directly
script = text(fd.slurp(script_file))
ast = analyze(script, script_file)
run_ast(program, ast, {use: use_fn, args: user_args, json: json})
}
load_engine({
os: os, actorsym: actorsym,
init: {program: program, arg: user_args},
core_path: core_path, shop_path: shop_path, json: json,
analyze: analyze, run_ast_fn: run_ast, run_ast_noopt_fn: run_ast_noopt,
use_cache: use_cache,
content_hash: content_hash, cache_path: cache_path,
ensure_build_dir: ensure_build_dir, compile_to_blob_fn: compile_to_blob
})
} else {
// Actor spawn mode — load engine.cm with full actor env
load_engine({
os: os, actorsym: actorsym, init: init,
core_path: core_path, shop_path: shop_path, json: json, nota: nota, wota: wota,
analyze: analyze, run_ast_fn: run_ast
analyze: analyze, run_ast_fn: run_ast, run_ast_noopt_fn: run_ast_noopt,
use_cache: use_cache,
content_hash: content_hash, cache_path: cache_path,
ensure_build_dir: ensure_build_dir, compile_to_blob_fn: compile_to_blob
})
}

Binary file not shown.

View File

@@ -1,4 +1,4 @@
// Hidden vars (os, actorsym, init, core_path, shop_path, analyze, run_ast_fn, json) come from env
// Hidden vars (os, actorsym, init, core_path, shop_path, analyze, run_ast_fn, run_ast_noopt_fn, json, use_cache, content_hash, cache_path, ensure_build_dir, compile_to_blob_fn) come from env
// In actor spawn mode, also: nota, wota
var ACTORDATA = actorsym
var SYSYM = '__SYSTEM__'
@@ -53,7 +53,6 @@ var js = use_embed('js')
// shop_path may be null if --core was used without --shop
var packages_path = shop_path ? shop_path + '/packages' : null
var use_cache = {}
use_cache['core/os'] = os
// Extra env properties added as engine initializes (log, runtime fns, etc.)
@@ -70,26 +69,70 @@ function use_core(path) {
var result = null
var script = null
var ast = null
var c_cache_key = null
// Build env: merge core_extras, include C embed as 'native' if available
// If C embed exists, register it so .cm modules can use('internal/<name>_c')
if (sym) {
c_cache_key = 'core/internal/' + path + '_c'
if (!use_cache[c_cache_key])
use_cache[c_cache_key] = sym
}
// Build env: merge core_extras
env = {use: use_core}
arrfor(array(core_extras), function(k) { env[k] = core_extras[k] })
if (sym) env.native = sym
// Check for pre-compiled .mach file first
var mach_path = core_path + '/' + path + '.mach'
// Check for pre-compiled .cm.mach file first
var mach_path = core_path + '/' + path + '.cm.mach'
if (fd.is_file(mach_path)) {
result = mach_load(fd.slurp(mach_path), env)
use_cache[cache_key] = result
return result
}
// Check for .cm.mcode JSON IR
var mcode_path = core_path + '/' + path + '.cm.mcode'
var mcode_blob = null
var hash = null
var cached_path = null
var mach_blob = null
var source_blob = null
if (fd.is_file(mcode_path)) {
mcode_blob = fd.slurp(mcode_path)
hash = content_hash(mcode_blob)
cached_path = cache_path(hash)
if (cached_path && fd.is_file(cached_path)) {
result = mach_load(fd.slurp(cached_path), env)
} else {
mach_blob = mach_compile_mcode_bin('core:' + path, text(mcode_blob))
if (cached_path) {
ensure_build_dir()
fd.slurpwrite(cached_path, mach_blob)
}
result = mach_load(mach_blob, env)
}
use_cache[cache_key] = result
return result
}
// Fall back to source .cm file — compile at runtime
var file_path = core_path + '/' + path + MOD_EXT
if (fd.is_file(file_path)) {
script = text(fd.slurp(file_path))
ast = analyze(script, file_path)
result = run_ast_fn('core:' + path, ast, env)
source_blob = fd.slurp(file_path)
hash = content_hash(source_blob)
cached_path = cache_path(hash)
if (cached_path && fd.is_file(cached_path)) {
result = mach_load(fd.slurp(cached_path), env)
} else {
script = text(source_blob)
ast = analyze(script, file_path)
mach_blob = compile_to_blob_fn('core:' + path, ast)
if (cached_path) {
ensure_build_dir()
fd.slurpwrite(cached_path, mach_blob)
}
result = mach_load(mach_blob, env)
}
use_cache[cache_key] = result
return result
}
@@ -201,14 +244,27 @@ function create_actor(desc) {
var $_ = {}
$_.self = create_actor()
os.use_cache = use_cache
os.global_shop_path = shop_path
os.$_ = $_
os.analyze = analyze
os.run_ast_fn = run_ast_fn
os.json = json
use_cache['core/json'] = json
// Create runtime_env early (empty) — filled after pronto loads.
// Shop accesses it lazily (in inject_env, called at module-use time, not load time)
// so it sees the filled version.
var runtime_env = {}
// Populate core_extras with everything shop (and other core modules) need
core_extras.use_cache = use_cache
core_extras.shop_path = shop_path
core_extras.analyze = analyze
core_extras.run_ast_fn = run_ast_fn
core_extras.run_ast_noopt_fn = run_ast_noopt_fn
core_extras.core_json = json
core_extras.actor_api = $_
core_extras.runtime_env = runtime_env
core_extras.content_hash = content_hash
core_extras.cache_path = cache_path
core_extras.ensure_build_dir = ensure_build_dir
// NOW load shop — it receives all of the above via env
var shop = use_core('internal/shop')
var time = use_core('time')
@@ -218,29 +274,24 @@ var parallel = pronto.parallel
var race = pronto.race
var sequence = pronto.sequence
// Create runtime environment for modules
var runtime_env = {
logical: logical,
some: some,
every: every,
starts_with: starts_with,
ends_with: ends_with,
actor: actor,
is_actor: is_actor,
log: log,
send: send,
fallback: fallback,
parallel: parallel,
race: race,
sequence: sequence
}
// Fill runtime_env (same object reference shop holds)
runtime_env.logical = logical
runtime_env.some = some
runtime_env.every = every
runtime_env.starts_with = starts_with
runtime_env.ends_with = ends_with
runtime_env.actor = actor
runtime_env.is_actor = is_actor
runtime_env.log = log
runtime_env.send = send
runtime_env.fallback = fallback
runtime_env.parallel = parallel
runtime_env.race = race
runtime_env.sequence = sequence
// Make runtime functions available to modules loaded via use_core
arrfor(array(runtime_env), function(k) { core_extras[k] = runtime_env[k] })
// Pass to os for shop to access
os.runtime_env = runtime_env
$_.time_limit = function(requestor, seconds)
{
if (!pronto.is_requestor(requestor)) {
@@ -728,7 +779,7 @@ function report_to_overling(msg)
var program = _cell.args.program
if (!program) {
log.error('No program specified. Usage: cell <program.ce> [args...]')
log.error('No program specified. Usage: cell <program> [args...]')
os.exit(1)
}
@@ -818,6 +869,10 @@ function enet_check()
actor_mod.setname(_cell.args.program)
var prog = _cell.args.program
if (ends_with(prog, '.cm')) {
os.print(`error: ${prog} is a module (.cm), not a program (.ce)\n`)
os.exit(1)
}
if (ends_with(prog, '.ce')) prog = text(prog, 0, -3)
var package = use_core('package')
@@ -864,14 +919,32 @@ $_.clock(_ => {
env.use = function(path) {
var ck = 'core/' + path
if (use_cache[ck]) return use_cache[ck]
var core_mod = use_core(path)
if (core_mod) return core_mod
return shop.use(path, pkg)
}
env.args = _cell.args.arg
env.log = log
var script = text(fd.slurp(prog_path))
var ast = analyze(script, prog_path)
var val = run_ast_fn(prog, ast, env)
var source_blob = fd.slurp(prog_path)
var hash = content_hash(source_blob)
var cached_path = cache_path(hash)
var val = null
var script = null
var ast = null
var mach_blob = null
if (cached_path && fd.is_file(cached_path)) {
val = mach_load(fd.slurp(cached_path), env)
} else {
script = text(source_blob)
ast = analyze(script, prog_path)
mach_blob = compile_to_blob_fn(prog, ast)
if (cached_path) {
ensure_build_dir()
fd.slurpwrite(cached_path, mach_blob)
}
val = mach_load(mach_blob, env)
}
if (val) {
log.error('Program must not return anything')
disrupt

Binary file not shown.

View File

@@ -75,7 +75,8 @@ static const JSCFunctionListEntry js_kim_funcs[] = {
JSValue js_kim_use(JSContext *js)
{
JSValue mod = JS_NewObject(js);
JS_SetPropertyFunctionList(js, mod, js_kim_funcs, countof(js_kim_funcs));
return mod;
JS_FRAME(js);
JS_ROOT(mod, JS_NewObject(js));
JS_SetPropertyFunctionList(js, mod.val, js_kim_funcs, countof(js_kim_funcs));
JS_RETURN(mod.val);
}

View File

@@ -277,29 +277,31 @@ JSC_CCALL(os_mallinfo,
)
static JSValue js_os_rusage(JSContext *js, JSValue self, int argc, JSValue *argv) {
JSValue ret = JS_NULL;
ret = JS_NewObject(js);
JS_FRAME(js);
JS_ROOT(ret, JS_NewObject(js));
#if defined(__linux__) || defined(__APPLE__)
struct rusage jsmem;
getrusage(RUSAGE_SELF, &jsmem);
JSJMEMRET(ru_maxrss);
JSJMEMRET(ru_ixrss);
JSJMEMRET(ru_idrss);
JSJMEMRET(ru_isrss);
JSJMEMRET(ru_minflt);
JSJMEMRET(ru_majflt);
JSJMEMRET(ru_nswap);
JSJMEMRET(ru_inblock);
JSJMEMRET(ru_oublock);
JSJMEMRET(ru_msgsnd);
JSJMEMRET(ru_msgrcv);
JSJMEMRET(ru_nsignals);
JSJMEMRET(ru_nvcsw);
JSJMEMRET(ru_nivcsw);
#define JSJMEMRET_GC(FIELD) JS_SetPropertyStr(js, ret.val, #FIELD, number2js(js, jsmem.FIELD));
JSJMEMRET_GC(ru_maxrss);
JSJMEMRET_GC(ru_ixrss);
JSJMEMRET_GC(ru_idrss);
JSJMEMRET_GC(ru_isrss);
JSJMEMRET_GC(ru_minflt);
JSJMEMRET_GC(ru_majflt);
JSJMEMRET_GC(ru_nswap);
JSJMEMRET_GC(ru_inblock);
JSJMEMRET_GC(ru_oublock);
JSJMEMRET_GC(ru_msgsnd);
JSJMEMRET_GC(ru_msgrcv);
JSJMEMRET_GC(ru_nsignals);
JSJMEMRET_GC(ru_nvcsw);
JSJMEMRET_GC(ru_nivcsw);
#undef JSJMEMRET_GC
#endif
return ret;
JS_RETURN(ret.val);
}
JSC_SCALL(os_system,
@@ -425,6 +427,22 @@ static JSValue js_os_dylib_has_symbol(JSContext *js, JSValue self, int argc, JSV
return JS_NewBool(js, symbol != NULL);
}
/* Load a native .cm module from an already-opened dylib.
   argv[0] must be a dylib object; its opaque handle is passed on to
   cell_rt_native_module_load (qbe_helpers.c), whose result is returned.
   Throws a TypeError when the argument is missing or is not a dylib. */
extern JSValue cell_rt_native_module_load(JSContext *ctx, void *dl_handle);

static JSValue js_os_native_module_load(JSContext *js, JSValue self, int argc, JSValue *argv)
{
  void *dl_handle = NULL;

  if (argc < 1) {
    return JS_ThrowTypeError(js, "native_module_load requires a dylib object");
  }

  dl_handle = JS_GetOpaque(argv[0], js_dylib_class_id);
  if (dl_handle == NULL) {
    return JS_ThrowTypeError(js, "First argument must be a dylib object");
  }

  return cell_rt_native_module_load(js, dl_handle);
}
JSC_CCALL(os_print,
size_t len;
const char *str = JS_ToCStringLen(js, &len, argv[0]);
@@ -552,6 +570,37 @@ JSC_CCALL(os_getenv,
ret = JS_NULL;
)
/* --- Embedded module table (generated for static builds) ---
Uses dlsym to check if cell_embedded_module_lookup exists at runtime.
When linking a static build with a generated module_table.c, the symbol
will be found; in dynamic builds it returns NULL gracefully. */
struct cell_embedded_entry {
const char *name;
const unsigned char *data;
size_t size;
};
typedef const struct cell_embedded_entry *(*cell_embed_lookup_fn)(const char *);
/* Look up an embedded module by name.
   argv[0] is the module name. Returns the module's bytes as a stoned blob,
   or JS_NULL when no name was passed, the lookup symbol is not linked in,
   the name cannot be converted to a C string, or no entry matches. */
static JSValue js_os_embedded_module(JSContext *js, JSValue self, int argc, JSValue *argv)
{
  /* Do not read argv[0] unless the caller actually supplied it
     (same guard as js_os_native_module_load). */
  if (argc < 1)
    return JS_NULL;

  /* The table only exists in static builds; resolve it at runtime so
     dynamic builds without it still link. */
  cell_embed_lookup_fn lookup = (cell_embed_lookup_fn)dlsym(RTLD_DEFAULT, "cell_embedded_module_lookup");
  if (!lookup)
    return JS_NULL;

  const char *name = JS_ToCString(js, argv[0]);
  if (!name)
    return JS_NULL;

  const struct cell_embedded_entry *entry = lookup(name);
  JS_FreeCString(js, name);
  if (!entry)
    return JS_NULL;

  /* Return the mach blob as a stoned blob */
  return js_new_blob_stoned_copy(js, (void *)entry->data, entry->size);
}
static const JSCFunctionListEntry js_os_funcs[] = {
MIST_FUNC_DEF(os, platform, 0),
MIST_FUNC_DEF(os, arch, 0),
@@ -568,6 +617,8 @@ static const JSCFunctionListEntry js_os_funcs[] = {
MIST_FUNC_DEF(os, dylib_open, 1),
MIST_FUNC_DEF(os, dylib_symbol, 2),
MIST_FUNC_DEF(os, dylib_has_symbol, 2),
MIST_FUNC_DEF(os, native_module_load, 1),
MIST_FUNC_DEF(os, embedded_module, 1),
MIST_FUNC_DEF(os, load_internal, 1),
MIST_FUNC_DEF(os, internal_exists, 1),
MIST_FUNC_DEF(os, print, 1),
@@ -579,7 +630,8 @@ JSValue js_os_use(JSContext *js) {
JS_NewClassID(&js_dylib_class_id);
JS_NewClass(js, js_dylib_class_id, &js_dylib_class);
JSValue mod = JS_NewObject(js);
JS_SetPropertyFunctionList(js,mod,js_os_funcs,countof(js_os_funcs));
return mod;
JS_FRAME(js);
JS_ROOT(mod, JS_NewObject(js));
JS_SetPropertyFunctionList(js, mod.val, js_os_funcs, countof(js_os_funcs));
JS_RETURN(mod.val);
}

View File

@@ -12,9 +12,12 @@ var pkg_tools = use('package')
var os = use('os')
var link = use('link')
var analyze = os.analyze
var run_ast_fn = os.run_ast_fn
var shop_json = os.json
// These come from env (via core_extras in engine.cm):
// analyze, run_ast_fn, core_json, use_cache, shop_path, actor_api, runtime_env,
// content_hash, cache_path, ensure_build_dir
var shop_json = core_json
var global_shop_path = shop_path
var my$_ = actor_api
var core = "core"
@@ -45,11 +48,6 @@ function ensure_dir(path) {
}
}
function content_hash(content)
{
return text(crypto.blake2(content), 'h')
}
function hash_path(content)
{
return global_shop_path + '/build' + '/' + content_hash(content)
@@ -66,9 +64,6 @@ var ACTOR_EXT = '.ce'
var dylib_ext = '.dylib' // Default extension
var use_cache = os.use_cache
var global_shop_path = os.global_shop_path
var my$_ = os.$_
Shop.get_package_dir = function(name) {
return global_shop_path + '/packages/' + name
@@ -379,6 +374,32 @@ Shop.extract_commit_hash = function(pkg, response) {
var dylib_visited = {}
var open_dls = {}
var loaded_manifests = {}
// Host target detection for native dylib resolution
// Translate os.platform() / os.arch() into a build-target name.
// Returns 'macos_x86_64', 'macos_arm64', 'linux', 'linux_arm64', 'windows',
// or null when the platform string is not one of the recognised spellings.
function detect_host_target() {
  var plat = os.platform()
  // when os.arch is not available the architecture defaults to arm64
  var cpu = 'arm64'
  var is_x86 = false
  if (os.arch) cpu = os.arch()
  is_x86 = (cpu == 'x86_64')

  if (plat == 'macOS' || plat == 'darwin') {
    if (is_x86) return 'macos_x86_64'
    return 'macos_arm64'
  }
  if (plat == 'Linux' || plat == 'linux') {
    if (is_x86) return 'linux'
    return 'linux_arm64'
  }
  if (plat == 'Windows' || plat == 'windows') {
    return 'windows'
  }
  return null
}
var host_target = detect_host_target()
// Check for a native .cm dylib in the content-addressed cache
// Returns the loaded module value, or null if no native dylib exists
// content_key: the stoned content blob used to address the build cache.
// Probes <hash_path(content_key)>.<host_target><dylib_ext>; on a hit the
// dylib is opened and handed to os.native_module_load.
function try_native_dylib(content_key) {
  var native_path = null
  var handle = null
  // detect_host_target() returns null on unrecognised platforms. Without a
  // target name there is no well-formed path to probe, so bail out before
  // concatenating null into the path.
  if (host_target == null) return null
  native_path = hash_path(content_key) + '.' + host_target + dylib_ext
  if (!fd.is_file(native_path)) return null
  handle = os.dylib_open(native_path)
  if (!handle) return null
  return os.native_module_load(handle)
}
// Default capabilities injected into scripts
// These map to $_ properties in engine.cm
@@ -404,9 +425,8 @@ Shop.get_script_capabilities = function(path) {
// Matches engine.cm's approach: env properties become free variables in the module.
function inject_env(inject) {
var env = {}
var rt = my$_.os ? my$_.os.runtime_env : null
if (rt) {
arrfor(array(rt), function(k) { env[k] = rt[k] })
if (runtime_env) {
arrfor(array(runtime_env), function(k) { env[k] = runtime_env[k] })
}
// Add capability injections with $ prefix
@@ -433,27 +453,54 @@ function resolve_mod_fn(path, pkg) {
if (!fd.is_file(path)) { print(`path ${path} is not a file`); disrupt }
var content = text(fd.slurp(path))
var cached = pull_from_cache(stone(blob(content)))
var content_key = stone(blob(content))
var native_result = null
var cached = null
var ast = null
var compiled = null
var mach_path = null
var mach_blob = null
var mcode_path = null
var ir = null
var optimized = null
var mcode_json = null
var cached_mcode_path = null
// Check for native .cm dylib first (highest performance)
native_result = try_native_dylib(content_key)
if (native_result != null) {
return {_native: true, value: native_result}
}
// Check cache for pre-compiled .mach blob
cached = pull_from_cache(content_key)
if (cached) {
return cached
}
// Check for pre-compiled .mach file alongside .cm source
// Check for cached mcode in content-addressed store
cached_mcode_path = hash_path(content_key) + '.mcode'
if (fd.is_file(cached_mcode_path)) {
mcode_json = text(fd.slurp(cached_mcode_path))
compiled = mach_compile_mcode_bin(path, mcode_json)
put_into_cache(content_key, compiled)
return compiled
}
// Check for pre-compiled .mach or .mcode file alongside .cm source
if (ends_with(path, '.cm')) {
mach_path = text(path, 0, length(path) - 3) + '.mach'
if (fd.is_file(mach_path)) {
mach_blob = fd.slurp(mach_path)
put_into_cache(stone(blob(content)), mach_blob)
put_into_cache(content_key, mach_blob)
return mach_blob
}
mcode_path = path + '.mcode'
if (fd.is_file(mcode_path)) {
compiled = mach_compile_mcode_bin(path, text(fd.slurp(mcode_path)))
put_into_cache(content_key, compiled)
return compiled
}
}
// Compile via full pipeline: analyze → mcode → streamline → serialize
@@ -462,8 +509,15 @@ function resolve_mod_fn(path, pkg) {
ast = analyze(content, path)
ir = _mcode_mod(ast)
optimized = _streamline_mod(ir)
compiled = mach_compile_mcode_bin(path, shop_json.encode(optimized))
put_into_cache(stone(blob(content)), compiled)
mcode_json = shop_json.encode(optimized)
// Cache mcode (architecture-independent) in content-addressed store
ensure_dir(global_shop_path + '/build')
fd.slurpwrite(cached_mcode_path, stone(blob(mcode_json)))
// Cache mach blob
compiled = mach_compile_mcode_bin(path, mcode_json)
put_into_cache(content_key, compiled)
return compiled
}
@@ -569,7 +623,45 @@ function get_lib_path(pkg) {
return global_shop_path + '/lib/' + lib_name + dylib_ext
}
// Open a package's dynamic library and all its dependencies
// Load the manifest for a package's per-module dylibs
// Returns a map of symbol_name -> dylib_path, or null if no manifest
// pkg: package name; '/', '.' and '-' are replaced by '_' to form the
// manifest file name under <global_shop_path>/lib/.
// Results are memoised in loaded_manifests; a missing manifest is remembered
// as `false` so the file system is not probed again, but callers always
// receive null for "no manifest", on the first call and on cached ones alike.
function load_package_manifest(pkg) {
  var cached = loaded_manifests[pkg]
  var lib_name = null
  var manifest_path = null
  var manifest = null
  if (cached != null) {
    // `false` is the cached marker for a manifest that was not found
    if (!cached) return null
    return cached
  }
  lib_name = replace(replace(replace(pkg, '/', '_'), '.', '_'), '-', '_')
  manifest_path = global_shop_path + '/lib/' + lib_name + '.manifest.json'
  if (!fd.is_file(manifest_path)) {
    loaded_manifests[pkg] = false
    return null
  }
  manifest = json.decode(text(fd.slurp(manifest_path)))
  loaded_manifests[pkg] = manifest
  return manifest
}
// Open a per-module dylib from a manifest and return the dlopen handle
// dylib_path: file path of the per-module dylib.
// Handles are memoised in open_dls keyed by path; returns null when the
// file does not exist.
function open_module_dylib(dylib_path) {
  var handle = open_dls[dylib_path]
  if (handle) return handle
  if (!fd.is_file(dylib_path)) return null
  handle = os.dylib_open(dylib_path)
  open_dls[dylib_path] = handle
  return handle
}
// Resolve a C symbol from per-module dylibs for a package
// Returns a loader function or null
// sym: C symbol name; pkg: package whose manifest maps symbols to dylibs.
// The returned loader defers os.dylib_symbol until it is called.
function resolve_dylib_symbol(sym, pkg) {
  var table = load_package_manifest(pkg)
  var lib_file = null
  var lib = null
  if (!table) return null
  lib_file = table[sym]
  if (!lib_file) return null
  lib = open_module_dylib(lib_file)
  if (!lib) return null
  if (!os.dylib_has_symbol(lib, sym)) return null
  return function() { return os.dylib_symbol(lib, sym) }
}
// Open a package's dynamic libraries (loads manifest + dependency manifests)
Shop.open_package_dylib = function(pkg) {
if (pkg == 'core' || !pkg) return
if (dylib_visited[pkg]) return
@@ -599,22 +691,18 @@ Shop.open_package_dylib = function(pkg) {
}
}
var dl_path = get_lib_path(pkg)
if (fd.is_file(dl_path)) {
if (!open_dls[dl_path]) {
open_dls[dl_path] = os.dylib_open(dl_path)
}
}
// Pre-load the manifest
load_package_manifest(pkg)
}
// Resolve a C symbol by searching:
// 1. If package_context is null, only check core internal symbols
// 2. Otherwise: own package (internal then dylib) -> other packages (internal then dylib) -> core (internal only)
// 2. Otherwise: own package (internal then per-module dylib) -> aliased packages -> core (internal only)
// Core is never loaded as a dynamic library via dlopen
function resolve_c_symbol(path, package_context) {
var explicit = split_explicit_package_import(path)
var sym = null
var dl_path = null
var loader = null
var _path = null
var core_sym = null
var canon_pkg = null
@@ -636,10 +724,10 @@ function resolve_c_symbol(path, package_context) {
}
Shop.open_package_dylib(explicit.package)
dl_path = get_lib_path(explicit.package)
if (open_dls[dl_path] && os.dylib_has_symbol(open_dls[dl_path], sym)) {
loader = resolve_dylib_symbol(sym, explicit.package)
if (loader) {
return {
symbol: function() { return os.dylib_symbol(open_dls[dl_path], sym) },
symbol: loader,
scope: SCOPE_PACKAGE,
package: explicit.package,
path: sym
@@ -661,7 +749,7 @@ function resolve_c_symbol(path, package_context) {
return null
}
// 1. Check own package first (internal, then dylib)
// 1. Check own package first (internal, then per-module dylib)
sym = make_c_symbol(package_context, path)
if (os.internal_exists(sym)) {
return {
@@ -672,11 +760,10 @@ function resolve_c_symbol(path, package_context) {
}
Shop.open_package_dylib(package_context)
dl_path = get_lib_path(package_context)
if (open_dls[dl_path] && os.dylib_has_symbol(open_dls[dl_path], sym)) {
loader = resolve_dylib_symbol(sym, package_context)
if (loader) {
return {
symbol: function() { return os.dylib_symbol(open_dls[dl_path], sym) },
symbol: loader,
scope: SCOPE_LOCAL,
path: sym
}
@@ -693,7 +780,6 @@ function resolve_c_symbol(path, package_context) {
mod_name = get_import_name(path)
sym = make_c_symbol(canon_pkg, mod_name)
// Check internal first
if (os.internal_exists(sym)) {
return {
symbol: function() { return os.load_internal(sym) },
@@ -703,12 +789,11 @@ function resolve_c_symbol(path, package_context) {
}
}
// Then check dylib
Shop.open_package_dylib(canon_pkg)
dl_path = get_lib_path(canon_pkg)
if (open_dls[dl_path] && os.dylib_has_symbol(open_dls[dl_path], sym)) {
loader = resolve_dylib_symbol(sym, canon_pkg)
if (loader) {
return {
symbol: function() { return os.dylib_symbol(open_dls[dl_path], sym) },
symbol: loader,
scope: SCOPE_PACKAGE,
package: canon_pkg,
path: sym
@@ -834,20 +919,20 @@ function execute_module(info)
var pkg = null
if (mod_resolve.scope < 900) {
// Build env with runtime fns, capabilities, and use function
file_info = Shop.file_info(mod_resolve.path)
inject = Shop.script_inject_for(file_info)
env = inject_env(inject)
pkg = file_info.package
env.use = make_use_fn(pkg)
// Check if native dylib was resolved
if (is_object(mod_resolve.symbol) && mod_resolve.symbol._native) {
used = mod_resolve.symbol.value
} else {
// Build env with runtime fns, capabilities, and use function
file_info = Shop.file_info(mod_resolve.path)
inject = Shop.script_inject_for(file_info)
env = inject_env(inject)
pkg = file_info.package
env.use = make_use_fn(pkg)
// Add C module as native context if available
if (c_resolve.scope < 900) {
env.native = call_c_module(c_resolve)
// Load compiled bytecode with env
used = mach_load(mod_resolve.symbol, env)
}
// Load compiled bytecode with env
used = mach_load(mod_resolve.symbol, env)
} else if (c_resolve.scope < 900) {
// C only
used = call_c_module(c_resolve)
@@ -869,6 +954,21 @@ function get_module(path, package_context) {
}
Shop.use = function use(path, package_context) {
// Check for embedded module (static builds)
var embed_key = 'embedded:' + path
var embedded = null
var embed_env = null
if (use_cache[embed_key]) return use_cache[embed_key]
if (os.embedded_module) {
embedded = os.embedded_module(path)
if (embedded) {
embed_env = inject_env(SHOP_DEFAULT_INJECT)
embed_env.use = make_use_fn(package_context)
use_cache[embed_key] = mach_load(embedded, embed_env)
return use_cache[embed_key]
}
}
var info = resolve_module_info(path, package_context)
if (!info) { print(`Module ${path} could not be found in ${package_context}`); disrupt }

200
ir_report.ce Normal file
View File

@@ -0,0 +1,200 @@
// ir_report.ce — optimizer flight recorder CLI
//
// Usage: ./cell --core . ir_report.ce [options] <file.cm|file.ce>
//
// Options:
// --summary Per-pass JSON summaries (default)
// --events Include rewrite events
// --types Include type deltas
// --ir-before=PASS Print canonical IR before PASS
// --ir-after=PASS Print canonical IR after PASS
// --ir-all Print canonical IR before/after every pass
// --full Everything (summary + events + types + ir-all)
var fd = use("fd")
var json = use("json")
var tokenize = use("tokenize")
var parse = use("parse")
var fold = use("fold")
var mcode = use("mcode")
var streamline = use("streamline")
var ir_stats = use("ir_stats")
// --- Parse arguments ---
// Option state filled in by the argument scan below.
var filename = null
var opt_events = false
var opt_types = false
var opt_ir_before = null
var opt_ir_after = null
var opt_ir_all = false
// Scratch variables shared by the top-level loops of this script.
var i = 0
var arg = null
var p = null
var e = null
var td = null
// Scan every argument: anything not starting with "--" is the input file
// (the last one wins); "--summary" is accepted and changes nothing.
while (i < length(args)) {
  arg = args[i]
  if (!starts_with(arg, "--")) {
    filename = arg
  } else if (arg == "--full") {
    opt_events = true
    opt_types = true
    opt_ir_all = true
  } else if (arg == "--events") {
    opt_events = true
  } else if (arg == "--types") {
    opt_types = true
  } else if (arg == "--ir-all") {
    opt_ir_all = true
  } else if (starts_with(arg, "--ir-before=")) {
    // 12 is the length of the "--ir-before=" prefix
    opt_ir_before = text(arg, 12)
  } else if (starts_with(arg, "--ir-after=")) {
    // 11 is the length of the "--ir-after=" prefix
    opt_ir_after = text(arg, 11)
  } else if (arg != "--summary") {
    print(`unknown option: ${arg}\n`)
    print("usage: cell --core . ir_report.ce [options] <file>\n")
    $stop()
  }
  i = i + 1
}
// Help text shown when no input file was given on the command line.
var usage_lines = [
  "usage: cell --core . ir_report.ce [options] <file.cm|file.ce>\n",
  " --summary per-pass JSON summaries (default)\n",
  " --events include rewrite events\n",
  " --types include type deltas\n",
  " --ir-before=PASS print canonical IR before PASS\n",
  " --ir-after=PASS print canonical IR after PASS\n",
  " --ir-all print canonical IR before/after every pass\n",
  " --full everything\n"
]
var usage_idx = 0
if (filename == null) {
  while (usage_idx < length(usage_lines)) {
    print(usage_lines[usage_idx])
    usage_idx = usage_idx + 1
  }
  $stop()
}
// --- Compile ---
// Front end: source text -> tokens -> AST -> folded AST -> mcode IR
var src = text(fd.slurp(filename))
var tok = tokenize(src, filename)
var ast = parse(tok.tokens, src, filename, tokenize)
var folded = fold(ast)
var compiled = mcode(folded)
// IR printing of any kind needs a copy of the IR taken before streamline runs
var need_snapshots = opt_ir_all || opt_ir_before != null || opt_ir_after != null
// A JSON round-trip produces the independent copy of the unoptimized IR
var before_ir = need_snapshots ? json.decode(json.encode(compiled)) : null
// Log record handed to streamline: `passes` is always collected, while
// `events` / `type_deltas` are arrays only when requested, null otherwise
var log = {
  passes: [],
  events: opt_events ? [] : null,
  type_deltas: opt_types ? [] : null
}
// --- Run optimizer ---
var optimized = streamline(compiled, log)
// --- Output ---
// Write one record as a single line of JSON terminated by a newline.
var emit = function(obj) {
  var line = json.encode(obj)
  print(line + "\n")
}
// Tag every entry of `items` with `type: tag` (mutating the entry) and
// emit it as one JSON line.
var emit_tagged = function(items, tag) {
  var idx = 0
  var entry = null
  while (idx < length(items)) {
    entry = items[idx]
    entry.type = tag
    emit(entry)
    idx = idx + 1
  }
  return null
}
// Pass summaries (always)
emit_tagged(log.passes, "pass")
// Rewrite events
if (opt_events && log.events != null) {
  emit_tagged(log.events, "event")
}
// Type deltas
if (opt_types && log.type_deltas != null) {
  emit_tagged(log.type_deltas, "types")
}
// --- Canonical IR printing ---
// Emit one {type: "ir"} record for the main body of `ir_obj` (when present)
// and one for each entry of `ir_obj.functions` (when present).
//   when_label: copied into the record's `when` field
//   pass_name:  copied into the record's `pass` field
// Unnamed bodies are labelled "<main>" and "<func_N>" respectively.
var print_ir = function(ir_obj, when_label, pass_name) {
  var funcs = ir_obj.functions
  var idx = 0
  var body = null
  // emit the canonical text of a single body under the given label
  var dump = function(fn_ir, label) {
    emit({
      type: "ir",
      when: when_label,
      pass: pass_name,
      fn: label,
      text: ir_stats.canonical_ir(fn_ir, label, {show_nops: true})
    })
    return null
  }
  if (ir_obj.main != null) {
    dump(ir_obj.main, ir_obj.name != null ? ir_obj.name : "<main>")
  }
  if (funcs != null) {
    while (idx < length(funcs)) {
      body = funcs[idx]
      dump(body, body.name != null ? body.name : `<func_${text(idx)}>`)
      idx = idx + 1
    }
  }
  return null
}
// Print the requested IR dumps, then stop.
// NOTE(review): `before_ir` is the IR before the whole streamline run and
// `optimized` is its final result; the PASS name given to --ir-before /
// --ir-after is only used as the `pass` label of the emitted records.
if (opt_ir_all) {
  print_ir(before_ir, "before", "all")
  print_ir(optimized, "after", "all")
} else {
  if (opt_ir_before != null) {
    print_ir(before_ir, "before", opt_ir_before)
  }
  if (opt_ir_after != null) {
    print_ir(optimized, "after", opt_ir_after)
  }
}
$stop()

357
ir_stats.cm Normal file
View File

@@ -0,0 +1,357 @@
// ir_stats.cm — IR statistics, fingerprinting, and canonical printing
//
// Usage: var ir_stats = use("ir_stats")
// ir_stats.detailed_stats(func)
// ir_stats.ir_fingerprint(func)
// ir_stats.canonical_ir(func, name, opts)
// ir_stats.type_snapshot(slot_types)
// ir_stats.type_delta(before_types, after_types)
var json = use("json")
// --- Category maps ---
// Each map below is used as a set: an opcode belongs to the category when
// its key is present with the value `true`. category_tag and detailed_stats
// consult these maps.

// Opcodes counted as loads
var load_ops = {
load_field: true, load_index: true, load_dynamic: true,
get: true
}
// Opcodes counted as stores
var store_ops = {
store_field: true, store_index: true, store_dynamic: true,
set_var: true, put: true, push: true
}
// Opcodes counted as branches
var branch_ops = {
jump: true, jump_true: true, jump_false: true, jump_not_null: true
}
// Opcodes counted as calls
var call_ops = {
invoke: true, goinvoke: true
}
// Opcodes counted as type guards
var guard_ops = {
is_int: true, is_text: true, is_num: true, is_bool: true,
is_null: true, is_array: true, is_func: true, is_record: true,
is_stone: true
}
// Opcodes counted as arithmetic (also includes concat and the bitwise ops)
var arith_ops = {
add_int: true, sub_int: true, mul_int: true, div_int: true, mod_int: true,
add_float: true, sub_float: true, mul_float: true, div_float: true, mod_float: true,
concat: true, neg_int: true, neg_float: true,
bitnot: true, bitand: true, bitor: true, bitxor: true,
shl: true, shr: true, ushr: true
}
// Opcodes counted as moves
var move_ops = {
move: true
}
// Opcodes counted as constant loads
var const_ops = {
int: true, true: true, false: true, null: true
}
// Two-letter nop reason codes, each mapped to itself.
// NOTE(review): no function in this file reads this map; nop_reason()
// extracts the code directly from the "_nop_XX_NNN" text instead.
var nop_reasons = {
tc: "tc",
bl: "bl",
mv: "mv",
dj: "dj",
ur: "ur"
}
// Classify an opcode name.
// Returns "guard", "branch", "load", "store", "call", "arith", "move" or
// "const" according to the category map that contains `op`, or null when
// none of the maps contains it.
var category_tag = function(op) {
  var tag = null
  if (guard_ops[op] == true) {
    tag = "guard"
  } else if (branch_ops[op] == true) {
    tag = "branch"
  } else if (load_ops[op] == true) {
    tag = "load"
  } else if (store_ops[op] == true) {
    tag = "store"
  } else if (call_ops[op] == true) {
    tag = "call"
  } else if (arith_ops[op] == true) {
    tag = "arith"
  } else if (move_ops[op] == true) {
    tag = "move"
  } else if (const_ops[op] == true) {
    tag = "const"
  }
  return tag
}
// --- detailed_stats ---
// Count the instructions of `func` by category.
// func.instructions entries are handled by shape:
//   - text starting with "_nop_": counted in both `nop` and `instr`
//   - any other text: counted as `label`
//   - array: counted in `instr` plus exactly one category bucket, chosen
//     from element 0 (the opcode)
//   - anything else: ignored
// Returns the counter record; all counters are zero when there are no
// instructions.
var detailed_stats = function(func) {
  var instructions = func.instructions
  var stats = {
    instr: 0, nop: 0,
    load: 0, store: 0, branch: 0, call: 0,
    guard: 0, arith: 0, move: 0, const: 0,
    label: 0, other: 0
  }
  var total = 0
  var pos = 0
  var entry = null
  var op = null
  var bucket = null
  if (instructions == null) {
    return stats
  }
  total = length(instructions)
  while (pos < total) {
    entry = instructions[pos]
    pos = pos + 1
    if (is_text(entry)) {
      if (starts_with(entry, "_nop_")) {
        stats.nop = stats.nop + 1
        stats.instr = stats.instr + 1
      } else {
        stats.label = stats.label + 1
      }
      continue
    }
    if (!is_array(entry)) {
      continue
    }
    stats.instr = stats.instr + 1
    op = entry[0]
    if (op == "access") {
      // "access" of a number or logical counts as a constant, anything
      // else as a load
      if (is_number(entry[2]) || is_logical(entry[2])) {
        bucket = "const"
      } else {
        bucket = "load"
      }
    } else {
      // the category maps are disjoint, so the tag names the one bucket
      bucket = category_tag(op)
      if (bucket == null) {
        bucket = "other"
      }
    }
    stats[bucket] = stats[bucket] + 1
  }
  return stats
}
// --- ir_fingerprint ---
// djb2 hash computed over the JSON-encoded instructions
// Fold the elements of array(s) into a hash: start at 5381, then for each
// element hash = (hash * 33 + number(element)) mod 4294967296.
// The result is rendered with text(hash, 16).
// NOTE(review): relies on number() yielding a numeric value for every
// element produced by array(s) — confirm for non-digit characters.
var djb2 = function(s) {
  var units = array(s)
  var count = length(units)
  var acc = 5381
  var pos = 0
  while (pos < count) {
    acc = (acc * 33 + number(units[pos])) % 4294967296
    pos = pos + 1
  }
  return text(acc, 16)
}
// Fingerprint of a function's IR: the djb2 hash of its JSON-encoded
// instruction list.
var ir_fingerprint = function(func) {
  var encoded = json.encode(func.instructions)
  return djb2(encoded)
}
// --- canonical_ir ---
// Append spaces to `s` until it is at least `w` long; text that is already
// that long is returned unchanged.
var pad_right = function(s, w) {
  var padded = s
  var missing = w - length(s)
  while (missing > 0) {
    padded = padded + " "
    missing = missing - 1
  }
  return padded
}
// Pull the reason code out of a nop marker of the form _nop_XX_NNN.
// Splitting on "_" gives ["", "nop", "XX", "NNN"], so the code is the third
// piece; "?" is returned when there are fewer than three pieces.
var nop_reason = function(s) {
  var pieces = array(s, "_")
  return length(pieces) >= 3 ? pieces[2] : "?"
}
// Render one instruction operand as text:
//   text    -> wrapped in double quotes
//   logical -> "true" / "false"
//   null    -> "null"
//   number and everything else -> text(v)
var fmt_operand = function(v) {
  if (is_text(v)) {
    return `"${v}"`
  }
  if (is_logical(v)) {
    return v ? "true" : "false"
  }
  if (is_null(v)) {
    return "null"
  }
  return text(v)
}
// Render a function's IR as text, one line per instruction.
//   func: record with `instructions` and optionally `nr_args` / `nr_slots`
//         (both default to 0 in the header line)
//   name: label for the header line; "<anon>" when null
//   opts: optional record
//         show_nops  == true  -> print "_nop_" markers instead of skipping them
//         show_types == true  -> with slot_types, annotate the slot in
//                                operand position 1 with its type
//         slot_types          -> record keyed by the slot number as text
// The first line is the "fn ..." header. Text entries are printed as labels
// (or nop markers), array entries as "@index opcode operands", and any other
// entry is skipped. The last two elements of an instruction array are never
// printed as operands. Every annotation starts with "; ": the type (when
// known) comes first, followed by the category tag in brackets.
// Returns the lines joined with "\n".
var canonical_ir = function(func, name, opts) {
  var instructions = func.instructions
  var nr_args = func.nr_args != null ? func.nr_args : 0
  var nr_slots = func.nr_slots != null ? func.nr_slots : 0
  var show_nops = false
  var show_types = false
  var slot_types = null
  var lines = []
  var i = 0
  var instr = null
  var op = null
  var n = 0
  var parts = null
  var j = 0
  var idx_str = null
  var op_str = null
  var operands = null
  var suffix = null
  var tag = null
  var typ = null
  var reason = null
  var num = 0
  if (opts != null) {
    if (opts.show_nops == true) { show_nops = true }
    if (opts.show_types == true) { show_types = true }
    if (opts.slot_types != null) { slot_types = opts.slot_types }
  }
  lines[] = `fn ${name != null ? name : "<anon>"} (args=${text(nr_args)}, slots=${text(nr_slots)})`
  if (instructions == null) {
    return text(lines, "\n")
  }
  num = length(instructions)
  while (i < num) {
    instr = instructions[i]
    if (is_text(instr)) {
      if (starts_with(instr, "_nop_")) {
        if (show_nops) {
          reason = nop_reason(instr)
          idx_str = pad_right(`@${text(i)}`, 6)
          lines[] = ` ${idx_str}--- nop (${reason}) ---`
        }
      } else {
        lines[] = ` ${instr}:`
      }
      i = i + 1
      continue
    }
    if (!is_array(instr)) {
      i = i + 1
      continue
    }
    op = instr[0]
    n = length(instr)
    parts = []
    j = 1
    // operands are elements 1 .. n-3; the trailing two elements are skipped
    while (j < n - 2) {
      // numbers are slot references, except the literal operand of "int"
      // and operand 2 of "access"
      if (is_number(instr[j]) && op != "int" && !(op == "access" && j == 2)) {
        parts[] = `s${text(instr[j])}`
      } else {
        parts[] = fmt_operand(instr[j])
      }
      j = j + 1
    }
    operands = text(parts, ", ")
    idx_str = pad_right(`@${text(i)}`, 6)
    op_str = pad_right(op, 16)
    suffix = ""
    tag = category_tag(op)
    if (show_types && slot_types != null && is_number(instr[1])) {
      // show type for dest slot if known
      typ = slot_types[text(instr[1])]
      if (typ != null) {
        suffix = `; -> ${typ}`
      }
    }
    if (tag != null) {
      if (length(suffix) > 0) {
        suffix = suffix + ` [${tag}]`
      } else {
        // no type annotation precedes the tag, so the tag itself opens the
        // "; " annotation (also when show_types is on but the type is unknown)
        suffix = `; [${tag}]`
      }
    }
    if (length(suffix) > 0) {
      lines[] = ` ${idx_str}${op_str}${operands} ${suffix}`
    } else {
      lines[] = ` ${idx_str}${op_str}${operands}`
    }
    i = i + 1
  }
  return text(lines, "\n")
}
// --- type_snapshot ---
// Snapshot a slot-type table: an empty record when `slot_types` is null,
// otherwise stone(record(slot_types)).
var type_snapshot = function(slot_types) {
  return slot_types == null ? {} : stone(record(slot_types))
}
// --- type_delta ---
// Compare two slot-type tables (null is treated as an empty table).
// Returns a record with four maps keyed by slot:
//   added:        present only in after_types            -> new type
//   removed:      present only in before_types           -> old type
//   weakened:     changed to "unknown", or from "int"/"float" to "num",
//                 unless the old type was "unknown"      -> {from, to}
//   strengthened: every other change of type             -> {from, to}
var type_delta = function(before_types, after_types) {
  var before = before_types != null ? before_types : {}
  var after = after_types != null ? after_types : {}
  var delta = {
    added: {},
    removed: {},
    strengthened: {},
    weakened: {}
  }
  var names = null
  var idx = 0
  var slot = null
  var old_t = null
  var new_t = null
  var gains = false
  var loses = false
  // slots present after the pass: added or changed
  names = array(after)
  while (idx < length(names)) {
    slot = names[idx]
    new_t = after[slot]
    old_t = before[slot]
    if (old_t == null) {
      delta.added[slot] = new_t
    } else if (old_t != new_t) {
      gains = old_t == "unknown" || (old_t == "num" && (new_t == "int" || new_t == "float"))
      loses = new_t == "unknown" || (new_t == "num" && (old_t == "int" || old_t == "float"))
      if (loses && !gains) {
        delta.weakened[slot] = {from: old_t, to: new_t}
      } else {
        delta.strengthened[slot] = {from: old_t, to: new_t}
      }
    }
    idx = idx + 1
  }
  // slots that existed before the pass but are gone afterwards
  names = array(before)
  idx = 0
  while (idx < length(names)) {
    slot = names[idx]
    if (after[slot] == null) {
      delta.removed[slot] = before[slot]
    }
    idx = idx + 1
  }
  return delta
}
// Module value returned to use("ir_stats"): the public functions of this
// file. djb2, pad_right, nop_reason and fmt_operand are not exported.
return {
detailed_stats: detailed_stats,
ir_fingerprint: ir_fingerprint,
canonical_ir: canonical_ir,
type_snapshot: type_snapshot,
type_delta: type_delta,
category_tag: category_tag
}

681
mcode.cm
View File

@@ -28,6 +28,13 @@ var mcode = function(ast) {
"<<=": "shl", ">>=": "shr", ">>>=": "ushr"
}
var sensory_ops = {
is_array: "is_array", is_function: "is_func", is_object: "is_record",
is_stone: "is_stone", is_integer: "is_int", is_text: "is_text",
is_number: "is_num", is_logical: "is_bool", is_null: "is_null",
length: "length"
}
// Compiler state
var s_instructions = null
var s_data = null
@@ -52,6 +59,7 @@ var mcode = function(ast) {
var s_cur_line = 0
var s_cur_col = 0
var s_filename = null
var s_has_disruption = false
// Shared closure vars for binop helpers (avoids >4 param functions)
var _bp_dest = 0
@@ -78,7 +86,8 @@ var mcode = function(ast) {
function_nr: s_function_nr,
intrinsic_cache: s_intrinsic_cache,
cur_line: s_cur_line,
cur_col: s_cur_col
cur_col: s_cur_col,
has_disruption: s_has_disruption
}
}
@@ -99,6 +108,7 @@ var mcode = function(ast) {
s_intrinsic_cache = saved.intrinsic_cache
s_cur_line = saved.cur_line
s_cur_col = saved.cur_col
s_has_disruption = saved.has_disruption
}
// Slot allocation
@@ -270,88 +280,39 @@ var mcode = function(ast) {
return node.kind == "null"
}
// emit_add_decomposed: int path -> text path -> float path -> disrupt
// emit_add_decomposed: emit type-dispatched add (text → concat, num → add)
// reads _bp_dest, _bp_left, _bp_right, _bp_ln, _bp_rn from closure
var emit_add_decomposed = function() {
var dest = _bp_dest
var left = _bp_left
var right = _bp_right
var t0 = 0
var t1 = 0
var left_is_int = is_known_int(_bp_ln)
var left_is_text = is_known_text(_bp_ln)
var left_is_num = is_known_number(_bp_ln)
var right_is_int = is_known_int(_bp_rn)
var right_is_text = is_known_text(_bp_rn)
var right_is_num = is_known_number(_bp_rn)
var not_int = null
var not_text = null
var done = null
var err = null
// Both sides known int
if (left_is_int && right_is_int) {
emit_3("add_int", dest, left, right)
if (is_known_text(_bp_ln) && is_known_text(_bp_rn)) {
emit_3("concat", _bp_dest, _bp_left, _bp_right)
return null
}
// Both sides known text
if (left_is_text && right_is_text) {
emit_3("concat", dest, left, right)
return null
}
// Both sides known number (but not both int)
if (left_is_num && right_is_num) {
if (left_is_int && right_is_int) {
emit_3("add_int", dest, left, right)
} else {
emit_3("add_float", dest, left, right)
}
if (is_known_number(_bp_ln) && is_known_number(_bp_rn)) {
emit_3("add", _bp_dest, _bp_left, _bp_right)
return null
}
// Unknown types: emit full dispatch
var t0 = alloc_slot()
var t1 = alloc_slot()
var done = gen_label("add_done")
var check_num = gen_label("add_cn")
not_int = gen_label("add_ni")
not_text = gen_label("add_nt")
done = gen_label("add_done")
err = gen_label("add_err")
// Int path
t0 = alloc_slot()
if (!left_is_int) {
emit_2("is_int", t0, left)
emit_jump_cond("jump_false", t0, not_int)
}
t1 = alloc_slot()
if (!right_is_int) {
emit_2("is_int", t1, right)
emit_jump_cond("jump_false", t1, not_int)
}
emit_3("add_int", dest, left, right)
// Check text path first (since add doubles as concat)
emit_2("is_text", t0, _bp_left)
emit_jump_cond("jump_false", t0, check_num)
emit_2("is_text", t1, _bp_right)
emit_jump_cond("jump_false", t1, check_num)
emit_3("concat", _bp_dest, _bp_left, _bp_right)
emit_jump(done)
// Text path
emit_label(not_int)
if (!left_is_text) {
emit_2("is_text", t0, left)
emit_jump_cond("jump_false", t0, not_text)
}
if (!right_is_text) {
emit_2("is_text", t1, right)
emit_jump_cond("jump_false", t1, not_text)
}
emit_3("concat", dest, left, right)
emit_jump(done)
// Float path
emit_label(not_text)
if (!left_is_num) {
emit_2("is_num", t0, left)
emit_jump_cond("jump_false", t0, err)
}
if (!right_is_num) {
emit_2("is_num", t1, right)
emit_jump_cond("jump_false", t1, err)
}
emit_3("add_float", dest, left, right)
// Numeric path
var err = gen_label("add_err")
emit_label(check_num)
emit_2("is_num", t0, _bp_left)
emit_jump_cond("jump_false", t0, err)
emit_2("is_num", t1, _bp_right)
emit_jump_cond("jump_false", t1, err)
emit_3("add", _bp_dest, _bp_left, _bp_right)
emit_jump(done)
emit_label(err)
@@ -360,60 +321,22 @@ var mcode = function(ast) {
return null
}
// emit_numeric_binop: int path -> float path -> disrupt
// emit_numeric_binop: emit type-guarded numeric binary op
// reads _bp_dest, _bp_left, _bp_right, _bp_ln, _bp_rn from closure
var emit_numeric_binop = function(int_op, float_op) {
var dest = _bp_dest
var left = _bp_left
var right = _bp_right
var t0 = 0
var t1 = 0
var left_is_int = is_known_int(_bp_ln)
var left_is_num = is_known_number(_bp_ln)
var right_is_int = is_known_int(_bp_rn)
var right_is_num = is_known_number(_bp_rn)
var not_int = null
var done = null
var err = null
// Both sides known int
if (left_is_int && right_is_int) {
emit_3(int_op, dest, left, right)
var emit_numeric_binop = function(op_str) {
if (is_known_number(_bp_ln) && is_known_number(_bp_rn)) {
emit_3(op_str, _bp_dest, _bp_left, _bp_right)
return null
}
// Both sides known number (but not both int)
if (left_is_num && right_is_num) {
emit_3(float_op, dest, left, right)
return null
}
not_int = gen_label("num_ni")
done = gen_label("num_done")
err = gen_label("num_err")
t0 = alloc_slot()
if (!left_is_int) {
emit_2("is_int", t0, left)
emit_jump_cond("jump_false", t0, not_int)
}
t1 = alloc_slot()
if (!right_is_int) {
emit_2("is_int", t1, right)
emit_jump_cond("jump_false", t1, not_int)
}
emit_3(int_op, dest, left, right)
emit_jump(done)
emit_label(not_int)
if (!left_is_num) {
emit_2("is_num", t0, left)
emit_jump_cond("jump_false", t0, err)
}
if (!right_is_num) {
emit_2("is_num", t1, right)
emit_jump_cond("jump_false", t1, err)
}
emit_3(float_op, dest, left, right)
var t0 = alloc_slot()
var t1 = alloc_slot()
var err = gen_label("num_err")
var done = gen_label("num_done")
emit_2("is_num", t0, _bp_left)
emit_jump_cond("jump_false", t0, err)
emit_2("is_num", t1, _bp_right)
emit_jump_cond("jump_false", t1, err)
emit_3(op_str, _bp_dest, _bp_left, _bp_right)
emit_jump(done)
emit_label(err)
@@ -646,36 +569,18 @@ var mcode = function(ast) {
return null
}
// emit_neg_decomposed: int path -> float path -> disrupt
// emit_neg_decomposed: emit type-guarded negate
var emit_neg_decomposed = function(dest, src, src_node) {
var t0 = 0
var not_int = null
var done = null
var err = null
if (is_known_int(src_node)) {
emit_2("neg_int", dest, src)
return null
}
if (is_known_number(src_node)) {
emit_2("neg_float", dest, src)
emit_2("negate", dest, src)
return null
}
not_int = gen_label("neg_ni")
done = gen_label("neg_done")
err = gen_label("neg_err")
t0 = alloc_slot()
emit_2("is_int", t0, src)
emit_jump_cond("jump_false", t0, not_int)
emit_2("neg_int", dest, src)
emit_jump(done)
emit_label(not_int)
var t0 = alloc_slot()
var err = gen_label("neg_err")
var done = gen_label("neg_done")
emit_2("is_num", t0, src)
emit_jump_cond("jump_false", t0, err)
emit_2("neg_float", dest, src)
emit_2("negate", dest, src)
emit_jump(done)
emit_label(err)
@@ -686,35 +591,34 @@ var mcode = function(ast) {
// Central router: maps op string to decomposition helper
// Sets _bp_* closure vars then calls helper with reduced args
var relational_ops = {
lt: ["lt_int", "lt_float", "lt_text"],
le: ["le_int", "le_float", "le_text"],
gt: ["gt_int", "gt_float", "gt_text"],
ge: ["ge_int", "ge_float", "ge_text"]
}
var emit_binop = function(op_str, dest, left, right) {
var rel = null
_bp_dest = dest
_bp_left = left
_bp_right = right
if (op_str == "add") {
emit_add_decomposed()
} else if (op_str == "subtract") {
emit_numeric_binop("sub_int", "sub_float")
} else if (op_str == "multiply") {
emit_numeric_binop("mul_int", "mul_float")
} else if (op_str == "divide") {
emit_numeric_binop("div_int", "div_float")
} else if (op_str == "modulo") {
emit_numeric_binop("mod_int", "mod_float")
} else if (op_str == "eq") {
emit_eq_decomposed()
} else if (op_str == "ne") {
emit_ne_decomposed()
} else if (op_str == "lt") {
emit_relational("lt_int", "lt_float", "lt_text")
} else if (op_str == "le") {
emit_relational("le_int", "le_float", "le_text")
} else if (op_str == "gt") {
emit_relational("gt_int", "gt_float", "gt_text")
} else if (op_str == "ge") {
emit_relational("ge_int", "ge_float", "ge_text")
} else {
// Passthrough for bitwise, pow, in, etc.
emit_3(op_str, dest, left, right)
rel = relational_ops[op_str]
if (rel != null) {
emit_relational(rel[0], rel[1], rel[2])
} else if (op_str == "subtract" || op_str == "multiply" ||
op_str == "divide" || op_str == "modulo" || op_str == "pow") {
emit_numeric_binop(op_str)
} else {
// Passthrough for bitwise, in, etc.
emit_3(op_str, dest, left, right)
}
}
return null
}
@@ -782,13 +686,12 @@ var mcode = function(ast) {
var name_str = alloc_slot()
emit_const_str(name_str, prop)
var args_arr = alloc_slot()
var arr_instr = ["array", args_arr, argc]
add_instr(["array", args_arr, 0])
_i = 0
while (_i < argc) {
push(arr_instr, args[_i])
emit_2("push", args_arr, args[_i])
_i = _i + 1
}
add_instr(arr_instr)
var pf = alloc_slot()
emit_3("frame", pf, obj, 2)
emit_3("setarg", pf, 0, null_slot)
@@ -836,13 +739,12 @@ var mcode = function(ast) {
var null_slot = alloc_slot()
emit_const_null(null_slot)
var args_arr = alloc_slot()
var arr_instr = ["array", args_arr, argc]
add_instr(["array", args_arr, 0])
_i = 0
while (_i < argc) {
push(arr_instr, args[_i])
emit_2("push", args_arr, args[_i])
_i = _i + 1
}
add_instr(arr_instr)
var pf = alloc_slot()
emit_3("frame", pf, obj, 2)
emit_3("setarg", pf, 0, null_slot)
@@ -986,6 +888,323 @@ var mcode = function(ast) {
return -1
}
// --- Inline expansion toggle flags ---
var inline_arrfor = true
var inline_filter = true
var inline_every = true
var inline_some = true
var inline_reduce = true
// --- Helper: emit a reduce loop body ---
// r = {acc, i, arr, fn, len}: slots holding the accumulator, the loop index,
// the array, the callback and the array length. The caller initialises acc
// and i before calling; the loop updates acc in-place.
// forward == true : iterate while i < len, stepping i up by one.
// forward == false: iterate while i >= 0, stepping i down by one.
// Every iteration loads arr[i] and invokes fn with argument slot 0 = null,
// slot 1 = acc, slot 2 = the element, storing the result back into acc.
// The loop exits by jumping to done_label.
// Caller must emit the done_label after calling this.
var emit_reduce_loop = function(r, forward, done_label) {
  var acc = r.acc
  var i = r.i
  var arr_slot = r.arr
  var fn_slot = r.fn
  var len = r.len
  var check = alloc_slot()
  var item = alloc_slot()
  var null_s = alloc_slot()
  var one = alloc_slot()
  var zero = alloc_slot()
  var f = alloc_slot()
  var loop_label = gen_label("reduce_loop")

  // Loop-invariant constants are emitted once, ahead of the loop label,
  // so the generated loop body does not reload them on every iteration.
  emit_2("int", one, 1)
  emit_1("null", null_s)
  if (!forward) {
    emit_2("int", zero, 0)
  }

  emit_label(loop_label)

  // Bounds test: leave the loop as soon as i runs off the relevant end
  if (forward) {
    emit_3("lt_int", check, i, len)
  } else {
    emit_3("ge_int", check, i, zero)
  }
  emit_jump_cond("jump_false", check, done_label)

  // acc = fn(acc, arr[i])
  emit_3("load_index", item, arr_slot, i)
  emit_3("frame", f, fn_slot, 2)
  emit_3("setarg", f, 0, null_s)
  emit_3("setarg", f, 1, acc)
  emit_3("setarg", f, 2, item)
  emit_2("invoke", f, acc)

  // Advance the index in the direction of travel and loop again
  if (forward) {
    emit_3("add", i, i, one)
  } else {
    emit_3("subtract", i, i, one)
  }
  emit_jump(loop_label)
}
// --- Inline expansion: arrfor(arr, fn) ---
// Emits an ascending index loop over arr_slot. Each iteration invokes
// fn_slot with argument slot 0 = null, slot 1 = the element and
// slot 2 = its index; the call result is written to a scratch slot and
// never read. After the loop dest is set to null. Returns dest.
var expand_inline_arrfor = function(dest, arr_slot, fn_slot) {
  // Scratch slots (allocation order matters for slot numbering)
  var len_s = alloc_slot()
  var idx_s = alloc_slot()
  var cond_s = alloc_slot()
  var elem_s = alloc_slot()
  var nul_s = alloc_slot()
  var step_s = alloc_slot()
  var frame_s = alloc_slot()
  var drop_s = alloc_slot()
  var top_lbl = gen_label("arrfor_loop")
  var end_lbl = gen_label("arrfor_done")

  // Prologue: length, index = 0, and the constants used by the loop
  emit_2("length", len_s, arr_slot)
  emit_2("int", idx_s, 0)
  emit_2("int", step_s, 1)
  emit_1("null", nul_s)

  // Loop head: stop once the index reaches the length
  emit_label(top_lbl)
  emit_3("lt_int", cond_s, idx_s, len_s)
  emit_jump_cond("jump_false", cond_s, end_lbl)

  // Body: call the callback with the element and its index
  emit_3("load_index", elem_s, arr_slot, idx_s)
  emit_3("frame", frame_s, fn_slot, 2)
  emit_3("setarg", frame_s, 0, nul_s)
  emit_3("setarg", frame_s, 1, elem_s)
  emit_3("setarg", frame_s, 2, idx_s)
  emit_2("invoke", frame_s, drop_s)

  // Step and repeat
  emit_3("add", idx_s, idx_s, step_s)
  emit_jump(top_lbl)

  // Epilogue: the expression evaluates to null
  emit_label(end_lbl)
  emit_1("null", dest)
  return dest
}
// --- Inline expansion: every(arr, fn) ---
// Emits an ascending index loop over arr_slot that invokes fn_slot with
// argument slot 0 = null and slot 1 = the element. The first call whose
// result fails the jump_false test ends the loop with dest = false;
// running off the end of the array yields dest = true. Returns dest.
var expand_inline_every = function(dest, arr_slot, fn_slot) {
  // Scratch slots (allocation order matters for slot numbering)
  var len_s = alloc_slot()
  var idx_s = alloc_slot()
  var cond_s = alloc_slot()
  var elem_s = alloc_slot()
  var nul_s = alloc_slot()
  var step_s = alloc_slot()
  var frame_s = alloc_slot()
  var verdict_s = alloc_slot()
  var top_lbl = gen_label("every_loop")
  var yes_lbl = gen_label("every_true")
  var no_lbl = gen_label("every_false")
  var end_lbl = gen_label("every_done")

  // Prologue: length, index = 0, and the constants used by the loop
  emit_2("length", len_s, arr_slot)
  emit_2("int", idx_s, 0)
  emit_2("int", step_s, 1)
  emit_1("null", nul_s)

  // Loop head: an exhausted array means every element passed
  emit_label(top_lbl)
  emit_3("lt_int", cond_s, idx_s, len_s)
  emit_jump_cond("jump_false", cond_s, yes_lbl)

  // Body: test the element, bail out on the first failure
  emit_3("load_index", elem_s, arr_slot, idx_s)
  emit_3("frame", frame_s, fn_slot, 1)
  emit_3("setarg", frame_s, 0, nul_s)
  emit_3("setarg", frame_s, 1, elem_s)
  emit_2("invoke", frame_s, verdict_s)
  emit_jump_cond("jump_false", verdict_s, no_lbl)

  // Step and repeat
  emit_3("add", idx_s, idx_s, step_s)
  emit_jump(top_lbl)

  // Exits: write the boolean result into dest
  emit_label(yes_lbl)
  emit_1("true", dest)
  emit_jump(end_lbl)
  emit_label(no_lbl)
  emit_1("false", dest)
  emit_label(end_lbl)
  return dest
}
// --- Inline expansion: some(arr, fn) ---
// Emits an ascending index loop over arr_slot that invokes fn_slot with
// argument slot 0 = null and slot 1 = the element. The first call whose
// result passes the jump_true test ends the loop with dest = true;
// running off the end of the array yields dest = false. Returns dest.
var expand_inline_some = function(dest, arr_slot, fn_slot) {
  // Scratch slots (allocation order matters for slot numbering)
  var len_s = alloc_slot()
  var idx_s = alloc_slot()
  var cond_s = alloc_slot()
  var elem_s = alloc_slot()
  var nul_s = alloc_slot()
  var step_s = alloc_slot()
  var frame_s = alloc_slot()
  var verdict_s = alloc_slot()
  var top_lbl = gen_label("some_loop")
  var yes_lbl = gen_label("some_true")
  var no_lbl = gen_label("some_false")
  var end_lbl = gen_label("some_done")

  // Prologue: length, index = 0, and the constants used by the loop
  emit_2("length", len_s, arr_slot)
  emit_2("int", idx_s, 0)
  emit_2("int", step_s, 1)
  emit_1("null", nul_s)

  // Loop head: an exhausted array means no element matched
  emit_label(top_lbl)
  emit_3("lt_int", cond_s, idx_s, len_s)
  emit_jump_cond("jump_false", cond_s, no_lbl)

  // Body: test the element, bail out on the first match
  emit_3("load_index", elem_s, arr_slot, idx_s)
  emit_3("frame", frame_s, fn_slot, 1)
  emit_3("setarg", frame_s, 0, nul_s)
  emit_3("setarg", frame_s, 1, elem_s)
  emit_2("invoke", frame_s, verdict_s)
  emit_jump_cond("jump_true", verdict_s, yes_lbl)

  // Step and repeat
  emit_3("add", idx_s, idx_s, step_s)
  emit_jump(top_lbl)

  // Exits: write the boolean result into dest
  emit_label(yes_lbl)
  emit_1("true", dest)
  emit_jump(end_lbl)
  emit_label(no_lbl)
  emit_1("false", dest)
  emit_label(end_lbl)
  return dest
}
// --- Inline expansion: filter(arr, fn) ---
// Emits code that builds a fresh array, then walks arr_slot in ascending
// index order invoking fn_slot with argument slot 0 = null, slot 1 = the
// element and slot 2 = its index. Elements whose call result does not take
// the jump_false branch are pushed onto the fresh array, which is finally
// moved into dest. Returns dest.
var expand_inline_filter = function(dest, arr_slot, fn_slot) {
  // Scratch slots (allocation order matters for slot numbering)
  var out_s = alloc_slot()
  var len_s = alloc_slot()
  var idx_s = alloc_slot()
  var cond_s = alloc_slot()
  var elem_s = alloc_slot()
  var nul_s = alloc_slot()
  var step_s = alloc_slot()
  var frame_s = alloc_slot()
  var keep_s = alloc_slot()
  var top_lbl = gen_label("filter_loop")
  var next_lbl = gen_label("filter_skip")
  var end_lbl = gen_label("filter_done")

  // Prologue: empty output array, length, index = 0, loop constants
  add_instr(["array", out_s, 0])
  emit_2("length", len_s, arr_slot)
  emit_2("int", idx_s, 0)
  emit_2("int", step_s, 1)
  emit_1("null", nul_s)

  // Loop head: stop once the index reaches the length
  emit_label(top_lbl)
  emit_3("lt_int", cond_s, idx_s, len_s)
  emit_jump_cond("jump_false", cond_s, end_lbl)

  // Body: ask the callback whether to keep this element
  emit_3("load_index", elem_s, arr_slot, idx_s)
  emit_3("frame", frame_s, fn_slot, 2)
  emit_3("setarg", frame_s, 0, nul_s)
  emit_3("setarg", frame_s, 1, elem_s)
  emit_3("setarg", frame_s, 2, idx_s)
  emit_2("invoke", frame_s, keep_s)
  emit_jump_cond("jump_false", keep_s, next_lbl)
  emit_2("push", out_s, elem_s)

  // Step and repeat (rejected elements land here directly)
  emit_label(next_lbl)
  emit_3("add", idx_s, idx_s, step_s)
  emit_jump(top_lbl)

  // Epilogue: hand the collected array to the caller's destination
  emit_label(end_lbl)
  emit_2("move", dest, out_s)
  return dest
}
// --- Inline expansion: reduce(arr, fn[, initial[, reverse]]) ---
// dest:  slot that receives the reduction result.
// args:  {arr, fn, init, rev} — slots for the array, the callback, the
//        optional initial value and the optional reverse flag.
// nargs: number of arguments at the call site; 2 and 3 get dedicated
//        expansions, anything else takes the 4-argument path.
// The initial value and the reverse flag are tested at run time by the
// emitted code (is_null / jump_true), not at compile time.
// With no initial value the first visited element seeds the accumulator;
// an empty array with no initial value produces null.
// Returns dest.
var expand_inline_reduce = function(dest, args, nargs) {
var arr_slot = args.arr
var fn_slot = args.fn
var init_slot = args.init
var rev_slot = args.rev
var len = alloc_slot()
var acc = alloc_slot()
var i = alloc_slot()
var check = alloc_slot()
var zero = alloc_slot()
var one = alloc_slot()
var final_label = gen_label("reduce_final")
var has_init = null
var no_init_rev = null
var init_rev = null
var null_label = null
var d1 = null
var d2 = null
var d3 = null
var d4 = null
var r = null
// Shared prologue: array length plus the constants 0 and 1
emit_2("length", len, arr_slot)
emit_2("int", zero, 0)
emit_2("int", one, 1)
// Slot bundle handed to emit_reduce_loop for every loop variant below
r = {acc: acc, i: i, arr: arr_slot, fn: fn_slot, len: len}
if (nargs == 2) {
// reduce(arr, fn): forward, seeded with arr[0], starting at index 1
null_label = gen_label("reduce_null")
d1 = gen_label("reduce_d1")
// Empty array (0 < len is false) -> result is null
emit_3("lt_int", check, zero, len)
emit_jump_cond("jump_false", check, null_label)
emit_3("load_index", acc, arr_slot, zero)
emit_2("move", i, one)
emit_reduce_loop(r, true, d1)
emit_label(d1)
emit_2("move", dest, acc)
emit_jump(final_label)
emit_label(null_label)
emit_1("null", dest)
emit_label(final_label)
} else if (nargs == 3) {
// reduce(arr, fn, initial): forward only; whether initial is null is
// decided at run time
has_init = gen_label("reduce_has_init")
null_label = gen_label("reduce_null")
d1 = gen_label("reduce_d1")
d2 = gen_label("reduce_d2")
emit_2("is_null", check, init_slot)
emit_jump_cond("jump_false", check, has_init)
// No initial, forward
emit_3("lt_int", check, zero, len)
emit_jump_cond("jump_false", check, null_label)
emit_3("load_index", acc, arr_slot, zero)
emit_2("move", i, one)
emit_reduce_loop(r, true, d1)
emit_label(d1)
emit_2("move", dest, acc)
emit_jump(final_label)
emit_label(null_label)
emit_1("null", dest)
emit_jump(final_label)
// Has initial, forward
emit_label(has_init)
emit_2("move", acc, init_slot)
emit_2("int", i, 0)
emit_reduce_loop(r, true, d2)
emit_label(d2)
emit_2("move", dest, acc)
emit_label(final_label)
} else {
// nargs == 4: full branching
// Four loop variants are emitted: {no initial, initial} x {forward, reverse};
// the run-time checks below select exactly one of them.
has_init = gen_label("reduce_has_init")
no_init_rev = gen_label("reduce_no_init_rev")
init_rev = gen_label("reduce_init_rev")
null_label = gen_label("reduce_null")
d1 = gen_label("reduce_d1")
d2 = gen_label("reduce_d2")
d3 = gen_label("reduce_d3")
d4 = gen_label("reduce_d4")
emit_2("is_null", check, init_slot)
emit_jump_cond("jump_false", check, has_init)
// No initial
// Empty array with no initial value -> null, regardless of direction
emit_3("lt_int", check, zero, len)
emit_jump_cond("jump_false", check, null_label)
emit_jump_cond("jump_true", rev_slot, no_init_rev)
// No initial, forward
emit_3("load_index", acc, arr_slot, zero)
emit_2("move", i, one)
emit_reduce_loop(r, true, d1)
emit_label(d1)
emit_2("move", dest, acc)
emit_jump(final_label)
// No initial, reverse
// Seed acc with the last element (index len - 1), then start at len - 2
emit_label(no_init_rev)
emit_3("subtract", i, len, one)
emit_3("load_index", acc, arr_slot, i)
emit_3("subtract", i, i, one)
emit_reduce_loop(r, false, d2)
emit_label(d2)
emit_2("move", dest, acc)
emit_jump(final_label)
emit_label(null_label)
emit_1("null", dest)
emit_jump(final_label)
// Has initial
emit_label(has_init)
emit_jump_cond("jump_true", rev_slot, init_rev)
// Has initial, forward
emit_2("move", acc, init_slot)
emit_2("int", i, 0)
emit_reduce_loop(r, true, d3)
emit_label(d3)
emit_2("move", dest, acc)
emit_jump(final_label)
// Has initial, reverse
// Start at the last index (len - 1) with acc = initial
emit_label(init_rev)
emit_2("move", acc, init_slot)
emit_3("subtract", i, len, one)
emit_reduce_loop(r, false, d4)
emit_label(d4)
emit_2("move", dest, acc)
emit_label(final_label)
}
return dest
}
// Forward declarations via var
var gen_expr = null
var gen_statement = null
@@ -1181,17 +1400,29 @@ var mcode = function(ast) {
var obj_slot = 0
var idx_expr = null
var idx_slot = 0
var guard_t = 0
var guard_err = null
var guard_done = null
if (cop != null) {
return gen_compound_assign(node, cop)
}
// Push syntax: arr[] = val
// Push syntax: arr[] = val (guarded)
if (node.push == true) {
arr_expr = left.left
arr_slot = gen_expr(arr_expr, -1)
val_slot = gen_expr(right, -1)
guard_t = alloc_slot()
guard_err = gen_label("push_err")
guard_done = gen_label("push_done")
emit_2("is_array", guard_t, arr_slot)
emit_jump_cond("jump_false", guard_t, guard_err)
emit_2("push", arr_slot, val_slot)
emit_jump(guard_done)
emit_label(guard_err)
emit_0("disrupt")
emit_label(guard_done)
return val_slot
}
@@ -1270,6 +1501,7 @@ var mcode = function(ast) {
var a0 = 0
var a1 = 0
var a2 = 0
var a3 = 0
var d = 0
var top = null
var arg_slots = null
@@ -1306,6 +1538,9 @@ var mcode = function(ast) {
var kname = null
var func = null
var func_id = 0
var guard_t = 0
var guard_err = null
var guard_done = null
if (expr == null) {
return -1
@@ -1343,13 +1578,12 @@ var mcode = function(ast) {
}
// Create array from expression results
arr_slot = alloc_slot()
arr_instr = ["array", arr_slot, nexpr]
add_instr(["array", arr_slot, 0])
_i = 0
while (_i < nexpr) {
push(arr_instr, expr_slots[_i])
emit_2("push", arr_slot, expr_slots[_i])
_i = _i + 1
}
add_instr(arr_instr)
// Load format intrinsic
fmt_func_slot = find_intrinsic("format")
if (fmt_func_slot < 0) {
@@ -1509,6 +1743,68 @@ var mcode = function(ast) {
return d
}
// Tier 1 intrinsic inlining: emit direct opcodes instead of frame/invoke
if (callee_kind == "name" && callee.intrinsic == true) {
fname = callee.name
nargs = args_list != null ? length(args_list) : 0
// 1-arg type check intrinsics → direct opcode
if (nargs == 1 && sensory_ops[fname] != null) {
a0 = gen_expr(args_list[0], -1)
d = alloc_slot()
emit_2(sensory_ops[fname], d, a0)
return d
}
// 2-arg push: push(arr, val) → guarded direct opcode
if (nargs == 2 && fname == "push") {
a0 = gen_expr(args_list[0], -1)
a1 = gen_expr(args_list[1], -1)
guard_t = alloc_slot()
guard_err = gen_label("push_err")
guard_done = gen_label("push_done")
emit_2("is_array", guard_t, a0)
emit_jump_cond("jump_false", guard_t, guard_err)
emit_2("push", a0, a1)
emit_jump(guard_done)
emit_label(guard_err)
emit_0("disrupt")
emit_label(guard_done)
return a1
}
// Callback intrinsics → inline mcode loops
if (nargs == 2 && fname == "arrfor" && inline_arrfor) {
a0 = gen_expr(args_list[0], -1)
a1 = gen_expr(args_list[1], -1)
d = alloc_slot()
return expand_inline_arrfor(d, a0, a1)
}
if (nargs == 2 && fname == "every" && inline_every) {
a0 = gen_expr(args_list[0], -1)
a1 = gen_expr(args_list[1], -1)
d = alloc_slot()
return expand_inline_every(d, a0, a1)
}
if (nargs == 2 && fname == "some" && inline_some) {
a0 = gen_expr(args_list[0], -1)
a1 = gen_expr(args_list[1], -1)
d = alloc_slot()
return expand_inline_some(d, a0, a1)
}
if (nargs == 2 && fname == "filter" && inline_filter) {
a0 = gen_expr(args_list[0], -1)
a1 = gen_expr(args_list[1], -1)
d = alloc_slot()
return expand_inline_filter(d, a0, a1)
}
if (fname == "reduce" && nargs >= 2 && nargs <= 4 && inline_reduce) {
a0 = gen_expr(args_list[0], -1)
a1 = gen_expr(args_list[1], -1)
a2 = nargs >= 3 ? gen_expr(args_list[2], -1) : -1
a3 = nargs >= 4 ? gen_expr(args_list[3], -1) : -1
d = alloc_slot()
return expand_inline_reduce(d, {arr: a0, fn: a1, init: a2, rev: a3}, nargs)
}
}
// Collect arg slots
arg_slots = []
_i = 0
@@ -1686,13 +1982,12 @@ var mcode = function(ast) {
_i = _i + 1
}
dest = alloc_slot()
instr = ["array", dest, count]
add_instr(["array", dest, count])
_i = 0
while (_i < count) {
push(instr, elem_slots[_i])
emit_2("push", dest, elem_slots[_i])
_i = _i + 1
}
push(s_instructions, instr)
return dest
}
@@ -1700,7 +1995,7 @@ var mcode = function(ast) {
if (kind == "record") {
list = expr.list
dest = alloc_slot()
push(s_instructions, ["record", dest, 0])
push(s_instructions, ["record", dest, length(list)])
_i = 0
while (_i < length(list)) {
pair = list[_i]
@@ -1810,6 +2105,10 @@ var mcode = function(ast) {
var func = null
var func_id = 0
var dest = 0
var guard_t = 0
var guard_err = null
var guard_done = null
var last_instr = null
if (stmt == null) {
return null
@@ -1825,12 +2124,21 @@ var mcode = function(ast) {
right = stmt.right
name = left.name
local_slot = find_var(name)
// Pop: var val = arr[]
// Pop: var val = arr[] (guarded)
if (stmt.pop == true && right != null) {
arr_expr = right.left
arr_slot = gen_expr(arr_expr, -1)
if (local_slot >= 0) {
guard_t = alloc_slot()
guard_err = gen_label("pop_err")
guard_done = gen_label("pop_done")
emit_2("is_array", guard_t, arr_slot)
emit_jump_cond("jump_false", guard_t, guard_err)
emit_2("pop", local_slot, arr_slot)
emit_jump(guard_done)
emit_label(guard_err)
emit_0("disrupt")
emit_label(guard_done)
}
return null
}
@@ -2007,6 +2315,13 @@ var mcode = function(ast) {
expr = stmt.expression
if (expr != null) {
slot = gen_expr(expr, -1)
// Mark tail calls: rename last invoke to tail_invoke
if (stmt.tail == true && !s_has_disruption) {
last_instr = s_instructions[length(s_instructions) - 1]
if (is_array(last_instr) && last_instr[0] == "invoke") {
last_instr[0] = "tail_invoke"
}
}
emit_1("return", slot)
} else {
null_slot = alloc_slot()
@@ -2189,6 +2504,7 @@ var mcode = function(ast) {
s_label_map = {}
s_is_arrow = is_arrow
s_has_disruption = disrupt_clause != null && is_array(disrupt_clause)
s_function_nr = fn_nr_node != null ? fn_nr_node : 0
@@ -2297,6 +2613,7 @@ var mcode = function(ast) {
// Compile disruption clause
if (disrupt_clause != null && is_array(disrupt_clause)) {
emit_label(gen_label("disruption"))
disruption_start = length(s_instructions)
_i = 0
while (_i < length(disrupt_clause)) {

Binary file not shown.

View File

@@ -18,6 +18,14 @@ add_project_arguments(
)
add_project_arguments('-Wno-narrowing', language: 'cpp')
if get_option('validate_gc')
add_project_arguments('-DVALIDATE_GC', language: 'c')
endif
if get_option('force_gc')
add_project_arguments('-DFORCE_GC_AT_MALLOC', language: 'c')
endif
deps = []
if host_machine.system() == 'darwin'

4
meson.options Normal file
View File

@@ -0,0 +1,4 @@
option('validate_gc', type: 'boolean', value: false,
description: 'Enable GC validation checks (stale pointer detection, pre-GC frame validation)')
option('force_gc', type: 'boolean', value: false,
description: 'Force GC on every allocation (makes stale pointer bugs deterministic)')

View File

@@ -570,19 +570,21 @@ static const JSCFunctionListEntry js_enet_peer_funcs[] = {
JSValue js_enet_use(JSContext *ctx)
{
JS_FRAME(ctx);
JS_NewClassID(&enet_host_id);
JS_NewClass(ctx, enet_host_id, &enet_host);
JSValue host_proto = JS_NewObject(ctx);
JS_SetPropertyFunctionList(ctx, host_proto, js_enet_host_funcs, countof(js_enet_host_funcs));
JS_SetClassProto(ctx, enet_host_id, host_proto);
JS_ROOT(host_proto, JS_NewObject(ctx));
JS_SetPropertyFunctionList(ctx, host_proto.val, js_enet_host_funcs, countof(js_enet_host_funcs));
JS_SetClassProto(ctx, enet_host_id, host_proto.val);
JS_NewClassID(&enet_peer_class_id);
JS_NewClass(ctx, enet_peer_class_id, &enet_peer_class);
JSValue peer_proto = JS_NewObject(ctx);
JS_SetPropertyFunctionList(ctx, peer_proto, js_enet_peer_funcs, countof(js_enet_peer_funcs));
JS_SetClassProto(ctx, enet_peer_class_id, peer_proto);
JS_ROOT(peer_proto, JS_NewObject(ctx));
JS_SetPropertyFunctionList(ctx, peer_proto.val, js_enet_peer_funcs, countof(js_enet_peer_funcs));
JS_SetClassProto(ctx, enet_peer_class_id, peer_proto.val);
JSValue export_obj = JS_NewObject(ctx);
JS_SetPropertyFunctionList(ctx, export_obj, js_enet_funcs, countof(js_enet_funcs));
return export_obj;
JS_ROOT(export_obj, JS_NewObject(ctx));
JS_SetPropertyFunctionList(ctx, export_obj.val, js_enet_funcs, countof(js_enet_funcs));
JS_RETURN(export_obj.val);
}

View File

@@ -319,9 +319,10 @@ static const JSCFunctionListEntry js_http_funcs[] = {
};
JSValue js_http_use(JSContext *js) {
JS_FRAME(js);
par_easycurl_init(0); // Initialize platform HTTP backend
JSValue obj = JS_NewObject(js);
JS_SetPropertyFunctionList(js, obj, js_http_funcs,
JS_ROOT(mod, JS_NewObject(js));
JS_SetPropertyFunctionList(js, mod.val, js_http_funcs,
sizeof(js_http_funcs)/sizeof(js_http_funcs[0]));
return obj;
JS_RETURN(mod.val);
}

View File

@@ -595,26 +595,27 @@ static const JSCFunctionListEntry js_socket_funcs[] = {
};
JSValue js_socket_use(JSContext *js) {
JSValue mod = JS_NewObject(js);
JS_SetPropertyFunctionList(js, mod, js_socket_funcs, countof(js_socket_funcs));
JS_FRAME(js);
JS_ROOT(mod, JS_NewObject(js));
JS_SetPropertyFunctionList(js, mod.val, js_socket_funcs, countof(js_socket_funcs));
// Add constants
JS_SetPropertyStr(js, mod, "AF_UNSPEC", JS_NewInt32(js, AF_UNSPEC));
JS_SetPropertyStr(js, mod, "AF_INET", JS_NewInt32(js, AF_INET));
JS_SetPropertyStr(js, mod, "AF_INET6", JS_NewInt32(js, AF_INET6));
JS_SetPropertyStr(js, mod, "AF_UNIX", JS_NewInt32(js, AF_UNIX));
JS_SetPropertyStr(js, mod, "SOCK_STREAM", JS_NewInt32(js, SOCK_STREAM));
JS_SetPropertyStr(js, mod, "SOCK_DGRAM", JS_NewInt32(js, SOCK_DGRAM));
JS_SetPropertyStr(js, mod, "AI_PASSIVE", JS_NewInt32(js, AI_PASSIVE));
JS_SetPropertyStr(js, mod, "SHUT_RD", JS_NewInt32(js, SHUT_RD));
JS_SetPropertyStr(js, mod, "SHUT_WR", JS_NewInt32(js, SHUT_WR));
JS_SetPropertyStr(js, mod, "SHUT_RDWR", JS_NewInt32(js, SHUT_RDWR));
JS_SetPropertyStr(js, mod, "SOL_SOCKET", JS_NewInt32(js, SOL_SOCKET));
JS_SetPropertyStr(js, mod, "SO_REUSEADDR", JS_NewInt32(js, SO_REUSEADDR));
return mod;
JS_SetPropertyStr(js, mod.val, "AF_UNSPEC", JS_NewInt32(js, AF_UNSPEC));
JS_SetPropertyStr(js, mod.val, "AF_INET", JS_NewInt32(js, AF_INET));
JS_SetPropertyStr(js, mod.val, "AF_INET6", JS_NewInt32(js, AF_INET6));
JS_SetPropertyStr(js, mod.val, "AF_UNIX", JS_NewInt32(js, AF_UNIX));
JS_SetPropertyStr(js, mod.val, "SOCK_STREAM", JS_NewInt32(js, SOCK_STREAM));
JS_SetPropertyStr(js, mod.val, "SOCK_DGRAM", JS_NewInt32(js, SOCK_DGRAM));
JS_SetPropertyStr(js, mod.val, "AI_PASSIVE", JS_NewInt32(js, AI_PASSIVE));
JS_SetPropertyStr(js, mod.val, "SHUT_RD", JS_NewInt32(js, SHUT_RD));
JS_SetPropertyStr(js, mod.val, "SHUT_WR", JS_NewInt32(js, SHUT_WR));
JS_SetPropertyStr(js, mod.val, "SHUT_RDWR", JS_NewInt32(js, SHUT_RDWR));
JS_SetPropertyStr(js, mod.val, "SOL_SOCKET", JS_NewInt32(js, SOL_SOCKET));
JS_SetPropertyStr(js, mod.val, "SO_REUSEADDR", JS_NewInt32(js, SO_REUSEADDR));
JS_RETURN(mod.val);
}

104
parse.cm
View File

@@ -1,6 +1,11 @@
var parse = function(tokens, src, filename, tokenizer) {
var _src_len = length(src)
var template_escape_map = {
n: "\n", t: "\t", r: "\r", "\\": "\\",
"`": "`", "$": "$", "0": character(0)
}
// ============================================================
// Parser Cursor
// ============================================================
@@ -175,6 +180,7 @@ var parse = function(tokens, src, filename, tokenizer) {
var tc = null
var tq = null
var esc_ch = null
var esc_val = null
var expr_tokens = null
var sub_ast = null
var sub_stmt = null
@@ -223,13 +229,8 @@ var parse = function(tokens, src, filename, tokenizer) {
while (tvi < tvlen) {
if (tv[tvi] == "\\" && tvi + 1 < tvlen) {
esc_ch = tv[tvi + 1]
if (esc_ch == "n") { push(fmt_parts, "\n") }
else if (esc_ch == "t") { push(fmt_parts, "\t") }
else if (esc_ch == "r") { push(fmt_parts, "\r") }
else if (esc_ch == "\\") { push(fmt_parts, "\\") }
else if (esc_ch == "`") { push(fmt_parts, "`") }
else if (esc_ch == "$") { push(fmt_parts, "$") }
else if (esc_ch == "0") { push(fmt_parts, character(0)) }
esc_val = template_escape_map[esc_ch]
if (esc_val != null) { push(fmt_parts, esc_val) }
else { push(fmt_parts, esc_ch) }
tvi = tvi + 2
} else if (tv[tvi] == "$" && tvi + 1 < tvlen && tv[tvi + 1] == "{") {
@@ -692,10 +693,10 @@ var parse = function(tokens, src, filename, tokenizer) {
if (tok.kind == "?") {
start = tok
advance()
then_expr = parse_expr()
then_expr = parse_assign_expr()
if (tok.kind == ":") advance()
else parse_error(tok, "expected ':' in ternary expression")
else_expr = parse_expr()
else_expr = parse_assign_expr()
node = ast_node("then", start)
node.expression = cond
node.then = then_expr
@@ -1425,7 +1426,9 @@ var parse = function(tokens, src, filename, tokenizer) {
vars: [],
in_loop: opts.in_loop == true,
function_nr: fn_nr,
is_function_scope: opts.is_func == true
is_function_scope: opts.is_func == true,
func_node: null,
has_inner_func: false
}
}
@@ -1478,6 +1481,15 @@ var parse = function(tokens, src, filename, tokenizer) {
return false
}
// Return the nearest scope, starting at `scope` itself and following
// parent links outward, whose is_function_scope flag is set; null when
// the chain ends without one.
var sem_find_func_scope = function(scope) {
  var cur = scope
  while (cur != null && !cur.is_function_scope) {
    cur = cur.parent
  }
  return cur
}
// Record `name` in the intrinsics list unless find() already locates it
// there, so each name is stored at most once.
var sem_add_intrinsic = function(name) {
  var known = find(intrinsics, name) != null
  if (!known) {
    push(intrinsics, name)
  }
}
@@ -1616,6 +1628,46 @@ var parse = function(tokens, src, filename, tokenizer) {
if (kind == "[" && left_node.right != null) {
sem_check_expr(scope, left_node.right)
}
// Type error detection for known-type constant objects
if (obj_expr != null && obj_expr.kind == "name" && obj_expr.name != null) {
v = sem_find_var(scope, obj_expr.name)
if (v != null && v.is_const && v.type_tag != null) {
if (kind == ".") {
if (v.type_tag == "array") {
sem_error(left_node, "cannot set property on array '" + obj_expr.name + "'")
}
} else if (kind == "[") {
if (left_node.right == null) {
// Push: a[] = val
if (v.type_tag != "array") {
sem_error(left_node, "push only works on arrays, not " + v.type_tag + " '" + obj_expr.name + "'")
}
} else if (v.type_tag == "array") {
if (left_node.right.kind == "text") {
sem_error(left_node, "cannot use text key on array '" + obj_expr.name + "'")
}
} else if (v.type_tag == "record") {
if (left_node.right.kind == "number" && is_integer(left_node.right.number)) {
sem_error(left_node, "cannot use integer key on record '" + obj_expr.name + "'; use text key")
}
}
}
} else if (v != null && v.is_const && v.type_tag == null) {
// Infer type_tag from usage pattern (def only)
if (kind == ".") {
v.type_tag = "record"
} else if (kind == "[") {
if (left_node.right == null) {
// Push: a[] = val → array
v.type_tag = "array"
} else if (left_node.right.kind == "number" && is_integer(left_node.right.number)) {
v.type_tag = "array"
} else if (left_node.right.kind == "text") {
v.type_tag = "record"
}
}
}
}
}
}
@@ -1635,6 +1687,7 @@ var parse = function(tokens, src, filename, tokenizer) {
var pname = null
var def_val = null
var sr = null
var enclosing = null
if (_assign_kinds[kind] == true) {
sem_check_assign_target(scope, expr.left)
@@ -1736,9 +1789,12 @@ var parse = function(tokens, src, filename, tokenizer) {
}
if (kind == "function") {
enclosing = sem_find_func_scope(scope)
if (enclosing != null) enclosing.has_inner_func = true
fn_nr_val = expr.function_nr
if (fn_nr_val == null) fn_nr_val = scope.function_nr
fn_scope = make_scope(scope, fn_nr_val, {is_func: true})
fn_scope.func_node = expr
expr.outer = scope.function_nr
i = 0
while (i < length(expr.list)) {
@@ -1819,6 +1875,8 @@ var parse = function(tokens, src, filename, tokenizer) {
var pname = null
var def_val = null
var sr = null
var enclosing = null
var func_scope = null
var tt = null
if (kind == "var_list") {
@@ -1861,7 +1919,7 @@ var parse = function(tokens, src, filename, tokenizer) {
sem_check_expr(scope, stmt.right)
if (name != null) {
tt = derive_type_tag(stmt.right)
if (tt != null) {
if (tt != null && tt != "null") {
existing = sem_find_var(scope, name)
if (existing != null) existing.type_tag = tt
}
@@ -1941,7 +1999,26 @@ var parse = function(tokens, src, filename, tokenizer) {
return null
}
if (kind == "return" || kind == "go") {
if (kind == "go") {
sem_check_expr(scope, stmt.expression)
if (stmt.expression == null || stmt.expression.kind != "(") {
sem_error(stmt, "'go' must be followed by a function call")
} else {
func_scope = sem_find_func_scope(scope)
if (func_scope != null && func_scope.func_node != null) {
if (func_scope.func_node.disruption != null) {
sem_error(stmt, "cannot use 'go' in a function with a disruption clause")
}
if (func_scope.has_inner_func) {
sem_error(stmt, "cannot use 'go' in a function that defines inner functions")
}
}
stmt.tail = true
}
return null
}
if (kind == "return") {
sem_check_expr(scope, stmt.expression)
if (stmt.expression != null && stmt.expression.kind == "(") {
stmt.tail = true
@@ -1982,11 +2059,14 @@ var parse = function(tokens, src, filename, tokenizer) {
}
if (kind == "function") {
enclosing = sem_find_func_scope(scope)
if (enclosing != null) enclosing.has_inner_func = true
name = stmt.name
if (name != null) sem_add_var(scope, name, {make: "function", fn_nr: scope.function_nr})
fn_nr_val = stmt.function_nr
if (fn_nr_val == null) fn_nr_val = scope.function_nr
fn_scope = make_scope(scope, fn_nr_val, {is_func: true})
fn_scope.func_node = stmt
stmt.outer = scope.function_nr
i = 0
while (i < length(stmt.list)) {

Binary file not shown.

116
prettify_mcode.ce Normal file
View File

@@ -0,0 +1,116 @@
// prettify_mcode.ce — reformat .mcode files to be human-readable
// Usage: ./cell --dev prettify_mcode boot/tokenize.cm.mcode
// ./cell --dev prettify_mcode boot/*.mcode
var fd = use("fd")
var json = use("json")
if (length(args) == 0) {
print("usage: cell prettify_mcode <file.mcode> [...]")
disrupt
}
// Collapse leaf arrays (instruction arrays) onto single lines.
// json_text: pretty-printed JSON text with one array element per line.
// An array is considered only when its opening line is a bare "[".
// If none of its element lines starts with "[" or "{", the array is
// rewritten as a single "[a, b, c]" line at the opener's indentation,
// keeping a trailing comma when the closing line had one.
// Every other line is passed through unchanged.
// Returns the rewritten text, lines joined with "\n".
var compact_arrays = function(json_text) {
  var lines = array(json_text, "\n")
  var result = []
  var i = 0
  var line = null
  var trimmed = null
  var collecting = false
  var collected = null
  var opener = null
  var indent = null
  var is_leaf = null
  var j = 0
  var inner = null
  var parts = null
  var trailing = null
  var chars = null
  var k = 0

  while (i < length(lines)) {
    line = lines[i]
    trimmed = trim(line)

    if (trimmed == "[") {
      // A bare "[" met while another array is still pending proves the
      // pending array is not a leaf: emit what was gathered so far
      // verbatim and start over at this inner bracket, so that the first
      // nested leaf array is collapsed just like its later siblings.
      if (collecting) {
        result[] = opener
        j = 0
        while (j < length(collected)) {
          result[] = collected[j]
          j = j + 1
        }
      }
      collecting = true
      opener = line
      // Leading spaces of the opener become the indent of the compact line
      chars = array(line)
      k = 0
      while (k < length(chars) && chars[k] == " ") {
        k = k + 1
      }
      indent = text(line, 0, k)
      collected = []
      i = i + 1
      continue
    }

    if (collecting) {
      if (trimmed == "]" || trimmed == "],") {
        // Closing bracket: decide whether the gathered elements are all scalars
        is_leaf = true
        j = 0
        while (j < length(collected)) {
          inner = trim(collected[j])
          if (starts_with(inner, "[") || starts_with(inner, "{")) {
            is_leaf = false
          }
          j = j + 1
        }
        if (is_leaf && length(collected) > 0) {
          // Strip each element's trailing comma, then rejoin on one line
          parts = []
          j = 0
          while (j < length(collected)) {
            inner = trim(collected[j])
            if (ends_with(inner, ",")) {
              inner = text(inner, 0, length(inner) - 1)
            }
            parts[] = inner
            j = j + 1
          }
          trailing = ""
          if (ends_with(trimmed, ",")) {
            trailing = ","
          }
          result[] = `${indent}[${text(parts, ", ")}]${trailing}`
        } else {
          // Not a leaf (or empty): reproduce the original lines untouched
          result[] = opener
          j = 0
          while (j < length(collected)) {
            result[] = collected[j]
            j = j + 1
          }
          result[] = line
        }
        collecting = false
      } else {
        collected[] = line
      }
      i = i + 1
      continue
    }

    result[] = line
    i = i + 1
  }
  return text(result, "\n")
}
// Rewrite each file named on the command line in place: decode its JSON,
// re-encode it with the (null, 2) pretty-print arguments, collapse leaf
// arrays, and write the result back. Paths that are not files are reported
// and skipped.
var arg_idx = 0
var target = null
var decoded = null
var formatted = null
var out = null

while (arg_idx < length(args)) {
  target = args[arg_idx]
  if (fd.is_file(target)) {
    decoded = json.decode(text(fd.slurp(target)))
    formatted = compact_arrays(json.encode(decoded, null, 2))
    out = fd.open(target, "w")
    fd.write(out, formatted)
    fd.close(out)
    print(`prettified ${target}`)
  } else {
    print(`skip ${target} (not found)`)
  }
  arg_idx = arg_idx + 1
}

531
qbe.cm
View File

@@ -98,6 +98,7 @@ var is_text = function(p, v) {
jmp @${p}.done
@${p}.no
%${p} =w copy 0
jmp @${p}.done
@${p}.done
`
}
@@ -174,6 +175,7 @@ var to_float64 = function(p, v) {
%${p}.fbits =l or %${p}.fs63, %${p}.fe52
%${p}.fbits =l or %${p}.fbits, %${p}.fmant
%${p} =d cast %${p}.fbits
jmp @${p}.done
@${p}.done
`
}
@@ -199,201 +201,37 @@ var new_bool = function(p, b) {
// new_float64 — C call to qbe_new_float64(ctx, val). Result: %{p}
var new_float64 = function(p, ctx, d) {
return ` %${p} =l call $__JS_NewFloat64(l ${ctx}, d ${d})
return ` %${p} =l call $qbe_new_float64(l ${ctx}, d ${d})
`
}
// ============================================================
// Arithmetic — add(p, ctx, a, b)
// Int fast path inline, text concat and float as C calls.
// Jumps to @disrupt on type mismatch.
// Arithmetic — add/sub/mul/div/mod(p, ctx, a, b)
// Simple C call wrappers. Type dispatch is handled in mcode.cm.
// ============================================================
var add = function(p, ctx, a, b) {
return `@${p}.start
%${p}.at =l and ${a}, 1
%${p}.bt =l and ${b}, 1
%${p}.not_int =l or %${p}.at, %${p}.bt
jnz %${p}.not_int, @${p}.not_both_int, @${p}.int_path
@${p}.int_path
%${p}.ia =l sar ${a}, 1
%${p}.ib =l sar ${b}, 1
%${p}.sum =l add %${p}.ia, %${p}.ib
%${p}.lo =w csltl %${p}.sum, ${int32_min}
%${p}.hi =w csgtl %${p}.sum, ${int32_max}
%${p}.ov =w or %${p}.lo, %${p}.hi
jnz %${p}.ov, @${p}.int_overflow, @${p}.int_ok
@${p}.int_ok
%${p}.rw =w copy %${p}.sum
%${p}.rext =l extuw %${p}.rw
%${p} =l shl %${p}.rext, 1
jmp @${p}.done
@${p}.int_overflow
%${p}.fd =d sltof %${p}.sum
%${p} =l call $__JS_NewFloat64(l ${ctx}, d %${p}.fd)
jmp @${p}.done
@${p}.not_both_int
%${p}.a_is_text =w call $JS_IsText(l ${a})
%${p}.b_is_text =w call $JS_IsText(l ${b})
%${p}.both_text =w and %${p}.a_is_text, %${p}.b_is_text
jnz %${p}.both_text, @${p}.text_path, @${p}.chk_num
@${p}.text_path
%${p} =l call $JS_ConcatString(l ${ctx}, l ${a}, l ${b})
jmp @${p}.done
@${p}.chk_num
%${p}.a_is_num =w call $JS_IsNumber(l ${a})
%${p}.b_is_num =w call $JS_IsNumber(l ${b})
%${p}.both_num =w and %${p}.a_is_num, %${p}.b_is_num
jnz %${p}.both_num, @${p}.float_path, @disrupt
@${p}.float_path
%${p} =l call $qbe_float_add(l ${ctx}, l ${a}, l ${b})
@${p}.done
return ` %${p} =l call $qbe_float_add(l ${ctx}, l ${a}, l ${b})
`
}
var sub = function(p, ctx, a, b) {
return `@${p}.start
%${p}.at =l and ${a}, 1
%${p}.bt =l and ${b}, 1
%${p}.not_int =l or %${p}.at, %${p}.bt
jnz %${p}.not_int, @${p}.not_both_int, @${p}.int_path
@${p}.int_path
%${p}.ia =l sar ${a}, 1
%${p}.ib =l sar ${b}, 1
%${p}.diff =l sub %${p}.ia, %${p}.ib
%${p}.lo =w csltl %${p}.diff, ${int32_min}
%${p}.hi =w csgtl %${p}.diff, ${int32_max}
%${p}.ov =w or %${p}.lo, %${p}.hi
jnz %${p}.ov, @${p}.int_overflow, @${p}.int_ok
@${p}.int_ok
%${p}.rw =w copy %${p}.diff
%${p}.rext =l extuw %${p}.rw
%${p} =l shl %${p}.rext, 1
jmp @${p}.done
@${p}.int_overflow
%${p}.fd =d sltof %${p}.diff
%${p} =l call $__JS_NewFloat64(l ${ctx}, d %${p}.fd)
jmp @${p}.done
@${p}.not_both_int
%${p}.a_is_num =w call $JS_IsNumber(l ${a})
%${p}.b_is_num =w call $JS_IsNumber(l ${b})
%${p}.both_num =w and %${p}.a_is_num, %${p}.b_is_num
jnz %${p}.both_num, @${p}.float_path, @disrupt
@${p}.float_path
%${p} =l call $qbe_float_sub(l ${ctx}, l ${a}, l ${b})
@${p}.done
return ` %${p} =l call $qbe_float_sub(l ${ctx}, l ${a}, l ${b})
`
}
var mul = function(p, ctx, a, b) {
return `@${p}.start
%${p}.at =l and ${a}, 1
%${p}.bt =l and ${b}, 1
%${p}.not_int =l or %${p}.at, %${p}.bt
jnz %${p}.not_int, @${p}.not_both_int, @${p}.int_path
@${p}.int_path
%${p}.ia =l sar ${a}, 1
%${p}.ib =l sar ${b}, 1
%${p}.prod =l mul %${p}.ia, %${p}.ib
%${p}.lo =w csltl %${p}.prod, ${int32_min}
%${p}.hi =w csgtl %${p}.prod, ${int32_max}
%${p}.ov =w or %${p}.lo, %${p}.hi
jnz %${p}.ov, @${p}.int_overflow, @${p}.int_ok
@${p}.int_ok
%${p}.rw =w copy %${p}.prod
%${p}.rext =l extuw %${p}.rw
%${p} =l shl %${p}.rext, 1
jmp @${p}.done
@${p}.int_overflow
%${p}.fd =d sltof %${p}.prod
%${p} =l call $__JS_NewFloat64(l ${ctx}, d %${p}.fd)
jmp @${p}.done
@${p}.not_both_int
%${p}.a_is_num =w call $JS_IsNumber(l ${a})
%${p}.b_is_num =w call $JS_IsNumber(l ${b})
%${p}.both_num =w and %${p}.a_is_num, %${p}.b_is_num
jnz %${p}.both_num, @${p}.float_path, @disrupt
@${p}.float_path
%${p} =l call $qbe_float_mul(l ${ctx}, l ${a}, l ${b})
@${p}.done
return ` %${p} =l call $qbe_float_mul(l ${ctx}, l ${a}, l ${b})
`
}
var div = function(p, ctx, a, b) {
return `@${p}.start
%${p}.at =l and ${a}, 1
%${p}.bt =l and ${b}, 1
%${p}.not_int =l or %${p}.at, %${p}.bt
jnz %${p}.not_int, @${p}.not_both_int, @${p}.int_path
@${p}.int_path
%${p}.ia =w copy 0
%${p}.tmp =l sar ${a}, 1
%${p}.ia =w copy %${p}.tmp
%${p}.ib =w copy 0
%${p}.tmp2 =l sar ${b}, 1
%${p}.ib =w copy %${p}.tmp2
%${p}.div0 =w ceqw %${p}.ib, 0
jnz %${p}.div0, @${p}.ret_null, @${p}.chk_exact
@${p}.ret_null
%${p} =l copy ${js_null}
jmp @${p}.done
@${p}.chk_exact
%${p}.rem =w rem %${p}.ia, %${p}.ib
%${p}.exact =w ceqw %${p}.rem, 0
jnz %${p}.exact, @${p}.int_div, @${p}.int_to_float
@${p}.int_div
%${p}.q =w div %${p}.ia, %${p}.ib
%${p}.qext =l extuw %${p}.q
%${p} =l shl %${p}.qext, 1
jmp @${p}.done
@${p}.int_to_float
%${p}.da =d swtof %${p}.ia
%${p}.db =d swtof %${p}.ib
%${p}.dr =d div %${p}.da, %${p}.db
%${p} =l call $__JS_NewFloat64(l ${ctx}, d %${p}.dr)
jmp @${p}.done
@${p}.not_both_int
%${p}.a_is_num =w call $JS_IsNumber(l ${a})
%${p}.b_is_num =w call $JS_IsNumber(l ${b})
%${p}.both_num =w and %${p}.a_is_num, %${p}.b_is_num
jnz %${p}.both_num, @${p}.float_path, @disrupt
@${p}.float_path
%${p} =l call $qbe_float_div(l ${ctx}, l ${a}, l ${b})
@${p}.done
return ` %${p} =l call $qbe_float_div(l ${ctx}, l ${a}, l ${b})
`
}
var mod = function(p, ctx, a, b) {
return `@${p}.start
%${p}.at =l and ${a}, 1
%${p}.bt =l and ${b}, 1
%${p}.not_int =l or %${p}.at, %${p}.bt
jnz %${p}.not_int, @${p}.not_both_int, @${p}.int_path
@${p}.int_path
%${p}.ia =w copy 0
%${p}.tmp =l sar ${a}, 1
%${p}.ia =w copy %${p}.tmp
%${p}.ib =w copy 0
%${p}.tmp2 =l sar ${b}, 1
%${p}.ib =w copy %${p}.tmp2
%${p}.div0 =w ceqw %${p}.ib, 0
jnz %${p}.div0, @${p}.ret_null, @${p}.do_mod
@${p}.ret_null
%${p} =l copy ${js_null}
jmp @${p}.done
@${p}.do_mod
%${p}.r =w rem %${p}.ia, %${p}.ib
%${p}.rext =l extuw %${p}.r
%${p} =l shl %${p}.rext, 1
jmp @${p}.done
@${p}.not_both_int
%${p}.a_is_num =w call $JS_IsNumber(l ${a})
%${p}.b_is_num =w call $JS_IsNumber(l ${b})
%${p}.both_num =w and %${p}.a_is_num, %${p}.b_is_num
jnz %${p}.both_num, @${p}.float_path, @disrupt
@${p}.float_path
%${p} =l call $qbe_float_mod(l ${ctx}, l ${a}, l ${b})
@${p}.done
return ` %${p} =l call $qbe_float_mod(l ${ctx}, l ${a}, l ${b})
`
}
@@ -484,6 +322,7 @@ var cmp = function(p, ctx, a, b) {
jmp @${p}.done
@${p}.mismatch
%${p} =l copy ${mismatch_val}
jmp @${p}.done
@${p}.done
`
}
@@ -518,90 +357,28 @@ var gt = function(p, ctx, a, b) {
var ge = function(p, ctx, a, b) {
_qflags = {int_cmp_op: "csgew", float_id: 5, is_eq: false, is_ne: false, null_true: true}
return cmp(p, ctx, a, b)
}
// ============================================================
// Unary Ops
// ============================================================
// neg(p, ctx, v) — negate. Int fast path (INT32_MIN edge case), else C call.
// neg(p, ctx, v) — negate via C call (type guards in mcode)
var neg = function(p, ctx, v) {
return `@${p}.start
%${p}.tag =l and ${v}, 1
%${p}.is_int =w ceql %${p}.tag, 0
jnz %${p}.is_int, @${p}.int_path, @${p}.float_path
@${p}.int_path
%${p}.sl =l sar ${v}, 1
%${p}.iw =w copy %${p}.sl
%${p}.is_min =w ceqw %${p}.iw, ${int32_min}
jnz %${p}.is_min, @${p}.min_overflow, @${p}.int_ok
@${p}.min_overflow
%${p}.fd =d swtof %${p}.iw
%${p}.fdn =d neg %${p}.fd
%${p} =l call $__JS_NewFloat64(l ${ctx}, d %${p}.fdn)
jmp @${p}.done
@${p}.int_ok
%${p}.ni =w sub 0, %${p}.iw
%${p}.niext =l extuw %${p}.ni
%${p} =l shl %${p}.niext, 1
jmp @${p}.done
@${p}.float_path
%${p} =l call $qbe_float_neg(l ${ctx}, l ${v})
@${p}.done
return ` %${p} =l call $qbe_float_neg(l ${ctx}, l ${v})
`
}
// inc(p, ctx, v) — increment. Int fast path (INT32_MAX edge case), else C call.
// inc(p, ctx, v) — increment via C call (type guards in mcode)
var inc = function(p, ctx, v) {
return `@${p}.start
%${p}.tag =l and ${v}, 1
%${p}.is_int =w ceql %${p}.tag, 0
jnz %${p}.is_int, @${p}.int_path, @${p}.float_path
@${p}.int_path
%${p}.sl =l sar ${v}, 1
%${p}.iw =w copy %${p}.sl
%${p}.is_max =w ceqw %${p}.iw, ${int32_max}
jnz %${p}.is_max, @${p}.max_overflow, @${p}.int_ok
@${p}.max_overflow
%${p}.fd =d swtof %${p}.iw
%${p}.fd1 =d add %${p}.fd, d_1.0
%${p} =l call $__JS_NewFloat64(l ${ctx}, d %${p}.fd1)
jmp @${p}.done
@${p}.int_ok
%${p}.ni =w add %${p}.iw, 1
%${p}.niext =l extuw %${p}.ni
%${p} =l shl %${p}.niext, 1
jmp @${p}.done
@${p}.float_path
%${p} =l call $qbe_float_inc(l ${ctx}, l ${v})
@${p}.done
return ` %${p} =l call $qbe_float_inc(l ${ctx}, l ${v})
`
}
// dec(p, ctx, v) — decrement. Int fast path (INT32_MIN edge case), else C call.
// dec(p, ctx, v) — decrement via C call (type guards in mcode)
var dec = function(p, ctx, v) {
return `@${p}.start
%${p}.tag =l and ${v}, 1
%${p}.is_int =w ceql %${p}.tag, 0
jnz %${p}.is_int, @${p}.int_path, @${p}.float_path
@${p}.int_path
%${p}.sl =l sar ${v}, 1
%${p}.iw =w copy %${p}.sl
%${p}.is_min =w ceqw %${p}.iw, ${int32_min}
jnz %${p}.is_min, @${p}.min_overflow, @${p}.int_ok
@${p}.min_overflow
%${p}.fd =d swtof %${p}.iw
%${p}.fd1 =d sub %${p}.fd, d_1.0
%${p} =l call $__JS_NewFloat64(l ${ctx}, d %${p}.fd1)
jmp @${p}.done
@${p}.int_ok
%${p}.ni =w sub %${p}.iw, 1
%${p}.niext =l extuw %${p}.ni
%${p} =l shl %${p}.niext, 1
jmp @${p}.done
@${p}.float_path
%${p} =l call $qbe_float_dec(l ${ctx}, l ${v})
@${p}.done
return ` %${p} =l call $qbe_float_dec(l ${ctx}, l ${v})
`
}
@@ -615,22 +392,9 @@ var lnot = function(p, ctx, v) {
`
}
// bnot(p, ctx, v) — bitwise not. Convert to int32, ~, re-tag.
// bnot(p, ctx, v) — bitwise not via C call
var bnot = function(p, ctx, v) {
return `@${p}.start
%${p}.tag =l and ${v}, 1
%${p}.is_int =w ceql %${p}.tag, 0
jnz %${p}.is_int, @${p}.int_path, @${p}.slow_path
@${p}.int_path
%${p}.sl =l sar ${v}, 1
%${p}.iw =w copy %${p}.sl
%${p}.nw =w xor %${p}.iw, -1
%${p}.nex =l extuw %${p}.nw
%${p} =l shl %${p}.nex, 1
jmp @${p}.done
@${p}.slow_path
%${p} =l call $qbe_bnot(l ${ctx}, l ${v})
@${p}.done
return ` %${p} =l call $qbe_bnot(l ${ctx}, l ${v})
`
}
@@ -639,92 +403,34 @@ var bnot = function(p, ctx, v) {
// Both operands must be numeric. Int fast path, float -> convert to int32.
// ============================================================
// reads _qop from closure
var bitwise_op = function(p, ctx, a, b) {
var qbe_op = _qop
return `@${p}.start
%${p}.at =l and ${a}, 1
%${p}.bt =l and ${b}, 1
%${p}.not_int =l or %${p}.at, %${p}.bt
jnz %${p}.not_int, @${p}.slow_path, @${p}.int_path
@${p}.int_path
%${p}.ia =l sar ${a}, 1
%${p}.iaw =w copy %${p}.ia
%${p}.ib =l sar ${b}, 1
%${p}.ibw =w copy %${p}.ib
%${p}.rw =w ${qbe_op} %${p}.iaw, %${p}.ibw
%${p}.rext =l extuw %${p}.rw
%${p} =l shl %${p}.rext, 1
jmp @${p}.done
@${p}.slow_path
%${p}.a_is_num =w call $JS_IsNumber(l ${a})
%${p}.b_is_num =w call $JS_IsNumber(l ${b})
%${p}.both_num =w and %${p}.a_is_num, %${p}.b_is_num
jnz %${p}.both_num, @${p}.float_to_int, @disrupt
@${p}.float_to_int
%${p} =l call $qbe_bitwise_${qbe_op}(l ${ctx}, l ${a}, l ${b})
@${p}.done
var band = function(p, ctx, a, b) {
return ` %${p} =l call $qbe_bitwise_and(l ${ctx}, l ${a}, l ${b})
`
}
var band = function(p, ctx, a, b) {
_qop = "and"
return bitwise_op(p, ctx, a, b)
}
var bor = function(p, ctx, a, b) {
_qop = "or"
return bitwise_op(p, ctx, a, b)
return ` %${p} =l call $qbe_bitwise_or(l ${ctx}, l ${a}, l ${b})
`
}
var bxor = function(p, ctx, a, b) {
_qop = "xor"
return bitwise_op(p, ctx, a, b)
}
// Shift ops: mask shift amount to 5 bits (& 31)
// reads _qop from closure
var shift_op = function(p, ctx, a, b) {
var qbe_op = _qop
return `@${p}.start
%${p}.at =l and ${a}, 1
%${p}.bt =l and ${b}, 1
%${p}.not_int =l or %${p}.at, %${p}.bt
jnz %${p}.not_int, @${p}.slow_path, @${p}.int_path
@${p}.int_path
%${p}.ia =l sar ${a}, 1
%${p}.iaw =w copy %${p}.ia
%${p}.ib =l sar ${b}, 1
%${p}.ibw =w copy %${p}.ib
%${p}.sh =w and %${p}.ibw, 31
%${p}.rw =w ${qbe_op} %${p}.iaw, %${p}.sh
%${p}.rext =l extuw %${p}.rw
%${p} =l shl %${p}.rext, 1
jmp @${p}.done
@${p}.slow_path
%${p}.a_is_num =w call $JS_IsNumber(l ${a})
%${p}.b_is_num =w call $JS_IsNumber(l ${b})
%${p}.both_num =w and %${p}.a_is_num, %${p}.b_is_num
jnz %${p}.both_num, @${p}.float_to_int, @disrupt
@${p}.float_to_int
%${p} =l call $qbe_shift_${qbe_op}(l ${ctx}, l ${a}, l ${b})
@${p}.done
return ` %${p} =l call $qbe_bitwise_xor(l ${ctx}, l ${a}, l ${b})
`
}
var shl = function(p, ctx, a, b) {
_qop = "shl"
return shift_op(p, ctx, a, b)
return ` %${p} =l call $qbe_shift_shl(l ${ctx}, l ${a}, l ${b})
`
}
var shr = function(p, ctx, a, b) {
_qop = "sar"
return shift_op(p, ctx, a, b)
return ` %${p} =l call $qbe_shift_sar(l ${ctx}, l ${a}, l ${b})
`
}
var ushr = function(p, ctx, a, b) {
_qop = "shr"
return shift_op(p, ctx, a, b)
return ` %${p} =l call $qbe_shift_shr(l ${ctx}, l ${a}, l ${b})
`
}
// ============================================================
@@ -732,167 +438,6 @@ var ushr = function(p, ctx, a, b) {
// These map directly to the new IR ops emitted by mcode.cm.
// ============================================================
// --- Arithmetic (int path) ---
// add_int: assume both operands are tagged ints. Overflow -> float.
var add_int = function(p, ctx, a, b) {
return ` %${p}.ia =l sar ${a}, 1
%${p}.ib =l sar ${b}, 1
%${p}.sum =l add %${p}.ia, %${p}.ib
%${p}.lo =w csltl %${p}.sum, ${int32_min}
%${p}.hi =w csgtl %${p}.sum, ${int32_max}
%${p}.ov =w or %${p}.lo, %${p}.hi
jnz %${p}.ov, @${p}.ov, @${p}.ok
@${p}.ok
%${p}.rw =w copy %${p}.sum
%${p}.rext =l extuw %${p}.rw
%${p} =l shl %${p}.rext, 1
jmp @${p}.done
@${p}.ov
%${p}.fd =d sltof %${p}.sum
%${p} =l call $__JS_NewFloat64(l ${ctx}, d %${p}.fd)
@${p}.done
`
}
var sub_int = function(p, ctx, a, b) {
return ` %${p}.ia =l sar ${a}, 1
%${p}.ib =l sar ${b}, 1
%${p}.diff =l sub %${p}.ia, %${p}.ib
%${p}.lo =w csltl %${p}.diff, ${int32_min}
%${p}.hi =w csgtl %${p}.diff, ${int32_max}
%${p}.ov =w or %${p}.lo, %${p}.hi
jnz %${p}.ov, @${p}.ov, @${p}.ok
@${p}.ok
%${p}.rw =w copy %${p}.diff
%${p}.rext =l extuw %${p}.rw
%${p} =l shl %${p}.rext, 1
jmp @${p}.done
@${p}.ov
%${p}.fd =d sltof %${p}.diff
%${p} =l call $__JS_NewFloat64(l ${ctx}, d %${p}.fd)
@${p}.done
`
}
var mul_int = function(p, ctx, a, b) {
return ` %${p}.ia =l sar ${a}, 1
%${p}.ib =l sar ${b}, 1
%${p}.prod =l mul %${p}.ia, %${p}.ib
%${p}.lo =w csltl %${p}.prod, ${int32_min}
%${p}.hi =w csgtl %${p}.prod, ${int32_max}
%${p}.ov =w or %${p}.lo, %${p}.hi
jnz %${p}.ov, @${p}.ov, @${p}.ok
@${p}.ok
%${p}.rw =w copy %${p}.prod
%${p}.rext =l extuw %${p}.rw
%${p} =l shl %${p}.rext, 1
jmp @${p}.done
@${p}.ov
%${p}.fd =d sltof %${p}.prod
%${p} =l call $__JS_NewFloat64(l ${ctx}, d %${p}.fd)
@${p}.done
`
}
var div_int = function(p, ctx, a, b) {
return ` %${p}.ia =w copy 0
%${p}.tmp =l sar ${a}, 1
%${p}.ia =w copy %${p}.tmp
%${p}.ib =w copy 0
%${p}.tmp2 =l sar ${b}, 1
%${p}.ib =w copy %${p}.tmp2
%${p}.div0 =w ceqw %${p}.ib, 0
jnz %${p}.div0, @${p}.null, @${p}.chk
@${p}.null
%${p} =l copy ${js_null}
jmp @${p}.done
@${p}.chk
%${p}.rem =w rem %${p}.ia, %${p}.ib
%${p}.exact =w ceqw %${p}.rem, 0
jnz %${p}.exact, @${p}.idiv, @${p}.fdiv
@${p}.idiv
%${p}.q =w div %${p}.ia, %${p}.ib
%${p}.qext =l extuw %${p}.q
%${p} =l shl %${p}.qext, 1
jmp @${p}.done
@${p}.fdiv
%${p}.da =d swtof %${p}.ia
%${p}.db =d swtof %${p}.ib
%${p}.dr =d div %${p}.da, %${p}.db
%${p} =l call $__JS_NewFloat64(l ${ctx}, d %${p}.dr)
@${p}.done
`
}
var mod_int = function(p, ctx, a, b) {
return ` %${p}.ia =w copy 0
%${p}.tmp =l sar ${a}, 1
%${p}.ia =w copy %${p}.tmp
%${p}.ib =w copy 0
%${p}.tmp2 =l sar ${b}, 1
%${p}.ib =w copy %${p}.tmp2
%${p}.div0 =w ceqw %${p}.ib, 0
jnz %${p}.div0, @${p}.null, @${p}.do_mod
@${p}.null
%${p} =l copy ${js_null}
jmp @${p}.done
@${p}.do_mod
%${p}.r =w rem %${p}.ia, %${p}.ib
%${p}.rext =l extuw %${p}.r
%${p} =l shl %${p}.rext, 1
@${p}.done
`
}
var neg_int = function(p, ctx, v) {
return ` %${p}.sl =l sar ${v}, 1
%${p}.iw =w copy %${p}.sl
%${p}.is_min =w ceqw %${p}.iw, ${int32_min}
jnz %${p}.is_min, @${p}.ov, @${p}.ok
@${p}.ov
%${p}.fd =d swtof %${p}.iw
%${p}.fdn =d neg %${p}.fd
%${p} =l call $__JS_NewFloat64(l ${ctx}, d %${p}.fdn)
jmp @${p}.done
@${p}.ok
%${p}.ni =w sub 0, %${p}.iw
%${p}.niext =l extuw %${p}.ni
%${p} =l shl %${p}.niext, 1
@${p}.done
`
}
// --- Arithmetic (float path) ---
var add_float = function(p, ctx, a, b) {
return ` %${p} =l call $qbe_float_add(l ${ctx}, l ${a}, l ${b})
`
}
var sub_float = function(p, ctx, a, b) {
return ` %${p} =l call $qbe_float_sub(l ${ctx}, l ${a}, l ${b})
`
}
var mul_float = function(p, ctx, a, b) {
return ` %${p} =l call $qbe_float_mul(l ${ctx}, l ${a}, l ${b})
`
}
var div_float = function(p, ctx, a, b) {
return ` %${p} =l call $qbe_float_div(l ${ctx}, l ${a}, l ${b})
`
}
var mod_float = function(p, ctx, a, b) {
return ` %${p} =l call $qbe_float_mod(l ${ctx}, l ${a}, l ${b})
`
}
var neg_float = function(p, ctx, v) {
return ` %${p} =l call $qbe_float_neg(l ${ctx}, l ${v})
`
}
// --- Text concat ---
var concat = function(p, ctx, a, b) {
return ` %${p} =l call $JS_ConcatString(l ${ctx}, l ${a}, l ${b})
@@ -1039,20 +584,6 @@ return {
shl: shl,
shr: shr,
ushr: ushr,
// decomposed arithmetic (int path)
add_int: add_int,
sub_int: sub_int,
mul_int: mul_int,
div_int: div_int,
mod_int: mod_int,
neg_int: neg_int,
// decomposed arithmetic (float path)
add_float: add_float,
sub_float: sub_float,
mul_float: mul_float,
div_float: div_float,
mod_float: mod_float,
neg_float: neg_float,
// text concat
concat: concat,
// decomposed comparisons (int)

BIN
qbe.mach

Binary file not shown.

View File

@@ -76,6 +76,7 @@ var qbe_emit = function(ir, qbe) {
var instrs = fn.instructions
var nr_slots = fn.nr_slots
var nr_args = fn.nr_args
var captured = build_captured(fn)
var name = is_main ? "cell_main" : "cell_fn_" + text(fn_idx)
name = sanitize(name)
var i = 0
@@ -88,6 +89,7 @@ var qbe_emit = function(ir, qbe) {
var p = null
var pn = null
var sl = null
var lbl = null
var fop_id = 0
var nr_elems = 0
var ei = 0
@@ -113,22 +115,45 @@ var qbe_emit = function(ir, qbe) {
emit(` storel ${s(slot)}, %p${text(slot)}`)
}
// Re-read captured slots from the frame. Emitted after an invoke, because a
// closure that ran during the call may have stored a new value into the frame
// slot, leaving the local temporary out of date.
var reload_captured = function() {
  var slot = 0
  var key = null
  while (slot < nr_slots) {
    key = text(slot)
    // `captured` is keyed by the slot number rendered as text
    if (captured[key] == true) {
      emit(` ${s(slot)} =l loadl %p${key}`)
    }
    slot = slot + 1
  }
}
// Walk instructions
// Slot loads above are not terminators
var last_was_term = false
i = 0
while (i < length(instrs)) {
instr = instrs[i]
i = i + 1
// Labels are plain strings
// Labels are plain strings; skip _nop_ur_ pseudo-labels from streamline
if (is_text(instr)) {
emit("@" + sanitize(instr))
if (starts_with(instr, "_nop_ur_")) continue
lbl = sanitize(instr)
if (!last_was_term) {
emit(` jmp @${lbl}`)
}
emit("@" + lbl)
last_was_term = false
continue
}
// Skip dead code: non-label instructions after a terminator are unreachable
if (last_was_term) continue
op = instr[0]
a1 = instr[1]
a2 = instr[2]
a3 = instr[3]
last_was_term = false
// --- Constants ---
@@ -157,11 +182,11 @@ var qbe_emit = function(ir, qbe) {
if (is_integer(a2)) {
emit(` ${s(a1)} =l copy ${text(a2 * 2)}`)
} else {
emit(` ${s(a1)} =l call $__JS_NewFloat64(l %ctx, d d_${text(a2)})`)
emit(` ${s(a1)} =l call $qbe_new_float64(l %ctx, d d_${text(a2)})`)
}
} else if (is_text(a2)) {
sl = intern_str(a2)
emit(` ${s(a1)} =l call $JS_NewString(l %ctx, l ${sl})`)
emit(` ${s(a1)} =l call $qbe_new_string(l %ctx, l ${sl})`)
} else if (is_object(a2)) {
if (a2.make == "intrinsic") {
sl = intern_str(a2.name)
@@ -170,13 +195,13 @@ var qbe_emit = function(ir, qbe) {
if (a2.number != null && is_integer(a2.number)) {
emit(` ${s(a1)} =l copy ${text(a2.number * 2)}`)
} else if (a2.number != null) {
emit(` ${s(a1)} =l call $__JS_NewFloat64(l %ctx, d d_${text(a2.number)})`)
emit(` ${s(a1)} =l call $qbe_new_float64(l %ctx, d d_${text(a2.number)})`)
} else {
emit(` ${s(a1)} =l copy ${text(qbe.js_null)}`)
}
} else if (a2.kind == "text") {
sl = intern_str(a2.value)
emit(` ${s(a1)} =l call $JS_NewString(l %ctx, l ${sl})`)
emit(` ${s(a1)} =l call $qbe_new_string(l %ctx, l ${sl})`)
} else if (a2.kind == "true") {
emit(` ${s(a1)} =l copy ${text(qbe.js_true)}`)
} else if (a2.kind == "false") {
@@ -201,78 +226,53 @@ var qbe_emit = function(ir, qbe) {
continue
}
// --- Arithmetic (int path) — use qbe.cm macros ---
// --- Generic arithmetic (VM dispatches int/float) ---
if (op == "add_int") {
if (op == "add") {
p = fresh()
emit(qbe.add_int(p, "%ctx", s(a2), s(a3)))
emit(` %${p} =l call $cell_rt_add(l %ctx, l ${s(a2)}, l ${s(a3)})`)
emit(` ${s(a1)} =l copy %${p}`)
wb(a1)
continue
}
if (op == "sub_int") {
if (op == "subtract") {
p = fresh()
emit(qbe.sub_int(p, "%ctx", s(a2), s(a3)))
emit(qbe.sub(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
wb(a1)
continue
}
if (op == "mul_int") {
if (op == "multiply") {
p = fresh()
emit(qbe.mul_int(p, "%ctx", s(a2), s(a3)))
emit(qbe.mul(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
wb(a1)
continue
}
if (op == "div_int") {
if (op == "divide") {
p = fresh()
emit(qbe.div_int(p, "%ctx", s(a2), s(a3)))
emit(qbe.div(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
wb(a1)
continue
}
if (op == "mod_int") {
if (op == "modulo") {
p = fresh()
emit(qbe.mod_int(p, "%ctx", s(a2), s(a3)))
emit(qbe.mod(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
wb(a1)
continue
}
if (op == "negate") {
p = fresh()
emit(qbe.neg(p, "%ctx", s(a2)))
emit(` ${s(a1)} =l copy %${p}`)
wb(a1)
continue
}
// --- Arithmetic (float path) ---
if (op == "add_float") {
p = fresh()
emit(qbe.add_float(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
wb(a1)
continue
}
if (op == "sub_float") {
p = fresh()
emit(qbe.sub_float(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
wb(a1)
continue
}
if (op == "mul_float") {
p = fresh()
emit(qbe.mul_float(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
wb(a1)
continue
}
if (op == "div_float") {
p = fresh()
emit(qbe.div_float(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
wb(a1)
continue
}
if (op == "mod_float") {
p = fresh()
emit(qbe.mod_float(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
if (op == "pow") {
emit(` ${s(a1)} =l call $qbe_float_pow(l %ctx, l ${s(a2)}, l ${s(a3)})`)
wb(a1)
continue
}
@@ -336,6 +336,46 @@ var qbe_emit = function(ir, qbe) {
wb(a1)
continue
}
if (op == "is_array") {
p = fresh()
emit(` %${p} =w call $JS_IsArray(l ${s(a2)})`)
emit(qbe.new_bool(p + ".r", "%" + p))
emit(` ${s(a1)} =l copy %${p}.r`)
wb(a1)
continue
}
if (op == "is_func") {
p = fresh()
emit(` %${p} =w call $JS_IsFunction(l ${s(a2)})`)
emit(qbe.new_bool(p + ".r", "%" + p))
emit(` ${s(a1)} =l copy %${p}.r`)
wb(a1)
continue
}
if (op == "is_record") {
p = fresh()
emit(` %${p} =w call $JS_IsRecord(l ${s(a2)})`)
emit(qbe.new_bool(p + ".r", "%" + p))
emit(` ${s(a1)} =l copy %${p}.r`)
wb(a1)
continue
}
if (op == "is_stone") {
p = fresh()
emit(` %${p} =w call $JS_IsStone(l ${s(a2)})`)
emit(qbe.new_bool(p + ".r", "%" + p))
emit(` ${s(a1)} =l copy %${p}.r`)
wb(a1)
continue
}
if (op == "is_proxy") {
p = fresh()
emit(` %${p} =w call $cell_rt_is_proxy(l %ctx, l ${s(a2)})`)
emit(qbe.new_bool(p + ".r", "%" + p))
emit(` ${s(a1)} =l copy %${p}.r`)
wb(a1)
continue
}
// --- Comparisons (int path) ---
@@ -398,14 +438,30 @@ var qbe_emit = function(ir, qbe) {
wb(a1)
continue
}
if (op == "lt_float" || op == "gt_float" || op == "le_float" || op == "ge_float") {
if (op == "lt_float") {
p = fresh()
fop_id = 0
if (op == "lt_float") fop_id = 2
else if (op == "le_float") fop_id = 3
else if (op == "gt_float") fop_id = 4
else if (op == "ge_float") fop_id = 5
emit(qbe.cmp_float != null ? qbe.cmp_float(p, "%ctx", s(a2), s(a3), fop_id) : ` %${p} =l call $qbe_float_cmp(l %ctx, w ${text(fop_id)}, l ${s(a2)}, l ${s(a3)})`)
emit(qbe.lt_float(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
wb(a1)
continue
}
if (op == "le_float") {
p = fresh()
emit(qbe.le_float(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
wb(a1)
continue
}
if (op == "gt_float") {
p = fresh()
emit(qbe.gt_float(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
wb(a1)
continue
}
if (op == "ge_float") {
p = fresh()
emit(qbe.ge_float(p, "%ctx", s(a2), s(a3)))
emit(` ${s(a1)} =l copy %${p}`)
wb(a1)
continue
@@ -525,7 +581,10 @@ var qbe_emit = function(ir, qbe) {
// --- Property access — runtime calls ---
if (op == "load_field") {
pn = prop_name(a3)
pn = null
if (is_text(a3)) pn = a3
else if (is_object(a3) && a3.name != null) pn = a3.name
else if (is_object(a3) && a3.value != null) pn = a3.value
if (pn != null) {
sl = intern_str(pn)
emit(` ${s(a1)} =l call $cell_rt_load_field(l %ctx, l ${s(a2)}, l ${sl})`)
@@ -541,13 +600,28 @@ var qbe_emit = function(ir, qbe) {
continue
}
if (op == "load_dynamic") {
emit(` ${s(a1)} =l call $cell_rt_load_dynamic(l %ctx, l ${s(a2)}, l ${s(a3)})`)
pn = null
if (is_text(a3)) pn = a3
else if (is_object(a3) && a3.name != null) pn = a3.name
else if (is_object(a3) && a3.value != null) pn = a3.value
if (pn != null) {
sl = intern_str(pn)
emit(` ${s(a1)} =l call $cell_rt_load_field(l %ctx, l ${s(a2)}, l ${sl})`)
} else {
emit(` ${s(a1)} =l call $cell_rt_load_dynamic(l %ctx, l ${s(a2)}, l ${s(a3)})`)
}
wb(a1)
continue
}
if (op == "store_field") {
// IR: ["store_field", obj, val, prop] → C: (ctx, val, obj, name)
pn = prop_name(a3)
pn = null
if (is_text(a3)) {
pn = a3
} else if (is_object(a3)) {
if (a3.name != null) pn = a3.name
else if (a3.value != null) pn = a3.value
}
if (pn != null) {
sl = intern_str(pn)
emit(` call $cell_rt_store_field(l %ctx, l ${s(a2)}, l ${s(a1)}, l ${sl})`)
@@ -563,19 +637,30 @@ var qbe_emit = function(ir, qbe) {
}
if (op == "store_dynamic") {
// IR: ["store_dynamic", obj, val, key] → C: (ctx, val, obj, key)
emit(` call $cell_rt_store_dynamic(l %ctx, l ${s(a2)}, l ${s(a1)}, l ${s(a3)})`)
pn = null
if (is_text(a3)) pn = a3
else if (is_object(a3) && a3.name != null) pn = a3.name
else if (is_object(a3) && a3.value != null) pn = a3.value
if (pn != null) {
sl = intern_str(pn)
emit(` call $cell_rt_store_field(l %ctx, l ${s(a2)}, l ${s(a1)}, l ${sl})`)
} else {
emit(` call $cell_rt_store_dynamic(l %ctx, l ${s(a2)}, l ${s(a1)}, l ${s(a3)})`)
}
continue
}
// --- Closure access ---
if (op == "get") {
emit(` ${s(a1)} =l call $cell_rt_get_closure(l %ctx, l %fp, l ${text(a2)}, l ${text(a3)})`)
// mcode: get(dest, slot, depth) — a2=slot, a3=depth
emit(` ${s(a1)} =l call $cell_rt_get_closure(l %ctx, l %fp, l ${text(a3)}, l ${text(a2)})`)
wb(a1)
continue
}
if (op == "put") {
emit(` call $cell_rt_put_closure(l %ctx, l %fp, l ${s(a1)}, l ${text(a2)}, l ${text(a3)})`)
// mcode: put(val, slot, depth) — a2=slot, a3=depth
emit(` call $cell_rt_put_closure(l %ctx, l %fp, l ${s(a1)}, l ${text(a3)}, l ${text(a2)})`)
continue
}
@@ -583,6 +668,7 @@ var qbe_emit = function(ir, qbe) {
if (op == "jump") {
emit(` jmp @${sanitize(a1)}`)
last_was_term = true
continue
}
if (op == "jump_true") {
@@ -642,6 +728,13 @@ var qbe_emit = function(ir, qbe) {
if (op == "invoke") {
emit(` ${s(a2)} =l call $cell_rt_invoke(l %ctx, l ${s(a1)})`)
wb(a2)
reload_captured()
continue
}
if (op == "tail_invoke") {
emit(` ${s(a2)} =l call $cell_rt_invoke(l %ctx, l ${s(a1)})`)
wb(a2)
reload_captured()
continue
}
if (op == "goframe") {
@@ -650,7 +743,9 @@ var qbe_emit = function(ir, qbe) {
continue
}
if (op == "goinvoke") {
emit(` call $cell_rt_goinvoke(l %ctx, l ${s(a1)})`)
emit(` %_goret =l call $cell_rt_goinvoke(l %ctx, l ${s(a1)})`)
emit(` ret %_goret`)
last_was_term = true
continue
}
@@ -694,19 +789,38 @@ var qbe_emit = function(ir, qbe) {
continue
}
// --- Length ---
if (op == "length") {
emit(` ${s(a1)} =l call $JS_CellLength(l %ctx, l ${s(a2)})`)
wb(a1)
continue
}
// --- Misc ---
if (op == "return") {
emit(` ret ${s(a1)}`)
last_was_term = true
continue
}
if (op == "disrupt") {
emit(` call $cell_rt_disrupt(l %ctx)`)
emit(` ret ${text(qbe.js_null)}`)
last_was_term = true
continue
}
if (op == "delete") {
emit(` ${s(a1)} =l call $cell_rt_delete(l %ctx, l ${s(a2)}, l ${s(a3)})`)
pn = null
if (is_text(a3)) pn = a3
else if (is_object(a3) && a3.name != null) pn = a3.name
else if (is_object(a3) && a3.value != null) pn = a3.value
if (pn != null) {
sl = intern_str(pn)
emit(` ${s(a1)} =l call $cell_rt_delete(l %ctx, l ${s(a2)}, l ${sl})`)
} else {
emit(` ${s(a1)} =l call $cell_rt_delete(l %ctx, l ${s(a2)}, l ${s(a3)})`)
}
wb(a1)
continue
}
@@ -720,6 +834,14 @@ var qbe_emit = function(ir, qbe) {
emit(` # unknown: ${op}`)
}
// Emit @disrupt landing pad for arithmetic type-error branches
if (!last_was_term) {
emit(" jmp @disrupt")
}
emit("@disrupt")
emit(` call $cell_rt_disrupt(l %ctx)`)
emit(` ret ${text(qbe.js_null)}`)
emit("}")
emit("")
}
@@ -728,6 +850,70 @@ var qbe_emit = function(ir, qbe) {
// Main: compile all functions then main
// ============================================================
// ============================================================
// Pre-scan: find which slots each function has that are modified
// by child closures (via "put" instructions at depth=1).
// Build a map: fn_idx → array of captured slot numbers.
// ============================================================
// Return the function indices created by an instruction list, i.e. the third
// element of every ["function", ...] instruction, in program order.
// Label entries (plain text, not arrays) are ignored.
var find_children = function(fn_instrs) {
  var found = []
  var total = length(fn_instrs)
  var pos = 0
  var ins = null
  while (pos < total) {
    ins = fn_instrs[pos]
    pos = pos + 1
    if (is_array(ins) && ins[0] == "function") {
      push(found, ins[2])
    }
  }
  return found
}
// Collect the slot operand of every put that targets the immediately enclosing
// frame: instructions shaped ["put", val, slot, depth] with depth == 1.
// Slots are returned in program order; duplicates are not removed.
var find_put_slots = function(fn_instrs) {
  var written = []
  var total = length(fn_instrs)
  var pos = 0
  var ins = null
  while (pos < total) {
    ins = fn_instrs[pos]
    pos = pos + 1
    if (is_array(ins) && ins[0] == "put" && ins[3] == 1) {
      push(written, ins[2])
    }
  }
  return written
}
// Build the captured-slot set for one function (or main).
//
// Returns a record keyed by slot number (as text) whose value is true for every
// slot of `fn` that some nested closure writes with a "put" instruction.
//
// A put is shaped ["put", val, slot, depth]. A closure nested k levels below
// `fn` reaches fn's frame with depth == k, so the whole tree of nested
// functions is walked and each put is matched against that function's nesting
// depth. Looking only at direct children (depth 1) would miss writes made by
// grandchildren and deeper closures, leaving the local temporary stale after
// an invoke.
//
// Marking extra slots is harmless: a captured slot is only re-read from the
// frame, and each slot write shown in qbe_emit is followed by wb(), which
// stores the value to the frame.
var build_captured = function(fn) {
  var captured = {}
  // Parallel work lists: function index and its nesting depth relative to fn.
  var pending_fns = []
  var pending_depths = []
  var nr_fns = length(ir.functions)
  var direct = find_children(fn.instructions)
  var di = 0
  var wi = 0
  var cur_idx = 0
  var cur_depth = 0
  var cur_instrs = null
  var ii = 0
  var cinstr = null
  while (di < length(direct)) {
    push(pending_fns, direct[di])
    push(pending_depths, 1)
    di = di + 1
  }
  while (wi < length(pending_fns)) {
    cur_idx = pending_fns[wi]
    cur_depth = pending_depths[wi]
    wi = wi + 1
    // A nesting chain cannot be longer than the number of functions; the depth
    // bound keeps the walk finite even if the IR were malformed (cyclic).
    if (cur_idx >= 0 && cur_idx < nr_fns && cur_depth <= nr_fns) {
      cur_instrs = ir.functions[cur_idx].instructions
      ii = 0
      while (ii < length(cur_instrs)) {
        cinstr = cur_instrs[ii]
        ii = ii + 1
        if (is_array(cinstr)) {
          if (cinstr[0] == "put" && cinstr[3] == cur_depth) {
            captured[text(cinstr[2])] = true
          } else if (cinstr[0] == "function") {
            // Closures created here sit one level further from fn.
            push(pending_fns, cinstr[2])
            push(pending_depths, cur_depth + 1)
          }
        }
      }
    }
  }
  return captured
}
var fi = 0
while (fi < length(ir.functions)) {
compile_fn(ir.functions[fi], fi, false)

Binary file not shown.

20
qop.c
View File

@@ -457,19 +457,21 @@ static const JSCFunctionListEntry js_qop_funcs[] = {
};
JSValue js_qop_use(JSContext *js) {
JS_FRAME(js);
JS_NewClassID(&js_qop_archive_class_id);
JS_NewClass(js, js_qop_archive_class_id, &js_qop_archive_class);
JSValue archive_proto = JS_NewObject(js);
JS_SetPropertyFunctionList(js, archive_proto, js_qop_archive_funcs, countof(js_qop_archive_funcs));
JS_SetClassProto(js, js_qop_archive_class_id, archive_proto);
JS_ROOT(archive_proto, JS_NewObject(js));
JS_SetPropertyFunctionList(js, archive_proto.val, js_qop_archive_funcs, countof(js_qop_archive_funcs));
JS_SetClassProto(js, js_qop_archive_class_id, archive_proto.val);
JS_NewClassID(&js_qop_writer_class_id);
JS_NewClass(js, js_qop_writer_class_id, &js_qop_writer_class);
JSValue writer_proto = JS_NewObject(js);
JS_SetPropertyFunctionList(js, writer_proto, js_qop_writer_funcs, countof(js_qop_writer_funcs));
JS_SetClassProto(js, js_qop_writer_class_id, writer_proto);
JS_ROOT(writer_proto, JS_NewObject(js));
JS_SetPropertyFunctionList(js, writer_proto.val, js_qop_writer_funcs, countof(js_qop_writer_funcs));
JS_SetClassProto(js, js_qop_writer_class_id, writer_proto.val);
JSValue mod = JS_NewObject(js);
JS_SetPropertyFunctionList(js, mod, js_qop_funcs, countof(js_qop_funcs));
return mod;
JS_ROOT(mod, JS_NewObject(js));
JS_SetPropertyFunctionList(js, mod.val, js_qop_funcs, countof(js_qop_funcs));
JS_RETURN(mod.val);
}

104
regen.ce Normal file
View File

@@ -0,0 +1,104 @@
// regen.ce — regenerate .mcode bytecode files and pre-warm .mach cache
var fd = use("fd")
var json = use("json")
var crypto = use("crypto")
var tokenize = use("tokenize")
var parse = use("parse")
var fold = use("fold")
var mcode = use("mcode")
var streamline = use("streamline")
// Modules to regenerate. For each entry:
//   src  - source file that is read and compiled
//   name - module name passed to mach_compile_mcode_bin when pre-warming the cache
//   out  - path the JSON-encoded mcode is written to
var files = [
{src: "tokenize.cm", name: "tokenize", out: "boot/tokenize.cm.mcode"},
{src: "parse.cm", name: "parse", out: "boot/parse.cm.mcode"},
{src: "fold.cm", name: "fold", out: "boot/fold.cm.mcode"},
{src: "mcode.cm", name: "mcode", out: "boot/mcode.cm.mcode"},
{src: "streamline.cm", name: "streamline", out: "boot/streamline.cm.mcode"},
{src: "qbe.cm", name: "qbe", out: "boot/qbe.cm.mcode"},
{src: "qbe_emit.cm", name: "qbe_emit", out: "boot/qbe_emit.cm.mcode"},
{src: "verify_ir.cm", name: "verify_ir", out: "boot/verify_ir.cm.mcode"},
{src: "internal/bootstrap.cm", name: "bootstrap", out: "boot/bootstrap.cm.mcode"},
{src: "internal/engine.cm", name: "engine", out: "boot/engine.cm.mcode"},
{src: "boot/seed_bootstrap.cm", name: "seed_bootstrap", out: "boot/seed_bootstrap.cm.mcode"}
]
// Resolve shop_path for cache writes
// The shop directory comes from $CELL_SHOP, falling back to $HOME/.cell.
// If neither variable is set, cache_dir stays null and cache pre-warming is skipped.
var os = use('os')
var shop = os.getenv('CELL_SHOP')
var home = null
var cache_dir = null
if (!shop) {
home = os.getenv('HOME')
if (home) {
shop = home + '/.cell'
}
}
if (shop) {
// Compiled artifacts are cached under <shop>/build; create it when it is not already a directory.
// NOTE(review): only the final path component is created here; presumably <shop> itself already exists — confirm.
cache_dir = shop + '/build'
if (!fd.is_dir(cache_dir)) {
fd.mkdir(cache_dir)
}
}
// Working variables, declared once up front and reassigned for every entry.
var i = 0
var entry = null
var src = null
var tok_result = null
var ast = null
var folded = null
var mcode_blob = null
var hash = null
var mach_blob = null
var compiled = null
var optimized = null
var mcode_text = null
var f = null
var errs = null
var ei = 0
var e = null
var had_errors = false
// Regenerate each listed module: tokenize -> parse -> fold -> mcode -> streamline,
// then write the JSON-encoded result to entry.out.
while (i < length(files)) {
entry = files[i]
src = text(fd.slurp(entry.src))
tok_result = tokenize(src, entry.src)
ast = parse(tok_result.tokens, src, entry.src, tokenize)
// Check for parse/semantic errors
// When any are present: print each one, flag the run, and skip this entry
// (nothing is written for it); the loop still moves on to the next entry.
errs = ast.errors
if (errs != null && length(errs) > 0) {
ei = 0
while (ei < length(errs)) {
e = errs[ei]
// Include line/column in the message when the error carries a line.
if (e.line != null) {
print(`${entry.src}:${text(e.line)}:${text(e.column)}: error: ${e.message}`)
} else {
print(`${entry.src}: error: ${e.message}`)
}
ei = ei + 1
}
had_errors = true
i = i + 1
continue
}
folded = fold(ast)
compiled = mcode(folded)
optimized = streamline(compiled)
mcode_text = json.encode(optimized)
f = fd.open(entry.out, "w")
fd.write(f, mcode_text)
fd.close(f)
print(`wrote ${entry.out}`)
// Pre-warm .mach cache
// The cache file is named after the blake2 hash (rendered with the 'h' format)
// of the mcode text, and holds the result of mach_compile_mcode_bin for it.
if (cache_dir) {
mcode_blob = stone(blob(mcode_text))
hash = text(crypto.blake2(mcode_blob), 'h')
mach_blob = mach_compile_mcode_bin(entry.name, mcode_text)
fd.slurpwrite(cache_dir + '/' + hash + '.mach', mach_blob)
print(` cached ${hash}.mach`)
}
i = i + 1
}
// Reported once at the end. Entries that had no errors were already written
// above, so this message does not mean that nothing was regenerated.
if (had_errors) {
print("regen aborted: fix errors above")
}

View File

@@ -1,75 +0,0 @@
// regen.cm — regenerate .mach bytecode files
// Run with: ./cell --core . regen.cm
//
// For every entry in `files`, the source is run through
// tokenize -> parse -> fold -> mcode -> streamline, the result is
// JSON-encoded, compiled with mach_compile_mcode_bin, and written to the
// entry's `out` path.
var fd = use("fd")
var json = use("json")
var tokenize = use("tokenize")
var parse = use("parse")
var fold = use("fold")
var mcode = use("mcode")
var streamline = use("streamline")
// Compilation targets: `src` is the file read, `name` is passed to
// mach_compile_mcode_bin, `out` is the file written.
var files = [
{src: "tokenize.cm", name: "tokenize", out: "tokenize.mach"},
{src: "parse.cm", name: "parse", out: "parse.mach"},
{src: "fold.cm", name: "fold", out: "fold.mach"},
{src: "mcode.cm", name: "mcode", out: "mcode.mach"},
{src: "streamline.cm", name: "streamline", out: "streamline.mach"},
{src: "qbe.cm", name: "qbe", out: "qbe.mach"},
{src: "qbe_emit.cm", name: "qbe_emit", out: "qbe_emit.mach"},
{src: "internal/bootstrap.cm", name: "bootstrap", out: "internal/bootstrap.mach"},
{src: "internal/engine.cm", name: "engine", out: "internal/engine.mach"}
]
// All working variables are declared up front and reassigned per file.
var i = 0
var entry = null
var src = null
var tok_result = null
var ast = null
var folded = null
var compiled = null
var optimized = null
var mcode_json = null
var bytecode = null
var f = null
var errs = null
var ei = 0
var e = null
var had_errors = false
while (i < length(files)) {
entry = files[i]
src = text(fd.slurp(entry.src))
tok_result = tokenize(src, entry.src)
ast = parse(tok_result.tokens, src, entry.src, tokenize)
// Check for parse/semantic errors
errs = ast.errors
if (errs != null && length(errs) > 0) {
ei = 0
while (ei < length(errs)) {
e = errs[ei]
// Include line:column in the message only when the error carries a line.
if (e.line != null) {
print(`${entry.src}:${text(e.line)}:${text(e.column)}: error: ${e.message}`)
} else {
print(`${entry.src}: error: ${e.message}`)
}
ei = ei + 1
}
// Skip this file only: nothing is written for it, but the loop goes on
// to the remaining entries.
had_errors = true
i = i + 1
continue
}
folded = fold(ast)
compiled = mcode(folded)
optimized = streamline(compiled)
mcode_json = json.encode(optimized)
// mach_compile_mcode_bin is not obtained through use() in this script;
// presumably it is a global supplied by the runtime — TODO confirm.
bytecode = mach_compile_mcode_bin(entry.name, mcode_json)
f = fd.open(entry.out, "w")
fd.write(f, bytecode)
fd.close(f)
print(`wrote ${entry.out}`)
i = i + 1
}
// Printed after the whole list has been processed; files without errors
// have already been written by this point.
if (had_errors) {
print("regen aborted: fix errors above")
}

View File

@@ -1,16 +1,16 @@
// run_native.ce — load a module both interpreted and native, compare speed
//
// Usage:
// cell --core . run_native.ce <module>
// cell --dev run_native.ce <module>
//
// Loads <module>.cm via use() (interpreted) and <module>.dylib (native),
// Loads <module>.cm via use() (interpreted) and <module>.cm.dylib (native),
// runs both and compares results and timing.
var os = use('os')
if (length(args) < 1) {
print('usage: cell --core . run_native.ce <module>')
print(' e.g. cell --core . run_native.ce num_torture')
print('usage: cell --dev run_native.ce <module>')
print(' e.g. cell --dev run_native.ce num_torture')
return
}
@@ -21,7 +21,7 @@ if (ends_with(name, '.cm')) {
var safe = replace(replace(name, '/', '_'), '-', '_')
var symbol = 'js_' + safe + '_use'
var dylib_path = './' + name + '.dylib'
var dylib_path = './' + name + '.cm.dylib'
var fd = use('fd')
// --- Test argument for function-returning modules ---

78
run_native_seed.ce Normal file
View File

@@ -0,0 +1,78 @@
// run_native_seed.ce — load and run a native .dylib module (seed mode)
// Usage: ./cell --dev --seed run_native_seed benches/fibonacci
//
// Loads <module> through use() and again from ./<module>.cm.dylib, prints
// both results with timings, then reports whether the results match and the
// relative speed.
var fd = use("fd")
var os = use("os")
if (length(args) < 1) {
print("usage: cell --dev --seed run_native_seed <module>")
disrupt
}
var name = args[0]
// Accept the module name with or without a trailing ".cm".
if (ends_with(name, ".cm")) {
name = text(name, 0, length(name) - 3)
}
// Exported symbol is "js_<name>_use" with '/' and '-' replaced by '_'.
var safe = replace(replace(name, "/", "_"), "-", "_")
var symbol = "js_" + safe + "_use"
var dylib_path = "./" + name + ".cm.dylib"
// Argument passed to the module when it evaluates to a function;
// defaults to 30 and can be overridden by the second CLI argument.
var test_arg = 30
if (length(args) > 1) {
test_arg = number(args[1])
}
// --- Interpreted run ---
print("--- interpreted ---")
var t1 = os.now()
var mod_interp = use(name)
var t2 = os.now()
var result_interp = null
if (is_function(mod_interp)) {
print("module returns a function, calling with " + text(test_arg))
// Restart the clock so the measured interval covers only the call,
// not the use() above.
t1 = os.now()
result_interp = mod_interp(test_arg)
t2 = os.now()
}
// Fall back to the module value itself when there was no call or the call
// produced null.
result_interp = result_interp != null ? result_interp : mod_interp
// os.now() deltas are divided by 1000000 and labelled "ms"; presumably
// os.now() reports nanoseconds — TODO confirm.
var ms_interp = (t2 - t1) / 1000000
print("result: " + text(result_interp))
print("time: " + text(ms_interp) + " ms")
// --- Native run ---
if (!fd.is_file(dylib_path)) {
print("\nno " + dylib_path + " found")
disrupt
}
print("\n--- native ---")
// t3..t4 brackets dylib_open and t4..t5 brackets the symbol lookup, unless
// the module is a function, in which case t4/t5 are reset around the call.
var t3 = os.now()
var lib = os.dylib_open(dylib_path)
var t4 = os.now()
var mod_native = os.dylib_symbol(lib, symbol)
var t5 = os.now()
var result_native = null
if (is_function(mod_native)) {
print("module returns a function, calling with " + text(test_arg))
// After this reset, "load" (t4 - t3) spans open + symbol lookup and
// "exec" (t5 - t4) spans only the call.
t4 = os.now()
result_native = mod_native(test_arg)
t5 = os.now()
}
result_native = result_native != null ? result_native : mod_native
var ms_native = (t5 - t3) / 1000000
var ms_exec = (t5 - t4) / 1000000
print("result: " + text(result_native))
print("load: " + text((t4 - t3) / 1000000) + " ms")
print("exec: " + text(ms_exec) + " ms")
print("total: " + text(ms_native) + " ms")
// --- Comparison ---
print("\n--- comparison ---")
print("match: " + text(result_interp == result_native))
// Guard both ratios against a zero denominator.
if (ms_native > 0) {
print("speedup: " + text(ms_interp / ms_native) + "x (total)")
}
if (ms_exec > 0) {
print("speedup: " + text(ms_interp / ms_exec) + "x (exec only)")
}

66
source/buddy_debug.c Normal file
View File

@@ -0,0 +1,66 @@
/* buddy_debug.c — ASCII visualization for buddy allocator
   Included from runtime.c only when DUMP_BUDDY is defined. */

#include <stdlib.h> /* calloc, free */

/* Print a snapshot of `pool` to stderr after operation `op`.
 *
 *   pool   pool to describe; nothing is printed if it is NULL or has no base
 *   op     label printed at the start of the report
 *   block  block the operation touched; its offset from pool->base is
 *          reported (0 is reported when block is NULL)
 *   order  order of that block; its size is reported as 1 << order
 *
 * The report has a 64-character bar ('.' = mostly free, '#' = mostly
 * allocated), byte totals, and the number of blocks on each non-empty
 * free list.
 */
static void buddy_dump(BuddyPool *pool, const char *op,
                       uint8_t *block, uint8_t order) {
  if (!pool || !pool->base) return;

  int levels = pool->max_order - BUDDY_MIN_ORDER + 1;

  /* One byte per minimum-size slot: 0 = allocated, 1 = free.
     The scratch map is heap-allocated because its size scales with the
     pool and could exceed the available stack. */
  size_t num_slots = pool->total_size >> BUDDY_MIN_ORDER;
  uint8_t *bitmap = calloc(num_slots ? num_slots : 1, 1);
  if (!bitmap) {
    fprintf(stderr, "buddy %s: out of memory building dump bitmap\n", op);
    return;
  }

  /* A block on free list i spans 1 << i minimum-size slots. */
  for (int lvl = 0; lvl < levels; lvl++) {
    size_t span = (size_t)1 << lvl;
    for (BuddyBlock *p = pool->free_lists[lvl]; p; p = p->next) {
      size_t first = (size_t)((uint8_t *)p - pool->base) >> BUDDY_MIN_ORDER;
      for (size_t s = 0; s < span && first + s < num_slots; s++)
        bitmap[first + s] = 1;
    }
  }

  /* Render 64-char ASCII bar */
  size_t slots_per_char = num_slots / 64;
  if (slots_per_char == 0) slots_per_char = 1;

  char bar[65];
  size_t total_free_slots = 0;
  for (int c = 0; c < 64; c++) {
    size_t base_slot = (size_t)c * slots_per_char;
    size_t free_count = 0;
    for (size_t s = 0; s < slots_per_char && base_slot + s < num_slots; s++) {
      if (bitmap[base_slot + s]) free_count++;
    }
    total_free_slots += free_count;
    /* Majority vote: if more than half are free, show free */
    bar[c] = (free_count > slots_per_char / 2) ? '.' : '#';
  }
  bar[64] = '\0';
  free(bitmap);

  /* Pointer subtraction is only meaningful for a real block. */
  size_t blk_offset = block ? (size_t)(block - pool->base) : 0;
  size_t blk_size = (size_t)1 << order;
  size_t total_free = total_free_slots << BUDDY_MIN_ORDER;
  size_t total_alloc = pool->total_size - total_free;

  fprintf(stderr, "buddy %s: pool %zuKB order %u (%zuKB) @ +%zuKB allocs=%u\n",
          op, pool->total_size / 1024, (unsigned)order, blk_size / 1024,
          blk_offset / 1024, pool->alloc_count);
  fprintf(stderr, " [%s]\n", bar);
  fprintf(stderr, " alloc: %zuKB free: %zuKB total: %zuKB\n",
          total_alloc / 1024, total_free / 1024, pool->total_size / 1024);

  /* Print free list population */
  fprintf(stderr, " free lists:");
  for (int lvl = 0; lvl < levels; lvl++) {
    int count = 0;
    for (BuddyBlock *p = pool->free_lists[lvl]; p; p = p->next)
      count++;
    if (count > 0)
      fprintf(stderr, " o%d:%d", lvl + BUDDY_MIN_ORDER, count);
  }
  fprintf(stderr, "\n");
}

View File

@@ -11,8 +11,9 @@
#include "cell_internal.h"
#include "cJSON.h"
#define BOOTSTRAP_MACH "internal/bootstrap.mach"
#define BOOTSTRAP_SRC "internal/bootstrap.cm"
#define BOOTSTRAP_MCODE "boot/bootstrap.cm.mcode"
#define SEED_BOOTSTRAP_MCODE "boot/seed_bootstrap.cm.mcode"
#define BOOTSTRAP_SRC "internal/bootstrap.cm"
#define CELL_SHOP_DIR ".cell"
#define CELL_CORE_DIR "packages/core"
@@ -20,6 +21,7 @@
#include <signal.h>
#include <unistd.h>
#include <sys/stat.h>
#include "monocypher.h"
/* Test suite declarations */
int run_c_test_suite(JSContext *ctx);
@@ -30,6 +32,83 @@ static char *shop_path = NULL;
static char *core_path = NULL;
static JSRuntime *g_runtime = NULL;
// Compute the 32-byte BLAKE2b digest of `data` (`size` bytes) and return it
// as a NUL-terminated string of 64 lowercase hex digits.
// The caller owns the returned buffer and must free() it.
// Returns NULL if the buffer cannot be allocated.
static char *compute_blake2_hex(const char *data, size_t size) {
  enum { DIGEST_BYTES = 32 };
  static const char digits[] = "0123456789abcdef";

  char *hex = malloc(DIGEST_BYTES * 2 + 1);
  if (!hex) return NULL;

  uint8_t hash[DIGEST_BYTES];
  crypto_blake2b(hash, DIGEST_BYTES, (const uint8_t *)data, size);

  // Two hex digits per digest byte, high nibble first (same as "%02x").
  for (int i = 0; i < DIGEST_BYTES; i++) {
    hex[i * 2] = digits[hash[i] >> 4];
    hex[i * 2 + 1] = digits[hash[i] & 0x0f];
  }
  hex[DIGEST_BYTES * 2] = '\0';
  return hex;
}
// Build the cache path "<shop_path>/build/<hex>.mach".
// Returns a heap-allocated string the caller must free(), or NULL when no
// shop path is configured, `hex` is NULL, or allocation fails.
static char *build_cache_path(const char *hex) {
  if (!shop_path || !hex) return NULL;

  // Measure first so the buffer fits whatever length `hex` actually has.
  int need = snprintf(NULL, 0, "%s/build/%s.mach", shop_path, hex);
  if (need < 0) return NULL;

  size_t len = (size_t)need + 1;
  char *path = malloc(len);
  if (!path) return NULL;

  snprintf(path, len, "%s/build/%s.mach", shop_path, hex);
  return path;
}
// Write `size` bytes from `data` to `path`, replacing any existing file.
// Returns 1 when every byte was written and the stream closed cleanly,
// 0 otherwise. On failure after the file was opened, the file is removed so
// a truncated cache entry is never left behind for a later read.
static int write_cache_file(const char *path, const uint8_t *data, size_t size) {
  if (!path) return 0;

  FILE *fh = fopen(path, "wb");
  if (!fh) return 0;

  int ok = (fwrite(data, 1, size, fh) == size);
  // fclose flushes buffered data; a failure here means bytes were lost.
  if (fclose(fh) != 0) ok = 0;

  if (!ok) remove(path);
  return ok;
}
// Produce the serialized bootstrap binary for the given .mcode text.
//
// The cache file name is derived from the BLAKE2b hash of the mcode bytes.
// If a non-empty cache file with that name can be read in full, its contents
// are returned. Otherwise the mcode is parsed, compiled and serialized, and
// the result is written to the cache on a best-effort basis (a failed cache
// write does not fail the call).
//
//   mcode_data  mcode JSON text
//   mcode_size  number of bytes hashed for the cache key
//   out_size    receives the size of the returned buffer on success
//
// Returns a heap-allocated buffer (callers in this file release it with
// free()), or NULL if parsing, compiling or serializing fails.
//
// NOTE(review): cJSON_Parse takes no length, so mcode_data is assumed to be
// NUL-terminated — confirm against load_core_file.
static char *load_or_cache_bootstrap(const char *mcode_data, size_t mcode_size, size_t *out_size) {
  char *hex = compute_blake2_hex(mcode_data, mcode_size);
  char *cpath = hex ? build_cache_path(hex) : NULL;
  free(hex);

  // Try loading from cache
  if (cpath) {
    FILE *fh = fopen(cpath, "rb");
    if (fh) {
      char *data = NULL;
      long file_size = -1;
      if (fseek(fh, 0, SEEK_END) == 0) file_size = ftell(fh);
      // An empty or unmeasurable file is treated as a cache miss.
      if (file_size > 0 && fseek(fh, 0, SEEK_SET) == 0) {
        data = malloc((size_t)file_size);
        if (data && fread(data, 1, (size_t)file_size, fh) != (size_t)file_size) {
          free(data);
          data = NULL;
        }
      }
      fclose(fh);
      if (data) {
        free(cpath);
        *out_size = (size_t)file_size;
        return data;
      }
    }
  }

  // Cache miss: compile mcode to binary
  char *result = NULL;
  cJSON *mcode = cJSON_Parse(mcode_data);
  if (mcode) {
    MachCode *mc = mach_compile_mcode(mcode);
    cJSON_Delete(mcode);
    if (mc) {
      size_t bin_size = 0;
      uint8_t *bin = JS_SerializeMachCode(mc, &bin_size);
      JS_FreeMachCode(mc);
      if (bin) {
        // Write to cache
        if (cpath) write_cache_file(cpath, bin, bin_size);
        *out_size = bin_size;
        result = (char *)bin;
      }
    }
  }

  free(cpath);
  return result;
}
// Get the home directory
static const char* get_home_dir(void) {
const char *home = getenv("HOME");
@@ -178,48 +257,70 @@ void script_startup(cell_rt *prt)
cell_rt *crt = JS_GetContextOpaque(js);
JS_FreeValue(js, js_blob_use(js));
// Load pre-compiled bootstrap bytecode (.mach)
// Load pre-compiled bootstrap .mcode
size_t boot_size;
char *boot_data = load_core_file(BOOTSTRAP_MACH, &boot_size);
char *boot_data = load_core_file(BOOTSTRAP_MCODE, &boot_size);
if (!boot_data) {
printf("ERROR: Could not load bootstrap from %s!\n", core_path);
return;
}
// Try cache or compile mcode → binary
size_t bin_size;
char *bin_data = load_or_cache_bootstrap(boot_data, boot_size, &bin_size);
free(boot_data);
if (!bin_data) {
printf("ERROR: Failed to compile bootstrap mcode!\n");
return;
}
// Create hidden environment
JSValue hidden_env = JS_NewObject(js);
JS_SetPropertyStr(js, hidden_env, "os", js_os_use(js));
JS_SetPropertyStr(js, hidden_env, "json", js_json_use(js));
JS_SetPropertyStr(js, hidden_env, "nota", js_nota_use(js));
JS_SetPropertyStr(js, hidden_env, "wota", js_wota_use(js));
// Note: evaluate allocating calls into temporaries before passing to
// JS_SetPropertyStr, so env_ref.val is read AFTER GC may have moved it.
JSGCRef env_ref;
JS_AddGCRef(js, &env_ref);
env_ref.val = JS_NewObject(js);
JSValue tmp;
tmp = js_os_use(js);
JS_SetPropertyStr(js, env_ref.val, "os", tmp);
tmp = js_json_use(js);
JS_SetPropertyStr(js, env_ref.val, "json", tmp);
tmp = js_nota_use(js);
JS_SetPropertyStr(js, env_ref.val, "nota", tmp);
tmp = js_wota_use(js);
JS_SetPropertyStr(js, env_ref.val, "wota", tmp);
crt->actor_sym_ref.val = JS_NewObject(js);
JS_SetPropertyStr(js, hidden_env, "actorsym", JS_DupValue(js, crt->actor_sym_ref.val));
JS_SetPropertyStr(js, env_ref.val, "actorsym", JS_DupValue(js, crt->actor_sym_ref.val));
// Always set init (even if null)
if (crt->init_wota) {
JS_SetPropertyStr(js, hidden_env, "init", wota2value(js, crt->init_wota));
tmp = wota2value(js, crt->init_wota);
JS_SetPropertyStr(js, env_ref.val, "init", tmp);
free(crt->init_wota);
crt->init_wota = NULL;
} else {
JS_SetPropertyStr(js, hidden_env, "init", JS_NULL);
JS_SetPropertyStr(js, env_ref.val, "init", JS_NULL);
}
// Set args to null for actor spawn (not CLI mode)
JS_SetPropertyStr(js, hidden_env, "args", JS_NULL);
JS_SetPropertyStr(js, env_ref.val, "args", JS_NULL);
if (core_path)
JS_SetPropertyStr(js, hidden_env, "core_path", JS_NewString(js, core_path));
JS_SetPropertyStr(js, hidden_env, "shop_path",
shop_path ? JS_NewString(js, shop_path) : JS_NULL);
if (core_path) {
tmp = JS_NewString(js, core_path);
JS_SetPropertyStr(js, env_ref.val, "core_path", tmp);
}
tmp = shop_path ? JS_NewString(js, shop_path) : JS_NULL;
JS_SetPropertyStr(js, env_ref.val, "shop_path", tmp);
// Stone the environment
hidden_env = JS_Stone(js, hidden_env);
JSValue hidden_env = JS_Stone(js, env_ref.val);
JS_DeleteGCRef(js, &env_ref);
// Run through MACH VM
// Run from binary
crt->state = ACTOR_RUNNING;
JSValue v = JS_RunMachBin(js, (const uint8_t *)boot_data, boot_size, hidden_env);
free(boot_data);
JSValue v = JS_RunMachBin(js, (const uint8_t *)bin_data, bin_size, hidden_env);
free(bin_data);
uncaught_exception(js, v);
crt->state = ACTOR_IDLE;
set_actor_state(crt);
@@ -269,13 +370,14 @@ static int run_test_suite(size_t heap_size)
static void print_usage(const char *prog)
{
printf("Usage: %s [options] <script> [args...]\n\n", prog);
printf("Run a cell script (.ce actor or .cm module).\n\n");
printf("Usage: %s [options] <program> [args...]\n\n", prog);
printf("Run a cell program (.ce actor).\n\n");
printf("Options:\n");
printf(" --core <path> Set core path directly (overrides CELL_CORE)\n");
printf(" --shop <path> Set shop path (overrides CELL_SHOP)\n");
printf(" --emit-qbe Emit QBE IL (for native compilation)\n");
printf(" --dump-mach Dump MACH bytecode disassembly\n");
printf(" --dev Dev mode (shop=.cell, core=.)\n");
printf(" --heap <size> Initial heap size (e.g. 256MB, 1GB)\n");
printf(" --seed Use seed bootstrap (minimal, for regen)\n");
printf(" --test [heap_size] Run C test suite\n");
printf(" -h, --help Show this help message\n");
printf("\nEnvironment:\n");
@@ -307,24 +409,15 @@ int cell_init(int argc, char **argv)
}
/* Default: run script through bootstrap pipeline */
int emit_qbe = 0;
int dump_mach = 0;
int arg_start = 1;
int seed_mode = 0;
size_t heap_size = 1024 * 1024; /* 1MB default */
const char *shop_override = NULL;
const char *core_override = NULL;
// Parse flags (order-independent)
while (arg_start < argc && argv[arg_start][0] == '-') {
if (strcmp(argv[arg_start], "--mcode") == 0) {
/* --mcode is now always on; accept and ignore for compat */
arg_start++;
} else if (strcmp(argv[arg_start], "--emit-qbe") == 0) {
emit_qbe = 1;
arg_start++;
} else if (strcmp(argv[arg_start], "--dump-mach") == 0) {
dump_mach = 1;
arg_start++;
} else if (strcmp(argv[arg_start], "--shop") == 0) {
if (strcmp(argv[arg_start], "--shop") == 0) {
if (arg_start + 1 >= argc) {
printf("ERROR: --shop requires a path argument\n");
return 1;
@@ -338,32 +431,72 @@ int cell_init(int argc, char **argv)
}
core_override = argv[arg_start + 1];
arg_start += 2;
} else if (strcmp(argv[arg_start], "--seed") == 0) {
seed_mode = 1;
arg_start++;
} else if (strcmp(argv[arg_start], "--heap") == 0) {
if (arg_start + 1 >= argc) {
printf("ERROR: --heap requires a size argument (e.g. 1GB, 256MB, 65536)\n");
return 1;
}
char *end = NULL;
heap_size = strtoull(argv[arg_start + 1], &end, 0);
if (end && (*end == 'G' || *end == 'g')) heap_size *= 1024ULL * 1024 * 1024;
else if (end && (*end == 'M' || *end == 'm')) heap_size *= 1024ULL * 1024;
else if (end && (*end == 'K' || *end == 'k')) heap_size *= 1024ULL;
arg_start += 2;
} else if (strcmp(argv[arg_start], "--dev") == 0) {
shop_override = ".cell";
core_override = ".";
mkdir(".cell", 0755);
mkdir(".cell/build", 0755);
mkdir(".cell/packages", 0755);
/* Ensure .cell/packages/core -> . symlink exists */
struct stat lst;
if (lstat(".cell/packages/core", &lst) != 0)
symlink("../..", ".cell/packages/core");
arg_start++;
} else {
break;
}
}
if (arg_start >= argc) {
print_usage(argv[0]);
return 1;
}
if (!find_cell_shop(shop_override, core_override)) return 1;
actor_initialize();
const char *boot_mcode = seed_mode ? SEED_BOOTSTRAP_MCODE : BOOTSTRAP_MCODE;
size_t boot_size;
char *boot_data = load_core_file(BOOTSTRAP_MACH, &boot_size);
char *boot_data = load_core_file(boot_mcode, &boot_size);
if (!boot_data) {
printf("ERROR: Could not load bootstrap from %s\n", core_path);
return 1;
}
// Try cache or compile mcode → binary
size_t bin_size;
char *bin_data = load_or_cache_bootstrap(boot_data, boot_size, &bin_size);
free(boot_data);
if (!bin_data) {
printf("ERROR: Failed to compile bootstrap mcode\n");
return 1;
}
g_runtime = JS_NewRuntime();
if (!g_runtime) {
printf("Failed to create JS runtime\n");
free(boot_data);
free(bin_data);
return 1;
}
JSContext *ctx = JS_NewContextWithHeapSize(g_runtime, 16 * 1024 * 1024);
JSContext *ctx = JS_NewContextWithHeapSize(g_runtime, heap_size);
if (!ctx) {
printf("Failed to create JS context\n");
free(boot_data); JS_FreeRuntime(g_runtime);
free(bin_data); JS_FreeRuntime(g_runtime);
return 1;
}
@@ -397,28 +530,38 @@ int cell_init(int argc, char **argv)
JS_FreeValue(ctx, js_blob_use(ctx));
JSValue hidden_env = JS_NewObject(ctx);
JS_SetPropertyStr(ctx, hidden_env, "os", js_os_use(ctx));
JS_SetPropertyStr(ctx, hidden_env, "core_path", JS_NewString(ctx, core_path));
JS_SetPropertyStr(ctx, hidden_env, "shop_path",
shop_path ? JS_NewString(ctx, shop_path) : JS_NULL);
JS_SetPropertyStr(ctx, hidden_env, "emit_qbe", JS_NewBool(ctx, emit_qbe));
JS_SetPropertyStr(ctx, hidden_env, "dump_mach", JS_NewBool(ctx, dump_mach));
JS_SetPropertyStr(ctx, hidden_env, "actorsym", JS_DupValue(ctx, cli_rt->actor_sym_ref.val));
JS_SetPropertyStr(ctx, hidden_env, "json", js_json_use(ctx));
JS_SetPropertyStr(ctx, hidden_env, "nota", js_nota_use(ctx));
JS_SetPropertyStr(ctx, hidden_env, "wota", js_wota_use(ctx));
JS_SetPropertyStr(ctx, hidden_env, "init", JS_NULL);
JSValue args_arr = JS_NewArray(ctx);
JSGCRef env_ref;
JS_AddGCRef(ctx, &env_ref);
env_ref.val = JS_NewObject(ctx);
JSValue tmp;
tmp = js_os_use(ctx);
JS_SetPropertyStr(ctx, env_ref.val, "os", tmp);
tmp = JS_NewString(ctx, core_path);
JS_SetPropertyStr(ctx, env_ref.val, "core_path", tmp);
tmp = shop_path ? JS_NewString(ctx, shop_path) : JS_NULL;
JS_SetPropertyStr(ctx, env_ref.val, "shop_path", tmp);
JS_SetPropertyStr(ctx, env_ref.val, "actorsym", JS_DupValue(ctx, cli_rt->actor_sym_ref.val));
tmp = js_json_use(ctx);
JS_SetPropertyStr(ctx, env_ref.val, "json", tmp);
tmp = js_nota_use(ctx);
JS_SetPropertyStr(ctx, env_ref.val, "nota", tmp);
tmp = js_wota_use(ctx);
JS_SetPropertyStr(ctx, env_ref.val, "wota", tmp);
JS_SetPropertyStr(ctx, env_ref.val, "init", JS_NULL);
JSGCRef args_ref;
JS_AddGCRef(ctx, &args_ref);
args_ref.val = JS_NewArray(ctx);
for (int i = arg_start; i < argc; i++) {
JSValue str = JS_NewString(ctx, argv[i]);
JS_ArrayPush(ctx, &args_arr, str);
JS_ArrayPush(ctx, &args_ref.val, str);
}
JS_SetPropertyStr(ctx, hidden_env, "args", args_arr);
hidden_env = JS_Stone(ctx, hidden_env);
JS_SetPropertyStr(ctx, env_ref.val, "args", args_ref.val);
JS_DeleteGCRef(ctx, &args_ref);
JSValue hidden_env = JS_Stone(ctx, env_ref.val);
JS_DeleteGCRef(ctx, &env_ref);
JSValue result = JS_RunMachBin(ctx, (const uint8_t *)boot_data, boot_size, hidden_env);
free(boot_data);
JSValue result = JS_RunMachBin(ctx, (const uint8_t *)bin_data, bin_size, hidden_env);
free(bin_data);
int exit_code = 0;
if (JS_IsException(result)) {

View File

@@ -78,8 +78,7 @@ void cell_trace_sethook(cell_hook);
#define QJSCLASS(TYPE, ...)\
JSClassID js_##TYPE##_id;\
static void js_##TYPE##_finalizer(JSRuntime *rt, JSValue val){\
JSContext *js = JS_GetContext(rt);\
TYPE *n = JS_GetOpaque2(js, val, js_##TYPE##_id); \
TYPE *n = JS_GetOpaque(val, js_##TYPE##_id); \
TYPE##_free(rt,n);}\
static JSClassDef js_##TYPE##_class = {\
.class_name = #TYPE,\
@@ -157,6 +156,43 @@ JS_SetClassProto(js, js_##TYPE##_id, TYPE##_proto); \
#define countof(x) (sizeof(x)/sizeof((x)[0]))
/* GC safety macros for C functions that allocate multiple heap objects.
Any allocation call (JS_NewObject, JS_SetPropertyStr, etc.) can trigger GC.
JS_ROOT style: explicit, use .val to access the rooted value.
JS_LOCAL style: transparent, GC updates the C local through a pointer.
JS_FRAME must come first in the function: the other macros refer to the
locals it declares (_js_ctx, _js_gc_frame, _js_local_frame). Leave the
function through JS_RETURN / JS_RETURN_NULL / JS_RETURN_EX so the saved
frames are restored. */
/* Declares _js_ctx and records the current GC-ref and local-ref frames. */
#define JS_FRAME(ctx) \
JSContext *_js_ctx = (ctx); \
JSGCRef *_js_gc_frame = JS_GetGCFrame(_js_ctx); \
JSLocalRef *_js_local_frame = JS_GetLocalFrame(_js_ctx)
/* Declares JSGCRef `name`, pushes it, then assigns `init` to name.val
(so `init` is evaluated after the ref has been pushed). */
#define JS_ROOT(name, init) \
JSGCRef name; \
JS_PushGCRef(_js_ctx, &name); \
name.val = (init)
/* Declares JSValue `name` initialised to `init`, plus a JSLocalRef
`name__lr` pointing at it, and pushes that local ref. */
#define JS_LOCAL(name, init) \
JSValue name = (init); \
JSLocalRef name##__lr; \
name##__lr.ptr = &name; \
JS_PushLocalRef(_js_ctx, &name##__lr)
/* Evaluates `val` into a temporary before restoring the frames, then
returns the temporary. */
#define JS_RETURN(val) do { \
JSValue _js_ret = (val); \
JS_RestoreFrame(_js_ctx, _js_gc_frame, _js_local_frame); \
return _js_ret; \
} while (0)
/* Restores the frames and returns JS_NULL. */
#define JS_RETURN_NULL() do { \
JS_RestoreFrame(_js_ctx, _js_gc_frame, _js_local_frame); \
return JS_NULL; \
} while (0)
/* Restores the frames and returns JS_EXCEPTION. */
#define JS_RETURN_EX() do { \
JS_RestoreFrame(_js_ctx, _js_gc_frame, _js_local_frame); \
return JS_EXCEPTION; \
} while (0)
// Common macros for property access
#define JS_GETPROP(JS, TARGET, VALUE, PROP, TYPE) {\
JSValue __##PROP##__v = JS_GetPropertyStr(JS,VALUE,#PROP); \

View File

@@ -442,7 +442,7 @@ JSFrameRegister *alloc_frame_register(JSContext *ctx, int slot_count) {
if (!frame) return NULL;
/* cap56 = slot count (used by gc_object_size) */
frame->hdr = objhdr_make(slot_count, OBJ_FRAME, 0, 0, 0, 0);
frame->header = objhdr_make(slot_count, OBJ_FRAME, 0, 0, 0, 0);
frame->function = JS_NULL;
frame->caller = JS_NULL;
frame->address = JS_NewInt32(ctx, 0);
@@ -547,7 +547,7 @@ static JSValue reg_vm_binop(JSContext *ctx, int op, JSValue a, JSValue b) {
}
/* String concat for ADD */
if (op == MACH_ADD && JS_IsText(a) && JS_IsText(b))
if (op == MACH_ADD && mist_is_text(a) && mist_is_text(b))
return JS_ConcatString(ctx, a, b);
/* Comparison ops allow mixed types — return false for mismatches */
@@ -576,7 +576,7 @@ static JSValue reg_vm_binop(JSContext *ctx, int op, JSValue a, JSValue b) {
}
}
/* String comparisons */
if (JS_IsText(a) && JS_IsText(b)) {
if (mist_is_text(a) && mist_is_text(b)) {
int cmp = js_string_compare_value(ctx, a, b, FALSE);
switch (op) {
case MACH_EQ: return JS_NewBool(ctx, cmp == 0);
@@ -696,7 +696,7 @@ void __asan_on_error(void) {
fprintf(stderr, "\n=== ASAN error: VM stack trace ===\n");
int is_first = 1;
while (frame) {
if (!JS_IsFunction(frame->function)) break;
if (!mist_is_function(frame->function)) break;
JSFunction *fn = JS_VALUE_GET_FUNCTION(frame->function);
const char *func_name = NULL;
const char *file = NULL;
@@ -767,9 +767,7 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
/* Setup initial frame — wrap top-level code in a function object so that
returning from a called register function can read code/env from frame */
JSValue top_fn = js_new_register_function(ctx, code, env_gc.val, of_gc.val);
JS_PopGCRef(ctx, &of_gc);
env = env_gc.val; /* refresh — GC may have moved env during allocation */
JS_PopGCRef(ctx, &env_gc);
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
frame->function = top_fn;
frame->slots[0] = this_gc.val; /* slot 0 = this */
@@ -780,6 +778,8 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
}
for (int i = nargs_copy - 1; i >= 0; i--) JS_PopGCRef(ctx, &arg_gcs[i]);
JS_PopGCRef(ctx, &this_gc);
JS_PopGCRef(ctx, &of_gc);
JS_PopGCRef(ctx, &env_gc);
uint32_t pc = code->entry_point;
JSValue result = JS_NULL;
@@ -815,12 +815,10 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
ctx->reg_current_frame = frame_ref.val;
ctx->current_register_pc = pc > 0 ? pc - 1 : 0;
int op = MACH_GET_OP(instr);
/* trace disabled */
int a = MACH_GET_A(instr);
int b = MACH_GET_B(instr);
int c = MACH_GET_C(instr);
switch (op) {
case MACH_NOP:
break;
@@ -882,7 +880,7 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
JS_ToFloat64(ctx, &dt, tol);
BOOL eq = fabs(da - db) <= dt;
frame->slots[a] = JS_NewBool(ctx, is_eq_op ? eq : !eq);
} else if (JS_IsText(left) && JS_IsText(right) && JS_VALUE_GET_TAG(tol) == JS_TAG_BOOL && JS_VALUE_GET_BOOL(tol)) {
} else if (mist_is_text(left) && mist_is_text(right) && JS_VALUE_GET_TAG(tol) == JS_TAG_BOOL && JS_VALUE_GET_BOOL(tol)) {
BOOL eq = js_string_compare_value_nocase(ctx, left, right) == 0;
frame->slots[a] = JS_NewBool(ctx, is_eq_op ? eq : !eq);
} else {
@@ -960,7 +958,7 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
JSValue obj = frame->slots[b];
JSValue key = code->cpool[c];
/* Non-proxy functions (arity != 2) can't have properties read */
if (JS_IsFunction(obj)) {
if (mist_is_function(obj)) {
JSFunction *fn_chk = JS_VALUE_GET_FUNCTION(obj);
if (fn_chk->length != 2) {
JS_ThrowTypeError(ctx, "cannot read property of non-proxy function");
@@ -983,17 +981,14 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
int ret = JS_SetProperty(ctx, obj, key, val);
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
if (ret < 0) goto disrupt;
mach_resolve_forward(&frame->slots[a]);
break;
}
case MACH_GETINDEX: {
/* R(A) = R(B)[R(C)] — mcode guarantees R(C) is int */
JSValue obj = frame->slots[b];
JSValue idx = frame->slots[c];
JSValue val;
if (JS_IsInt(idx))
val = JS_GetPropertyNumber(ctx, obj, JS_VALUE_GET_INT(idx));
else
val = JS_GetProperty(ctx, obj, idx);
JSValue val = JS_GetPropertyNumber(ctx, obj, JS_VALUE_GET_INT(frame->slots[c]));
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
if (JS_IsException(val)) goto disrupt;
frame->slots[a] = val;
@@ -1001,25 +996,13 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
}
case MACH_SETINDEX: {
/* R(A)[R(B)] = R(C) */
/* R(A)[R(B)] = R(C) — mcode guarantees R(B) is int */
JSValue obj = frame->slots[a];
JSValue idx = frame->slots[b];
JSValue val = frame->slots[c];
int ret;
if (JS_IsInt(idx)) {
JSValue r = JS_SetPropertyNumber(ctx, obj, JS_VALUE_GET_INT(idx), val);
ret = JS_IsException(r) ? -1 : 0;
} else if (JS_IsArray(obj)) {
JS_ThrowTypeError(ctx, "array index must be a number");
ret = -1;
} else if (JS_IsRecord(obj) && !JS_IsText(idx) && !JS_IsRecord(idx)) {
JS_ThrowTypeError(ctx, "object key must be a string or object");
ret = -1;
} else {
ret = JS_SetProperty(ctx, obj, idx, val);
}
JSValue r = JS_SetPropertyNumber(ctx, obj, JS_VALUE_GET_INT(frame->slots[b]), val);
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
if (ret < 0) goto disrupt;
if (JS_IsException(r)) goto disrupt;
mach_resolve_forward(&frame->slots[a]);
break;
}
@@ -1146,6 +1129,17 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
result = frame->slots[a];
if (JS_IsNull(frame->caller)) goto done;
{
#ifdef VALIDATE_GC
const char *callee_name = "?";
const char *callee_file = "?";
{
JSFunction *callee_fn = JS_VALUE_GET_FUNCTION(frame->function);
if (callee_fn->kind == JS_FUNC_KIND_REGISTER && callee_fn->u.reg.code) {
if (callee_fn->u.reg.code->name_cstr) callee_name = callee_fn->u.reg.code->name_cstr;
if (callee_fn->u.reg.code->filename_cstr) callee_file = callee_fn->u.reg.code->filename_cstr;
}
}
#endif
JSFrameRegister *caller = (JSFrameRegister *)JS_VALUE_GET_PTR(frame->caller);
frame->caller = JS_NULL;
frame = caller;
@@ -1156,7 +1150,22 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
env = fn->u.reg.env_record;
pc = ret_info >> 16;
int ret_slot = ret_info & 0xFFFF;
if (ret_slot != 0xFFFF) frame->slots[ret_slot] = result;
if (ret_slot != 0xFFFF) {
#ifdef VALIDATE_GC
if (JS_IsPtr(result)) {
void *rp = JS_VALUE_GET_PTR(result);
if ((uint8_t *)rp < ctx->heap_base || (uint8_t *)rp >= ctx->heap_free) {
if (!is_ct_ptr(ctx, rp))
fprintf(stderr, "VALIDATE_GC: stale RETURN into slot %d, ptr=%p heap=[%p,%p) fn_slots=%d pc=%u callee=%s (%s) caller=%s (%s)\n",
ret_slot, rp, (void*)ctx->heap_base, (void*)ctx->heap_free, code->nr_slots, pc,
callee_name, callee_file,
code->name_cstr ? code->name_cstr : "?",
code->filename_cstr ? code->filename_cstr : "?");
}
}
#endif
frame->slots[ret_slot] = result;
}
}
break;
@@ -1187,16 +1196,10 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
}
case MACH_NEWARRAY: {
int count = b;
JSValue arr = JS_NewArray(ctx);
JSValue arr = JS_NewArrayCap(ctx, b);
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
if (JS_IsException(arr)) { goto disrupt; }
/* Store array in dest immediately so GC can track it */
frame->slots[a] = arr;
for (int i = 0; i < count; i++) {
JS_SetPropertyNumber(ctx, frame->slots[a], i, frame->slots[a + 1 + i]);
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
}
break;
}
@@ -1216,14 +1219,9 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
}
case MACH_PUSH: {
/* push R(B) onto array R(A) */
/* push R(B) onto array R(A) — mcode guarantees R(A) is array */
JSValue arr = frame->slots[a];
JSValue val = frame->slots[b];
if (!JS_IsArray(arr)) {
JS_ThrowTypeError(ctx, "cannot push to non-array");
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
goto disrupt;
}
JSGCRef arr_gc;
JS_PushGCRef(ctx, &arr_gc);
arr_gc.val = arr;
@@ -1236,13 +1234,8 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
}
case MACH_POP: {
/* R(A) = pop last element from array R(B) */
/* R(A) = pop last element from array R(B) — mcode guarantees R(B) is array */
JSValue arr = frame->slots[b];
if (!JS_IsArray(arr)) {
JS_ThrowTypeError(ctx, "cannot pop from non-array");
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
goto disrupt;
}
JSValue val = JS_ArrayPop(ctx, arr);
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
if (JS_IsException(val)) goto disrupt;
@@ -1295,100 +1288,6 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
/* === New mcode-derived opcodes === */
/* Typed integer arithmetic — inline with overflow to float */
case MACH_ADD_INT: {
int32_t ia = JS_VALUE_GET_INT(frame->slots[b]);
int32_t ib = JS_VALUE_GET_INT(frame->slots[c]);
int64_t r = (int64_t)ia + (int64_t)ib;
frame->slots[a] = (r >= INT32_MIN && r <= INT32_MAX)
? JS_NewInt32(ctx, (int32_t)r) : JS_NewFloat64(ctx, (double)r);
break;
}
case MACH_SUB_INT: {
int32_t ia = JS_VALUE_GET_INT(frame->slots[b]);
int32_t ib = JS_VALUE_GET_INT(frame->slots[c]);
int64_t r = (int64_t)ia - (int64_t)ib;
frame->slots[a] = (r >= INT32_MIN && r <= INT32_MAX)
? JS_NewInt32(ctx, (int32_t)r) : JS_NewFloat64(ctx, (double)r);
break;
}
case MACH_MUL_INT: {
int32_t ia = JS_VALUE_GET_INT(frame->slots[b]);
int32_t ib = JS_VALUE_GET_INT(frame->slots[c]);
int64_t r = (int64_t)ia * (int64_t)ib;
frame->slots[a] = (r >= INT32_MIN && r <= INT32_MAX)
? JS_NewInt32(ctx, (int32_t)r) : JS_NewFloat64(ctx, (double)r);
break;
}
case MACH_DIV_INT: {
int32_t ia = JS_VALUE_GET_INT(frame->slots[b]);
int32_t ib = JS_VALUE_GET_INT(frame->slots[c]);
if (ib == 0) { frame->slots[a] = JS_NULL; break; }
if (ia % ib == 0) frame->slots[a] = JS_NewInt32(ctx, ia / ib);
else frame->slots[a] = JS_NewFloat64(ctx, (double)ia / (double)ib);
break;
}
case MACH_MOD_INT: {
int32_t ia = JS_VALUE_GET_INT(frame->slots[b]);
int32_t ib = JS_VALUE_GET_INT(frame->slots[c]);
if (ib == 0) { frame->slots[a] = JS_NULL; break; }
frame->slots[a] = JS_NewInt32(ctx, ia % ib);
break;
}
case MACH_NEG_INT: {
int32_t i = JS_VALUE_GET_INT(frame->slots[b]);
if (i == INT32_MIN)
frame->slots[a] = JS_NewFloat64(ctx, -(double)i);
else
frame->slots[a] = JS_NewInt32(ctx, -i);
break;
}
/* Typed float arithmetic */
case MACH_ADD_FLOAT: {
double da, db;
JS_ToFloat64(ctx, &da, frame->slots[b]);
JS_ToFloat64(ctx, &db, frame->slots[c]);
frame->slots[a] = JS_NewFloat64(ctx, da + db);
break;
}
case MACH_SUB_FLOAT: {
double da, db;
JS_ToFloat64(ctx, &da, frame->slots[b]);
JS_ToFloat64(ctx, &db, frame->slots[c]);
frame->slots[a] = JS_NewFloat64(ctx, da - db);
break;
}
case MACH_MUL_FLOAT: {
double da, db;
JS_ToFloat64(ctx, &da, frame->slots[b]);
JS_ToFloat64(ctx, &db, frame->slots[c]);
frame->slots[a] = JS_NewFloat64(ctx, da * db);
break;
}
case MACH_DIV_FLOAT: {
double da, db;
JS_ToFloat64(ctx, &da, frame->slots[b]);
JS_ToFloat64(ctx, &db, frame->slots[c]);
if (db == 0.0) { frame->slots[a] = JS_NULL; break; }
frame->slots[a] = JS_NewFloat64(ctx, da / db);
break;
}
case MACH_MOD_FLOAT: {
double da, db;
JS_ToFloat64(ctx, &da, frame->slots[b]);
JS_ToFloat64(ctx, &db, frame->slots[c]);
if (db == 0.0) { frame->slots[a] = JS_NULL; break; }
frame->slots[a] = JS_NewFloat64(ctx, fmod(da, db));
break;
}
case MACH_NEG_FLOAT: {
double d;
JS_ToFloat64(ctx, &d, frame->slots[b]);
frame->slots[a] = JS_NewFloat64(ctx, -d);
break;
}
/* Text concatenation */
case MACH_CONCAT: {
JSValue res = JS_ConcatString(ctx, frame->slots[b], frame->slots[c]);
@@ -1482,7 +1381,7 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
frame->slots[a] = JS_NewBool(ctx, JS_IsNumber(frame->slots[b]));
break;
case MACH_IS_TEXT:
frame->slots[a] = JS_NewBool(ctx, JS_IsText(frame->slots[b]));
frame->slots[a] = JS_NewBool(ctx, mist_is_text(frame->slots[b]));
break;
case MACH_IS_BOOL:
frame->slots[a] = JS_NewBool(ctx, JS_IsBool(frame->slots[b]));
@@ -1490,35 +1389,43 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
case MACH_IS_NULL:
frame->slots[a] = JS_NewBool(ctx, JS_IsNull(frame->slots[b]));
break;
case MACH_IS_FUNC:
frame->slots[a] = JS_NewBool(ctx, JS_IsFunction(frame->slots[b]));
case MACH_IS_ARRAY:
frame->slots[a] = JS_NewBool(ctx, mist_is_array(frame->slots[b]));
break;
case MACH_IS_FUNC:
frame->slots[a] = JS_NewBool(ctx, mist_is_function(frame->slots[b]));
break;
case MACH_IS_RECORD:
frame->slots[a] = JS_NewBool(ctx, mist_is_record(frame->slots[b]));
break;
case MACH_IS_STONE:
frame->slots[a] = JS_NewBool(ctx, mist_is_stone(frame->slots[b]));
break;
case MACH_LENGTH: {
JSValue v = frame->slots[b];
if (mist_is_array(v)) {
JSArray *arr = JS_VALUE_GET_ARRAY(v);
frame->slots[a] = JS_NewInt32(ctx, (int32_t)arr->len);
} else if (MIST_IsImmediateASCII(v)) {
frame->slots[a] = JS_NewInt32(ctx, MIST_GetImmediateASCIILen(v));
} else {
/* fallback to C for text/blob/function (still a GC safepoint) */
JSValue res = JS_CellLength(ctx, v);
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
frame->slots[a] = res;
}
break;
}
case MACH_IS_PROXY: {
JSValue v = frame->slots[b];
int is_proxy = 0;
if (JS_IsFunction(v)) {
if (mist_is_function(v)) {
JSFunction *fn = JS_VALUE_GET_FUNCTION(v);
is_proxy = (fn->length == 2);
}
frame->slots[a] = JS_NewBool(ctx, is_proxy);
break;
}
case MACH_TYPEOF: {
JSValue val = frame->slots[b];
const char *tname = "unknown";
if (JS_IsNull(val)) tname = "null";
else if (JS_IsInt(val) || JS_IsNumber(val)) tname = "number";
else if (JS_IsBool(val)) tname = "logical";
else if (JS_IsText(val)) tname = "text";
else if (JS_IsFunction(val)) tname = "function";
else if (JS_IsArray(val)) tname = "array";
else if (JS_IsRecord(val)) tname = "object";
JSValue res = JS_NewString(ctx, tname);
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
frame->slots[a] = res;
break;
}
/* Logical */
case MACH_NOT: {
int bval = JS_ToBool(ctx, frame->slots[b]);
@@ -1565,7 +1472,7 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
case MACH_LOAD_FIELD: {
JSValue obj = frame->slots[b];
JSValue key = code->cpool[c];
if (JS_IsFunction(obj)) {
if (mist_is_function(obj)) {
JS_ThrowTypeError(ctx, "cannot read property of function");
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
goto disrupt;
@@ -1583,33 +1490,26 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
int ret = JS_SetProperty(ctx, obj, key, val);
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
if (ret < 0) goto disrupt;
mach_resolve_forward(&frame->slots[a]);
break;
}
case MACH_LOAD_INDEX: {
/* R(A) = R(B)[R(C)] — mcode guarantees R(C) is int */
JSValue obj = frame->slots[b];
JSValue idx = frame->slots[c];
JSValue val;
if (JS_IsInt(idx))
val = JS_GetPropertyNumber(ctx, obj, JS_VALUE_GET_INT(idx));
else
val = JS_GetProperty(ctx, obj, idx);
JSValue val = JS_GetPropertyNumber(ctx, obj, JS_VALUE_GET_INT(frame->slots[c]));
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
if (JS_IsException(val)) goto disrupt;
frame->slots[a] = val;
break;
}
case MACH_STORE_INDEX: {
/* R(A)[R(B)] = R(C) — mcode guarantees R(B) is int */
JSValue obj = frame->slots[a];
JSValue idx = frame->slots[b];
JSValue val = frame->slots[c];
int ret;
if (JS_IsInt(idx)) {
JSValue r = JS_SetPropertyNumber(ctx, obj, JS_VALUE_GET_INT(idx), val);
ret = JS_IsException(r) ? -1 : 0;
} else
ret = JS_SetProperty(ctx, obj, idx, val);
JSValue r = JS_SetPropertyNumber(ctx, obj, JS_VALUE_GET_INT(frame->slots[b]), val);
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
if (ret < 0) goto disrupt;
if (JS_IsException(r)) goto disrupt;
mach_resolve_forward(&frame->slots[a]);
break;
}
case MACH_LOAD_DYNAMIC: {
@@ -1633,10 +1533,10 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
if (JS_IsInt(key)) {
JSValue r = JS_SetPropertyNumber(ctx, obj, JS_VALUE_GET_INT(key), val);
ret = JS_IsException(r) ? -1 : 0;
} else if (JS_IsArray(obj)) {
} else if (mist_is_array(obj)) {
JS_ThrowTypeError(ctx, "array index must be a number");
ret = -1;
} else if (JS_IsBool(key) || JS_IsNull(key) || JS_IsArray(key) || JS_IsFunction(key)) {
} else if (JS_IsBool(key) || JS_IsNull(key) || mist_is_array(key) || mist_is_function(key)) {
JS_ThrowTypeError(ctx, "object key must be text");
ret = -1;
} else {
@@ -1644,12 +1544,13 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
}
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
if (ret < 0) goto disrupt;
mach_resolve_forward(&frame->slots[a]);
break;
}
/* New record */
case MACH_NEWRECORD: {
JSValue obj = JS_NewObject(ctx);
JSValue obj = b > 0 ? JS_NewObjectCap(ctx, b) : JS_NewObject(ctx);
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
if (JS_IsException(obj)) goto disrupt;
frame->slots[a] = obj;
@@ -1661,7 +1562,7 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
case MACH_GOFRAME: {
/* A=frame_slot, B=func_reg, C=argc */
JSValue func_val = frame->slots[b];
if (!JS_IsFunction(func_val)) {
if (!mist_is_function(func_val)) {
JS_ThrowTypeError(ctx, "not a function");
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
goto disrupt;
@@ -1687,7 +1588,7 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
case MACH_INVOKE: {
/* A=frame_slot, B=result_slot */
JSFrameRegister *fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame->slots[a]);
int nr = (int)objhdr_cap56(fr->hdr);
int nr = (int)objhdr_cap56(fr->header);
int c_argc = (nr >= 2) ? nr - 2 : 0;
JSValue fn_val = fr->function;
JSFunction *fn = JS_VALUE_GET_FUNCTION(fn_val);
@@ -1731,22 +1632,89 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
ctx->reg_current_frame = JS_NULL;
if (JS_IsException(ret)) goto disrupt;
#ifdef VALIDATE_GC
if (JS_IsPtr(ret)) {
void *rp = JS_VALUE_GET_PTR(ret);
if ((uint8_t *)rp < ctx->heap_base || (uint8_t *)rp >= ctx->heap_free) {
if (!is_ct_ptr(ctx, rp)) {
int magic = (fn->kind == JS_FUNC_KIND_C) ? fn->u.cfunc.magic : -1;
void *cfp = (fn->kind == JS_FUNC_KIND_C) ? (void *)fn->u.cfunc.c_function.generic : NULL;
fprintf(stderr, "VALIDATE_GC: stale INVOKE result into slot %d, ptr=%p heap=[%p,%p) fn_slots=%d pc=%u kind=%d magic=%d cfunc=%p caller=%s (%s)\n",
b, rp, (void*)ctx->heap_base, (void*)ctx->heap_free, code->nr_slots, pc - 1, fn->kind,
magic, cfp,
code->name_cstr ? code->name_cstr : "?",
code->filename_cstr ? code->filename_cstr : "?");
}
}
}
#endif
frame->slots[b] = ret;
}
break;
}
case MACH_GOINVOKE: {
/* Async invoke: call and discard result */
/* Tail call: replace current frame with callee */
JSFrameRegister *fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame->slots[a]);
int nr = (int)objhdr_cap56(fr->hdr);
int nr = (int)objhdr_cap56(fr->header);
int c_argc = (nr >= 2) ? nr - 2 : 0;
ctx->reg_current_frame = frame_ref.val;
ctx->current_register_pc = pc > 0 ? pc - 1 : 0;
JSValue ret = JS_Call(ctx, fr->function, fr->slots[0],
c_argc, &fr->slots[1]);
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
ctx->reg_current_frame = JS_NULL;
if (JS_IsException(ret)) goto disrupt;
JSValue fn_val = fr->function;
JSFunction *fn = JS_VALUE_GET_FUNCTION(fn_val);
if (fn->kind == JS_FUNC_KIND_REGISTER) {
/* Register function: tail call by replacing current frame */
JSCodeRegister *fn_code = fn->u.reg.code;
JSFrameRegister *new_frame = alloc_frame_register(ctx, fn_code->nr_slots);
if (!new_frame) {
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
goto disrupt;
}
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame->slots[a]);
fn_val = fr->function;
fn = JS_VALUE_GET_FUNCTION(fn_val);
fn_code = fn->u.reg.code;
new_frame->function = fn_val;
/* Copy this + args from call frame to new frame */
int copy_count = (c_argc < fn_code->arity) ? c_argc : fn_code->arity;
new_frame->slots[0] = fr->slots[0]; /* this */
for (int i = 0; i < copy_count; i++)
new_frame->slots[1 + i] = fr->slots[1 + i];
/* Tail call: callee returns to OUR caller, not to us */
new_frame->caller = frame->caller;
frame->caller = JS_NULL; /* detach current frame */
/* Switch to callee */
frame = new_frame;
frame_ref.val = JS_MKPTR(frame);
code = fn_code;
env = fn->u.reg.env_record;
pc = code->entry_point;
} else {
/* C/bytecode function: call it, then return result to our caller */
ctx->reg_current_frame = frame_ref.val;
ctx->current_register_pc = pc > 0 ? pc - 1 : 0;
JSValue ret;
if (fn->kind == JS_FUNC_KIND_C)
ret = js_call_c_function(ctx, fn_val, fr->slots[0], c_argc, &fr->slots[1]);
else
ret = JS_CallInternal(ctx, fn_val, fr->slots[0], c_argc, &fr->slots[1], 0);
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
ctx->reg_current_frame = JS_NULL;
if (JS_IsException(ret)) goto disrupt;
/* Tail-return: act like MACH_RETURN with the result */
result = ret;
if (JS_IsNull(frame->caller)) goto done;
JSFrameRegister *caller = (JSFrameRegister *)JS_VALUE_GET_PTR(frame->caller);
frame->caller = JS_NULL;
frame = caller;
frame_ref.val = JS_MKPTR(frame);
int ret_info = JS_VALUE_GET_INT(frame->address);
JSFunction *ret_fn = JS_VALUE_GET_FUNCTION(frame->function);
code = ret_fn->u.reg.code;
env = ret_fn->u.reg.env_record;
pc = ret_info >> 16;
int ret_slot = ret_info & 0xFFFF;
if (ret_slot != 0xFFFF) frame->slots[ret_slot] = ret;
}
break;
}
@@ -1789,7 +1757,7 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
}
default:
result = JS_ThrowInternalError(ctx, "unknown register VM opcode %d", op);
result = JS_ThrowInternalError(ctx, "unknown register VM opcode %d: %s", op, mach_opcode_names[op]);
goto done;
}
continue;
@@ -1812,12 +1780,38 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
break;
}
if (JS_IsNull(frame->caller)) {
if (!JS_HasException(ctx)) {
/* Bare disrupt with no error message — provide location */
const char *fn_name = code->name_cstr ? code->name_cstr : "<anonymous>";
fprintf(stderr, "unhandled disruption in %s\n", fn_name);
} else {
fprintf(stderr, "unhandled disruption\n");
const char *fn_name = code->name_cstr ? code->name_cstr : "<anonymous>";
const char *file = code->filename_cstr ? code->filename_cstr : "<unknown>";
uint16_t line = 0, col = 0;
if (code->line_table && frame_pc > 0 && frame_pc - 1 < code->instr_count) {
line = code->line_table[frame_pc - 1].line;
col = code->line_table[frame_pc - 1].col;
}
fprintf(stderr, "unhandled disruption in %s (%s:%u:%u)\n", fn_name, file, line, col);
/* Walk and print the frame chain as a stack trace */
{
JSFrameRegister *trace_frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
int first = 1;
while (trace_frame) {
if (!mist_is_function(trace_frame->function)) break;
JSFunction *trace_fn = JS_VALUE_GET_FUNCTION(trace_frame->function);
if (trace_fn->kind == JS_FUNC_KIND_REGISTER && trace_fn->u.reg.code) {
JSCodeRegister *tc = trace_fn->u.reg.code;
uint32_t tpc = first ? (frame_pc > 0 ? frame_pc - 1 : 0)
: (uint32_t)(JS_VALUE_GET_INT(trace_frame->address) >> 16);
uint16_t tl = 0, tcol = 0;
if (tc->line_table && tpc < tc->instr_count) {
tl = tc->line_table[tpc].line;
tcol = tc->line_table[tpc].col;
}
fprintf(stderr, " at %s (%s:%u:%u)\n",
tc->name_cstr ? tc->name_cstr : "<anonymous>",
tc->filename_cstr ? tc->filename_cstr : "<unknown>", tl, tcol);
}
if (JS_IsNull(trace_frame->caller)) break;
trace_frame = (JSFrameRegister *)JS_VALUE_GET_PTR(trace_frame->caller);
first = 0;
}
}
result = JS_Throw(ctx, JS_NULL);
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
@@ -1979,7 +1973,7 @@ static int mcode_reg_items(cJSON *it, cJSON **out) {
{ ADD(1); return c; }
/* invoke: [1]=frame, [2]=dest (result register) */
if (!strcmp(op, "invoke")) { ADD(1); ADD(2); return c; }
if (!strcmp(op, "invoke") || !strcmp(op, "tail_invoke")) { ADD(1); ADD(2); return c; }
/* goinvoke: [1]=frame only (no result) */
if (!strcmp(op, "goinvoke")) { ADD(1); return c; }
@@ -2014,11 +2008,9 @@ static int mcode_reg_items(cJSON *it, cJSON **out) {
/* record: [1]=dest, [2]=0(const) — no line/col suffix */
if (!strcmp(op, "record")) { ADD(1); return c; }
/* array: [1]=dest, [2]=count(const), [3..]=elements (no line/col suffix) */
/* array: [1]=dest, [2]=count(const) — elements added via separate push instrs */
if (!strcmp(op, "array")) {
ADD(1);
int cnt = (int)cJSON_GetArrayItem(it, 2)->valuedouble;
for (int j = 0; j < cnt; j++) ADD(3 + j);
return c;
}
@@ -2074,8 +2066,8 @@ static int *mcode_compress_regs(cJSON *fobj, int *out_old_nr_slots,
int pinned = 1 + nr_args;
for (int i = 0; i < pinned; i++) { first_ref[i] = 0; last_ref[i] = n; }
for (int i = 0; i < n; i++) {
cJSON *it = cJSON_GetArrayItem(instrs, i);
{ cJSON *it = instrs ? instrs->child : NULL;
for (int i = 0; it; i++, it = it->next) {
if (!cJSON_IsArray(it)) continue;
cJSON *regs[MAX_REG_ITEMS];
int rc = mcode_reg_items(it, regs);
@@ -2085,7 +2077,7 @@ static int *mcode_compress_regs(cJSON *fobj, int *out_old_nr_slots,
if (first_ref[s] < 0) first_ref[s] = i;
last_ref[s] = i;
}
}
} }
/* Step 1a: extend live ranges for closure-captured slots.
If a child function captures a parent slot via get/put, that slot must
@@ -2107,8 +2099,8 @@ static int *mcode_compress_regs(cJSON *fobj, int *out_old_nr_slots,
typedef struct { const char *name; int pos; } LabelPos;
int lbl_cap = 32, lbl_n = 0;
LabelPos *lbls = sys_malloc(lbl_cap * sizeof(LabelPos));
for (int i = 0; i < n; i++) {
cJSON *it = cJSON_GetArrayItem(instrs, i);
{ cJSON *it = instrs ? instrs->child : NULL;
for (int i = 0; it; i++, it = it->next) {
if (cJSON_IsString(it)) {
if (lbl_n >= lbl_cap) {
lbl_cap *= 2;
@@ -2116,23 +2108,23 @@ static int *mcode_compress_regs(cJSON *fobj, int *out_old_nr_slots,
}
lbls[lbl_n++] = (LabelPos){it->valuestring, i};
}
}
} }
/* Find backward jumps and extend live ranges */
int changed = 1;
while (changed) {
changed = 0;
for (int i = 0; i < n; i++) {
cJSON *it = cJSON_GetArrayItem(instrs, i);
cJSON *it = instrs ? instrs->child : NULL;
for (int i = 0; it; i++, it = it->next) {
if (!cJSON_IsArray(it)) continue;
int sz = cJSON_GetArraySize(it);
if (sz < 3) continue;
const char *op = cJSON_GetArrayItem(it, 0)->valuestring;
const char *op = it->child->valuestring;
const char *target = NULL;
if (!strcmp(op, "jump")) {
target = cJSON_GetArrayItem(it, 1)->valuestring;
target = it->child->next->valuestring;
} else if (!strcmp(op, "jump_true") || !strcmp(op, "jump_false") ||
!strcmp(op, "jump_not_null")) {
target = cJSON_GetArrayItem(it, 2)->valuestring;
target = it->child->next->next->valuestring;
}
if (!target) continue;
/* Find label position */
@@ -2238,8 +2230,8 @@ static int *mcode_compress_regs(cJSON *fobj, int *out_old_nr_slots,
}
/* Step 3: apply remap to instructions */
for (int i = 0; i < n; i++) {
cJSON *it = cJSON_GetArrayItem(instrs, i);
{ cJSON *it = instrs ? instrs->child : NULL;
for (int i = 0; it; i++, it = it->next) {
if (!cJSON_IsArray(it)) continue;
cJSON *regs[MAX_REG_ITEMS];
int rc = mcode_reg_items(it, regs);
@@ -2249,7 +2241,7 @@ static int *mcode_compress_regs(cJSON *fobj, int *out_old_nr_slots,
cJSON_SetNumberValue(regs[j], remap[old]);
}
}
}
} }
/* Update nr_slots in the JSON */
cJSON_SetNumberValue(nr_slots_j, new_max);
@@ -2281,8 +2273,8 @@ static MachCode *mcode_lower_func(cJSON *fobj, const char *filename) {
s.flat_to_pc = sys_malloc((n + 1) * sizeof(int));
s.flat_count = n;
for (int i = 0; i < n; i++) {
cJSON *it = cJSON_GetArrayItem(instrs, i);
{ cJSON *it = instrs ? instrs->child : NULL;
for (int i = 0; it; i++, it = it->next) {
s.flat_to_pc[i] = s.code_count;
if (cJSON_IsString(it)) {
ml_label(&s, it->valuestring);
@@ -2332,24 +2324,16 @@ static MachCode *mcode_lower_func(cJSON *fobj, const char *filename) {
else if (strcmp(op, "false") == 0) { EM(MACH_ABC(MACH_LOADFALSE, A1, 0, 0)); }
else if (strcmp(op, "null") == 0) { EM(MACH_ABC(MACH_LOADNULL, A1, 0, 0)); }
else if (strcmp(op, "move") == 0) { AB2(MACH_MOVE); }
/* Typed integer arithmetic */
else if (strcmp(op, "add_int") == 0) { ABC3(MACH_ADD_INT); }
else if (strcmp(op, "sub_int") == 0) { ABC3(MACH_SUB_INT); }
else if (strcmp(op, "mul_int") == 0) { ABC3(MACH_MUL_INT); }
else if (strcmp(op, "div_int") == 0) { ABC3(MACH_DIV_INT); }
else if (strcmp(op, "mod_int") == 0) { ABC3(MACH_MOD_INT); }
else if (strcmp(op, "neg_int") == 0) { AB2(MACH_NEG_INT); }
/* Typed float arithmetic */
else if (strcmp(op, "add_float") == 0) { ABC3(MACH_ADD_FLOAT); }
else if (strcmp(op, "sub_float") == 0) { ABC3(MACH_SUB_FLOAT); }
else if (strcmp(op, "mul_float") == 0) { ABC3(MACH_MUL_FLOAT); }
else if (strcmp(op, "div_float") == 0) { ABC3(MACH_DIV_FLOAT); }
else if (strcmp(op, "mod_float") == 0) { ABC3(MACH_MOD_FLOAT); }
else if (strcmp(op, "neg_float") == 0) { AB2(MACH_NEG_FLOAT); }
/* Text */
else if (strcmp(op, "concat") == 0) { ABC3(MACH_CONCAT); }
/* Generic arithmetic */
else if (strcmp(op, "add") == 0) { ABC3(MACH_ADD); }
else if (strcmp(op, "subtract") == 0) { ABC3(MACH_SUB); }
else if (strcmp(op, "multiply") == 0) { ABC3(MACH_MUL); }
else if (strcmp(op, "divide") == 0) { ABC3(MACH_DIV); }
else if (strcmp(op, "modulo") == 0) { ABC3(MACH_MOD); }
else if (strcmp(op, "pow") == 0) { ABC3(MACH_POW); }
else if (strcmp(op, "negate") == 0) { AB2(MACH_NEG); }
/* Typed integer comparisons */
else if (strcmp(op, "eq_int") == 0) { ABC3(MACH_EQ_INT); }
else if (strcmp(op, "ne_int") == 0) { ABC3(MACH_NE_INT); }
@@ -2400,11 +2384,14 @@ static MachCode *mcode_lower_func(cJSON *fobj, const char *filename) {
else if (strcmp(op, "is_int") == 0) { AB2(MACH_IS_INT); }
else if (strcmp(op, "is_num") == 0) { AB2(MACH_IS_NUM); }
else if (strcmp(op, "is_text") == 0) { AB2(MACH_IS_TEXT); }
else if (strcmp(op, "is_bool") == 0) { AB2(MACH_IS_BOOL); }
else if (strcmp(op, "is_null") == 0) { AB2(MACH_IS_NULL); }
else if (strcmp(op, "is_func") == 0) { AB2(MACH_IS_FUNC); }
else if (strcmp(op, "is_proxy") == 0) { AB2(MACH_IS_PROXY); }
else if (strcmp(op, "typeof") == 0) { AB2(MACH_TYPEOF); }
else if (strcmp(op, "is_bool") == 0) { AB2(MACH_IS_BOOL); }
else if (strcmp(op, "is_null") == 0) { AB2(MACH_IS_NULL); }
else if (strcmp(op, "is_array") == 0) { AB2(MACH_IS_ARRAY); }
else if (strcmp(op, "is_func") == 0) { AB2(MACH_IS_FUNC); }
else if (strcmp(op, "is_record") == 0) { AB2(MACH_IS_RECORD); }
else if (strcmp(op, "is_stone") == 0) { AB2(MACH_IS_STONE); }
else if (strcmp(op, "length") == 0) { AB2(MACH_LENGTH); }
else if (strcmp(op, "is_proxy") == 0) { AB2(MACH_IS_PROXY); }
/* Logical */
else if (strcmp(op, "not") == 0) { AB2(MACH_NOT); }
else if (strcmp(op, "and") == 0) { ABC3(MACH_AND); }
@@ -2486,15 +2473,10 @@ static MachCode *mcode_lower_func(cJSON *fobj, const char *filename) {
}
/* Array/Object creation */
else if (strcmp(op, "array") == 0) {
int dest = A1, count = A2;
EM(MACH_ABC(MACH_NEWARRAY, dest, 0, 0));
for (int j = 0; j < count; j++) {
int elem = ml_int(it, 3 + j);
EM(MACH_ABC(MACH_PUSH, dest, elem, 0));
}
EM(MACH_ABC(MACH_NEWARRAY, A1, A2, 0));
}
else if (strcmp(op, "record") == 0) {
EM(MACH_ABC(MACH_NEWRECORD, A1, 0, 0));
EM(MACH_ABC(MACH_NEWRECORD, A1, A2, 0));
}
/* Push/Pop */
else if (strcmp(op, "push") == 0) {
@@ -2523,7 +2505,7 @@ static MachCode *mcode_lower_func(cJSON *fobj, const char *filename) {
else if (strcmp(op, "setarg") == 0) {
EM(MACH_ABC(MACH_SETARG, A1, A2, A3));
}
else if (strcmp(op, "invoke") == 0) {
else if (strcmp(op, "invoke") == 0 || strcmp(op, "tail_invoke") == 0) {
EM(MACH_ABC(MACH_INVOKE, A1, A2, 0));
}
else if (strcmp(op, "goframe") == 0) {
@@ -2587,7 +2569,7 @@ static MachCode *mcode_lower_func(cJSON *fobj, const char *filename) {
/* Unknown opcode — emit NOP */
EM(MACH_ABC(MACH_NOP, 0, 0, 0));
}
}
} }
/* Sentinel for flat_to_pc */
s.flat_to_pc[n] = s.code_count;
@@ -2726,34 +2708,32 @@ MachCode *mach_compile_mcode(cJSON *mcode_json) {
/* Scan main's instructions */
{
cJSON *main_instrs = cJSON_GetObjectItemCaseSensitive(main_obj, "instructions");
int mn = main_instrs ? cJSON_GetArraySize(main_instrs) : 0;
for (int i = 0; i < mn; i++) {
cJSON *it = cJSON_GetArrayItem(main_instrs, i);
cJSON *it = main_instrs ? main_instrs->child : NULL;
for (; it; it = it->next) {
if (!cJSON_IsArray(it) || cJSON_GetArraySize(it) < 3) continue;
const char *op = cJSON_GetArrayItem(it, 0)->valuestring;
const char *op = it->child->valuestring;
if (!strcmp(op, "function")) {
int child_idx = (int)cJSON_GetArrayItem(it, 2)->valuedouble;
int child_idx = (int)it->child->next->next->valuedouble;
if (child_idx >= 0 && child_idx < func_count)
parent_of[child_idx] = func_count; /* main */
}
}
}
/* Scan each function's instructions */
for (int fi = 0; fi < func_count; fi++) {
cJSON *fobj = cJSON_GetArrayItem(funcs_arr, fi);
{ cJSON *fobj = funcs_arr ? funcs_arr->child : NULL;
for (int fi = 0; fobj; fi++, fobj = fobj->next) {
cJSON *finstrs = cJSON_GetObjectItemCaseSensitive(fobj, "instructions");
int fn = finstrs ? cJSON_GetArraySize(finstrs) : 0;
for (int i = 0; i < fn; i++) {
cJSON *it = cJSON_GetArrayItem(finstrs, i);
cJSON *it = finstrs ? finstrs->child : NULL;
for (; it; it = it->next) {
if (!cJSON_IsArray(it) || cJSON_GetArraySize(it) < 3) continue;
const char *op = cJSON_GetArrayItem(it, 0)->valuestring;
const char *op = it->child->valuestring;
if (!strcmp(op, "function")) {
int child_idx = (int)cJSON_GetArrayItem(it, 2)->valuedouble;
int child_idx = (int)it->child->next->next->valuedouble;
if (child_idx >= 0 && child_idx < func_count)
parent_of[child_idx] = fi;
}
}
}
} }
/* Build per-function capture sets: for each function F, which of its slots
are captured by descendant functions via get/put. Captured slots must
@@ -2763,17 +2743,16 @@ MachCode *mach_compile_mcode(cJSON *mcode_json) {
memset(cap_slots, 0, (func_count + 1) * sizeof(int *));
memset(cap_counts, 0, (func_count + 1) * sizeof(int));
for (int fi = 0; fi < func_count; fi++) {
cJSON *fobj = cJSON_GetArrayItem(funcs_arr, fi);
{ cJSON *fobj = funcs_arr ? funcs_arr->child : NULL;
for (int fi = 0; fobj; fi++, fobj = fobj->next) {
cJSON *finstrs = cJSON_GetObjectItemCaseSensitive(fobj, "instructions");
int fn = finstrs ? cJSON_GetArraySize(finstrs) : 0;
for (int i = 0; i < fn; i++) {
cJSON *it = cJSON_GetArrayItem(finstrs, i);
cJSON *it = finstrs ? finstrs->child : NULL;
for (; it; it = it->next) {
if (!cJSON_IsArray(it) || cJSON_GetArraySize(it) < 4) continue;
const char *op = cJSON_GetArrayItem(it, 0)->valuestring;
const char *op = it->child->valuestring;
if (strcmp(op, "get") && strcmp(op, "put")) continue;
int slot = (int)cJSON_GetArrayItem(it, 2)->valuedouble;
int level = (int)cJSON_GetArrayItem(it, 3)->valuedouble;
int slot = (int)it->child->next->next->valuedouble;
int level = (int)it->child->next->next->next->valuedouble;
/* Walk up parent chain to find the ancestor whose slot is referenced */
int ancestor = fi;
for (int l = 0; l < level && ancestor >= 0; l++)
@@ -2789,7 +2768,7 @@ MachCode *mach_compile_mcode(cJSON *mcode_json) {
cap_slots[ancestor][cap_counts[ancestor]++] = slot;
}
}
}
} }
/* Compress registers for functions that exceed 8-bit slot limits.
Save remap tables so we can fix get/put parent_slot references. */
@@ -2797,9 +2776,11 @@ MachCode *mach_compile_mcode(cJSON *mcode_json) {
int *remap_sizes = sys_malloc((func_count + 1) * sizeof(int));
memset(remaps, 0, (func_count + 1) * sizeof(int *));
for (int i = 0; i < func_count; i++)
remaps[i] = mcode_compress_regs(cJSON_GetArrayItem(funcs_arr, i),
{ cJSON *fobj = funcs_arr ? funcs_arr->child : NULL;
for (int i = 0; fobj; i++, fobj = fobj->next)
remaps[i] = mcode_compress_regs(fobj,
&remap_sizes[i], cap_slots[i], cap_counts[i]);
}
/* main is stored at index func_count in our arrays */
remaps[func_count] = mcode_compress_regs(main_obj,
&remap_sizes[func_count], cap_slots[func_count], cap_counts[func_count]);
@@ -2811,16 +2792,15 @@ MachCode *mach_compile_mcode(cJSON *mcode_json) {
sys_free(cap_counts);
/* Fix up get/put parent_slot references using ancestor remap tables */
for (int fi = 0; fi < func_count; fi++) {
cJSON *fobj = cJSON_GetArrayItem(funcs_arr, fi);
{ cJSON *fobj = funcs_arr ? funcs_arr->child : NULL;
for (int fi = 0; fobj; fi++, fobj = fobj->next) {
cJSON *finstrs = cJSON_GetObjectItemCaseSensitive(fobj, "instructions");
int fn = finstrs ? cJSON_GetArraySize(finstrs) : 0;
for (int i = 0; i < fn; i++) {
cJSON *it = cJSON_GetArrayItem(finstrs, i);
cJSON *it = finstrs ? finstrs->child : NULL;
for (; it; it = it->next) {
if (!cJSON_IsArray(it) || cJSON_GetArraySize(it) < 4) continue;
const char *op = cJSON_GetArrayItem(it, 0)->valuestring;
const char *op = it->child->valuestring;
if (strcmp(op, "get") && strcmp(op, "put")) continue;
int level = (int)cJSON_GetArrayItem(it, 3)->valuedouble;
int level = (int)it->child->next->next->next->valuedouble;
/* Walk up parent chain 'level' times to find ancestor */
int ancestor = fi;
for (int l = 0; l < level && ancestor >= 0; l++) {
@@ -2829,14 +2809,14 @@ MachCode *mach_compile_mcode(cJSON *mcode_json) {
if (ancestor < 0) continue; /* unknown parent — leave as is */
int *anc_remap = remaps[ancestor];
if (!anc_remap) continue; /* ancestor wasn't compressed */
cJSON *slot_item = cJSON_GetArrayItem(it, 2);
cJSON *slot_item = it->child->next->next;
int old_slot = (int)slot_item->valuedouble;
if (old_slot >= 0 && old_slot < remap_sizes[ancestor]) {
int new_slot = anc_remap[old_slot];
cJSON_SetNumberValue(slot_item, new_slot);
}
}
}
} }
/* Free remap tables */
for (int i = 0; i <= func_count; i++)
@@ -2850,8 +2830,10 @@ MachCode *mach_compile_mcode(cJSON *mcode_json) {
if (func_count > 0) {
compiled = sys_malloc(func_count * sizeof(MachCode *));
memset(compiled, 0, func_count * sizeof(MachCode *));
for (int i = 0; i < func_count; i++)
compiled[i] = mcode_lower_func(cJSON_GetArrayItem(funcs_arr, i), filename);
{ cJSON *fobj = funcs_arr->child;
for (int i = 0; fobj; i++, fobj = fobj->next)
compiled[i] = mcode_lower_func(fobj, filename);
}
}
/* Compile main */
@@ -3147,6 +3129,29 @@ JSValue JS_RunMachBin(JSContext *ctx, const uint8_t *data, size_t size, JSValue
return result;
}
/* Parse a JSON-encoded mcode program, compile it, load it and run it on the
   register VM.

   ctx      - context the code is loaded into and executed in
   json_str - mcode JSON text, handed to cJSON_Parse as-is
   len      - currently ignored
   env      - environment value forwarded to JS_LoadMachCode and
              JS_CallRegisterVM

   Returns whatever JS_CallRegisterVM returns. A SyntaxError is thrown when
   the JSON cannot be parsed and an InternalError when compilation fails. */
JSValue JS_RunMachMcode(JSContext *ctx, const char *json_str, size_t len, JSValue env) {
  (void)len;

  cJSON *root = cJSON_Parse(json_str);
  if (root == NULL)
    return JS_ThrowSyntaxError(ctx, "failed to parse mcode JSON");

  /* The JSON tree is only needed as compiler input; release it right away. */
  MachCode *compiled = mach_compile_mcode(root);
  cJSON_Delete(root);
  if (compiled == NULL)
    return JS_ThrowInternalError(ctx, "mcode compilation failed");

  /* Register env with the GC ref stack for the duration of load + run. */
  JSGCRef env_root;
  JS_PushGCRef(ctx, &env_root);
  env_root.val = env;

  JSCodeRegister *loaded = JS_LoadMachCode(ctx, compiled, env_root.val);
  JS_FreeMachCode(compiled);

  JSValue outcome = JS_CallRegisterVM(ctx, loaded, ctx->global_obj, 0, NULL,
                                      env_root.val, JS_NULL);
  JS_PopGCRef(ctx, &env_root);
  return outcome;
}
void JS_DumpMachBin(JSContext *ctx, const uint8_t *data, size_t size, JSValue env) {
MachCode *mc = JS_DeserializeMachCode(data, size);
if (!mc) {

View File

@@ -9,6 +9,15 @@
#include "quickjs-internal.h"
#include <math.h>
/* Non-inline wrappers for static inline functions in quickjs.h */
/* Non-inline wrapper: forwards d to __JS_NewFloat64 and returns its result. */
JSValue qbe_new_float64(JSContext *ctx, double d) {
  JSValue boxed = __JS_NewFloat64(ctx, d);
  return boxed;
}
/* Non-inline wrapper: forwards str to JS_NewString and returns its result. */
JSValue qbe_new_string(JSContext *ctx, const char *str) {
  JSValue text = JS_NewString(ctx, str);
  return text;
}
/* Comparison op IDs (must match qbe.cm float_cmp_op_id values) */
enum {
QBE_CMP_EQ = 0,
@@ -42,6 +51,16 @@ JSValue qbe_float_add(JSContext *ctx, JSValue a, JSValue b) {
return qbe_float_binop(ctx, a, b, op_add);
}
/* Generic add.
   - both operands text    -> JS_ConcatString
   - both operands numeric -> float addition via qbe_float_binop/op_add
   - anything else         -> a TypeError is thrown and JS_NULL is returned */
JSValue cell_rt_add(JSContext *ctx, JSValue a, JSValue b) {
  JSValue sum = JS_NULL;

  if (JS_IsText(a) && JS_IsText(b)) {
    sum = JS_ConcatString(ctx, a, b);
  } else if (JS_IsNumber(a) && JS_IsNumber(b)) {
    sum = qbe_float_binop(ctx, a, b, op_add);
  } else {
    /* The throw's return value is dropped; sum stays JS_NULL. */
    JS_ThrowTypeError(ctx, "cannot add incompatible types");
  }

  return sum;
}
/* Float subtraction: delegates to qbe_float_binop with op_sub. */
JSValue qbe_float_sub(JSContext *ctx, JSValue a, JSValue b) {
  JSValue difference = qbe_float_binop(ctx, a, b, op_sub);
  return difference;
}
@@ -277,19 +296,37 @@ void cell_rt_put_closure(JSContext *ctx, void *fp, JSValue val, int64_t depth,
typedef JSValue (*cell_compiled_fn)(JSContext *ctx, void *fp);
/* Table mapping fn_idx → outer_fp at creation time.
Valid for single-threaded, non-recursive closure patterns. */
#define MAX_QBE_FUNCTIONS 256
static void *g_outer_fp[MAX_QBE_FUNCTIONS];
/* Per-module function registry.
Each native .cm module gets its own dylib. When a module creates closures
via cell_rt_make_function, we record the dylib handle so the trampoline
can look up the correct cell_fn_N in the right dylib. */
#define MAX_NATIVE_FN 4096
static struct {
void *dl_handle;
int fn_idx;
void *outer_fp;
} g_native_fn_registry[MAX_NATIVE_FN];
static int g_native_fn_count = 0;
/* Set before executing a native module's cell_main */
static void *g_current_dl_handle = NULL;
static JSValue cell_fn_trampoline(JSContext *ctx, JSValue this_val,
int argc, JSValue *argv, int magic) {
char name[64];
snprintf(name, sizeof(name), "cell_fn_%d", magic);
if (magic < 0 || magic >= g_native_fn_count)
return JS_ThrowTypeError(ctx, "invalid native function id %d", magic);
cell_compiled_fn fn = (cell_compiled_fn)dlsym(RTLD_DEFAULT, name);
void *handle = g_native_fn_registry[magic].dl_handle;
int fn_idx = g_native_fn_registry[magic].fn_idx;
char name[64];
snprintf(name, sizeof(name), "cell_fn_%d", fn_idx);
cell_compiled_fn fn = (cell_compiled_fn)dlsym(handle, name);
if (!fn)
return JS_ThrowTypeError(ctx, "native function %s not found", name);
return JS_ThrowTypeError(ctx, "native function %s not found in dylib", name);
/* Allocate frame: slot 0 = this, slots 1..argc = args */
JSValue frame[512];
@@ -299,17 +336,22 @@ static JSValue cell_fn_trampoline(JSContext *ctx, JSValue this_val,
frame[1 + i] = argv[i];
/* Link to outer frame for closure access */
if (magic >= 0 && magic < MAX_QBE_FUNCTIONS)
frame[QBE_FRAME_OUTER_SLOT] = (JSValue)(uintptr_t)g_outer_fp[magic];
frame[QBE_FRAME_OUTER_SLOT] = (JSValue)(uintptr_t)g_native_fn_registry[magic].outer_fp;
return fn(ctx, frame);
}
JSValue cell_rt_make_function(JSContext *ctx, int64_t fn_idx, void *outer_fp) {
if (fn_idx >= 0 && fn_idx < MAX_QBE_FUNCTIONS)
g_outer_fp[fn_idx] = outer_fp;
if (g_native_fn_count >= MAX_NATIVE_FN)
return JS_ThrowTypeError(ctx, "too many native functions (max %d)", MAX_NATIVE_FN);
int global_id = g_native_fn_count++;
g_native_fn_registry[global_id].dl_handle = g_current_dl_handle;
g_native_fn_registry[global_id].fn_idx = (int)fn_idx;
g_native_fn_registry[global_id].outer_fp = outer_fp;
return JS_NewCFunction2(ctx, (JSCFunction *)cell_fn_trampoline, "native_fn",
255, JS_CFUNC_generic_magic, (int)fn_idx);
255, JS_CFUNC_generic_magic, global_id);
}
/* --- Frame-based function calling --- */
@@ -333,7 +375,7 @@ void cell_rt_setarg(JSValue frame_val, int64_t idx, JSValue val) {
JSValue cell_rt_invoke(JSContext *ctx, JSValue frame_val) {
JSFrameRegister *fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val);
int nr_slots = (int)objhdr_cap56(fr->hdr);
int nr_slots = (int)objhdr_cap56(fr->header);
int c_argc = (nr_slots >= 2) ? nr_slots - 2 : 0;
/* Copy args to C stack */
@@ -351,8 +393,8 @@ JSValue cell_rt_goframe(JSContext *ctx, JSValue fn, int64_t nargs) {
return cell_rt_frame(ctx, fn, nargs);
}
void cell_rt_goinvoke(JSContext *ctx, JSValue frame_val) {
cell_rt_invoke(ctx, frame_val);
JSValue cell_rt_goinvoke(JSContext *ctx, JSValue frame_val) {
return cell_rt_invoke(ctx, frame_val);
}
/* --- Array push/pop --- */
@@ -423,6 +465,15 @@ JSValue cell_rt_ne_tol(JSContext *ctx, JSValue a, JSValue b) {
return JS_NewBool(ctx, a != b);
}
/* --- Type check: is_proxy ---
   Returns 1 when v is a function whose length field equals 2, otherwise 0.
   ctx is unused. */
int cell_rt_is_proxy(JSContext *ctx, JSValue v) {
  (void)ctx;

  int is_proxy = 0;
  if (JS_IsFunction(v)) {
    JSFunction *callee = JS_VALUE_GET_FUNCTION(v);
    is_proxy = (callee->length == 2);
  }
  return is_proxy;
}
/* --- Disruption --- */
void cell_rt_disrupt(JSContext *ctx) {
@@ -430,20 +481,35 @@ void cell_rt_disrupt(JSContext *ctx) {
}
/* --- Module entry point ---
Called as symbol(ctx) by os.dylib_symbol. Looks up cell_main
in the loaded dylib, builds a heap-allocated frame (so closures
can reference it after the module returns), and runs the module body. */
Loads a native .cm module from a dylib handle.
Looks up cell_main, builds a heap-allocated frame, sets
g_current_dl_handle so closures register in the right module. */
JSValue cell_rt_module_entry(JSContext *ctx) {
cell_compiled_fn fn = (cell_compiled_fn)dlsym(RTLD_DEFAULT, "cell_main");
JSValue cell_rt_native_module_load(JSContext *ctx, void *dl_handle) {
cell_compiled_fn fn = (cell_compiled_fn)dlsym(dl_handle, "cell_main");
if (!fn)
return JS_ThrowTypeError(ctx, "cell_main not found in loaded dylib");
return JS_ThrowTypeError(ctx, "cell_main not found in native module dylib");
/* Set current handle so cell_rt_make_function registers closures
against this module's dylib */
void *prev_handle = g_current_dl_handle;
g_current_dl_handle = dl_handle;
/* Heap-allocate so closures created in cell_main can reference
this frame after the module entry returns. */
JSValue *frame = calloc(512, sizeof(JSValue));
if (!frame)
if (!frame) {
g_current_dl_handle = prev_handle;
return JS_ThrowTypeError(ctx, "frame allocation failed");
}
return fn(ctx, frame);
JSValue result = fn(ctx, frame);
g_current_dl_handle = prev_handle;
return result;
}
/* Backward-compat entry point.
   Obtains a handle with dlopen(NULL, RTLD_LAZY) and delegates to
   cell_rt_native_module_load. A NULL path asks the dynamic loader for the
   handle of the main program's global symbol scope, so cell_main is only
   visible through it when the module dylib was opened with RTLD_GLOBAL.
   Returns the module body's result, or a thrown TypeError when the handle
   cannot be obtained.
   NOTE(review): the handle is intentionally not dlclose'd here, because
   cell_rt_native_module_load stores it as the current module handle while
   the module body runs. */
JSValue cell_rt_module_entry(JSContext *ctx) {
  void *handle = dlopen(NULL, RTLD_LAZY);
  if (!handle) {
    /* dlerror() may itself return NULL; never pass NULL to a %s format. */
    const char *why = dlerror();
    return JS_ThrowTypeError(ctx, "dlopen of main program failed: %s",
                             why ? why : "unknown error");
  }
  return cell_rt_native_module_load(ctx, handle);
}

View File

@@ -156,7 +156,8 @@ static const JSCFunctionListEntry js_actor_funcs[] = {
};
JSValue js_actor_use(JSContext *js) {
JSValue mod = JS_NewObject(js);
JS_SetPropertyFunctionList(js,mod,js_actor_funcs,countof(js_actor_funcs));
return mod;
JS_FRAME(js);
JS_ROOT(mod, JS_NewObject(js));
JS_SetPropertyFunctionList(js, mod.val, js_actor_funcs, countof(js_actor_funcs));
JS_RETURN(mod.val);
}

View File

@@ -95,9 +95,12 @@
/* test the GC by forcing it before each object allocation */
// #define FORCE_GC_AT_MALLOC
#define POISON_HEAP
/* POISON_HEAP: Use ASan's memory poisoning to detect stale pointer access */
#ifdef POISON_HEAP
#include <sys/mman.h>
#include <unistd.h>
/* HEAP_CHECK: validate heap pointers at JS_VALUE_GET_* macros */
// #define HEAP_CHECK
#if defined(__has_feature)
#if __has_feature(address_sanitizer)
#define HAVE_ASAN 1
@@ -106,17 +109,6 @@
#define HAVE_ASAN 1
#endif
#ifdef HAVE_ASAN
#include <sanitizer/asan_interface.h>
#define gc_poison_region(addr, size) __asan_poison_memory_region((addr), (size))
#define gc_unpoison_region(addr, size) __asan_unpoison_memory_region((addr), (size))
#else
/* Fallback: no-op when not building with ASan */
#define gc_poison_region(addr, size) ((void)0)
#define gc_unpoison_region(addr, size) ((void)0)
#endif
#endif /* POISON_HEAP */
#ifdef HAVE_ASAN
static struct JSContext *__asan_js_ctx;
#endif
@@ -294,23 +286,32 @@ typedef enum JSErrorEnum {
#define __exception __attribute__ ((warn_unused_result))
/* Forward declaration for bytecode freeing */
struct JSFunctionBytecode;
#define JS_VALUE_GET_BLOB(v) ((JSBlob *)JS_VALUE_GET_PTR (v))
#define JS_VALUE_GET_CODE(v) (JS_VALUE_GET_PTR (v))
#ifdef HEAP_CHECK
void heap_check_fail(void *ptr, struct JSContext *ctx);
#define JS_VALUE_GET_ARRAY(v) ((JSArray *)heap_check_chase(ctx, v))
#define JS_VALUE_GET_OBJ(v) ((JSRecord *)heap_check_chase(ctx, v))
#define JS_VALUE_GET_TEXT(v) ((JSText *)heap_check_chase(ctx, v))
#define JS_VALUE_GET_FUNCTION(v) ((JSFunction *)heap_check_chase(ctx, v))
#define JS_VALUE_GET_FRAME(v) ((JSFrame *)heap_check_chase(ctx, v))
#define JS_VALUE_GET_STRING(v) ((JSText *)heap_check_chase(ctx, v))
#define JS_VALUE_GET_RECORD(v) ((JSRecord *)heap_check_chase(ctx, v))
#else
#define JS_VALUE_GET_ARRAY(v) ((JSArray *)chase (v))
#define JS_VALUE_GET_OBJ(v) ((JSRecord *)chase (v))
#define JS_VALUE_GET_TEXT(v) ((JSText *)chase (v))
#define JS_VALUE_GET_BLOB(v) ((JSBlob *)JS_VALUE_GET_PTR (v))
#define JS_VALUE_GET_FUNCTION(v) ((JSFunction *)chase (v))
#define JS_VALUE_GET_FRAME(v) ((JSFrame *)chase (v))
#define JS_VALUE_GET_CODE(v) ((JSFunctionBytecode *)JS_VALUE_GET_PTR (v))
#define JS_VALUE_GET_STRING(v) ((JSText *)chase (v))
#define JS_VALUE_GET_RECORD(v) ((JSRecord *)chase (v))
#endif
/* Compatibility: JS_TAG_STRING is an alias for text type checks */
#define JS_TAG_STRING JS_TAG_STRING_IMM
/* JS_TAG_FUNCTION doesn't exist in new encoding - use JS_IsFunction check instead */
#define JS_TAG_FUNCTION 0xFE /* dummy value, never matches any tag */
/* JS_ThrowMemoryError is an alias for JS_ThrowOutOfMemory */
#define JS_ThrowMemoryError(ctx) JS_ThrowOutOfMemory(ctx)
@@ -319,8 +320,6 @@ static inline objhdr_t objhdr_set_cap56 (objhdr_t h, uint64_t cap) {
return (h & 0xFF) | ((cap & OBJHDR_CAP_MASK) << OBJHDR_CAP_SHIFT);
}
typedef enum OPCodeEnum OPCodeEnum;
/* ============================================================
Buddy Allocator for Actor Memory Blocks
============================================================ */
@@ -331,9 +330,8 @@ typedef enum OPCodeEnum OPCodeEnum;
#else
#define BUDDY_MIN_ORDER 9 /* 512B minimum on 32-bit */
#endif
#define BUDDY_MAX_ORDER 28 /* 256MB maximum */
#define BUDDY_LEVELS (BUDDY_MAX_ORDER - BUDDY_MIN_ORDER + 1)
#define BUDDY_POOL_SIZE (1ULL << BUDDY_MAX_ORDER)
#define BUDDY_MAX_LEVELS 40 /* supports pools up to 2^(BUDDY_MIN_ORDER+39) */
#define BUDDY_DEFAULT_POOL (1ULL << 24) /* 16MB initial pool */
typedef struct BuddyBlock {
struct BuddyBlock *next;
@@ -342,15 +340,26 @@ typedef struct BuddyBlock {
uint8_t is_free;
} BuddyBlock;
/* One pool managed by the buddy allocator. Pools are chained through `next`,
   and each pool keeps its own per-level free lists. */
typedef struct BuddyPool {
struct BuddyPool *next; /* next pool in the chain */
uint8_t *base; /* presumably the start address of this pool's memory — TODO confirm */
size_t total_size; /* size of this pool; max_order is its log2 */
uint8_t max_order; /* log2(total_size) */
uint32_t alloc_count; /* outstanding allocations */
BuddyBlock *free_lists[BUDDY_MAX_LEVELS]; /* free-block list heads, BUDDY_MAX_LEVELS entries */
} BuddyPool;
typedef struct BuddyAllocator {
uint8_t *base; /* 256MB base address */
size_t total_size; /* 256MB */
BuddyBlock *free_lists[BUDDY_LEVELS];
uint8_t initialized;
BuddyPool *pools; /* linked list, newest first */
size_t next_pool_size; /* next pool doubles from this */
size_t initial_size; /* starting pool size */
size_t cap; /* 0 = no cap */
size_t total_mapped; /* sum of all pool sizes */
} BuddyAllocator;
/* Forward declarations for buddy allocator functions */
static void buddy_destroy (BuddyAllocator *b);
static size_t buddy_max_block (BuddyAllocator *b);
/* controls a host of contexts, handing out memory and scheduling */
struct JSRuntime {
@@ -367,13 +376,7 @@ struct JSClass {
#define JS_MODE_BACKTRACE_BARRIER \
(1 << 3) /* stop backtrace before this frame */
typedef struct JSFrameRegister {
objhdr_t hdr; // capacity in this is the total number of words of the object, including the 4 words of overhead and all slots
JSValue function; // JSFunction, function object being invoked
JSValue caller; // JSFrameRegister, the frame that called this one
JSValue address; // address of the instruction in the code that should be executed upon return
JSValue slots[]; // inline memory. order is [this][input args][closed over vars][non closed over vars][temporaries]
} JSFrameRegister; /// extra note: when this frame returns, caller should be set to 0. If caller is found to be 0, then the GC can reduce this frame's slots down to [this][input_args][closed over vars]; if no closed over vars it can be totally removed; may happen naturally in GC since it would have no refs?
/* JSFrameRegister is now an alias for JSFrame — see JSFrame definition below */
/* ============================================================
Register-Based VM Data Structures
@@ -405,6 +408,72 @@ typedef struct { uint16_t line; uint16_t col; } MachLineEntry;
#define MACH_GET_sBx(i) ((int16_t)((i) >> 16))
#define MACH_GET_sJ(i) ((int32_t)((i) & 0xFFFFFF00) >> 8)
/* ============================================================
GC Safepoint Classification for the MACH VM Dispatch Loop
============================================================
Every opcode falls into one of three categories:
[P] Pure inline — never calls C, never allocates. No GC possible.
No frame re-derivation needed after execution.
[N] Non-allocating C call — calls a C function that is guaranteed
to never allocate (e.g. JS_ToBool, js_string_compare_value).
No frame re-derivation needed.
[G] GC safepoint — calls C that may allocate, triggering GC.
After the call, all heap pointers (including `frame`) MUST be
re-derived via: frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
The 18 C entry points that can allocate (GC safepoints):
1. JS_GetProperty — key interning for string keys >7 chars
2. JS_SetProperty — rec_resize when record grows
3. JS_GetPropertyNumber — text substring extraction
4. JS_SetPropertyNumber — array grow
5. JS_NewObject — allocates record
6. JS_NewArray / JS_NewArrayLen — allocates array
7. js_new_register_function — allocates function object (closure)
8. alloc_frame_register — allocates frame via js_mallocz
9. js_call_c_function — arbitrary C code
10. JS_CallInternal — arbitrary bytecode
11. JS_Call — arbitrary call
12. JS_ConcatString — allocates new string
13. JS_ArrayPush — array grow
14. JS_ArrayPop — reads, but frame refresh needed
15. JS_DeleteProperty — mutates record
16. JS_HasProperty — complex traversal
17. js_regexp_constructor — allocates regex
18. reg_vm_binop — polymorphic dispatch (legacy opcodes)
Opcode-level classification:
[P] LOADK, LOADI, LOADNULL, LOADTRUE, LOADFALSE, MOVE, NOP
[P] ADD_INT..MOD_INT, NEG_INT, ADD_FLOAT..MOD_FLOAT, NEG_FLOAT
[P] EQ_INT..GE_INT, EQ_FLOAT..GE_FLOAT, EQ_BOOL, NE_BOOL
[P] IS_IDENTICAL, IS_INT, IS_NUM, IS_TEXT, IS_BOOL, IS_NULL
[P] IS_ARRAY, IS_FUNC, IS_RECORD, IS_STONE, IS_PROXY
[P] NOT, AND, OR, BITNOT, BITAND, BITOR, BITXOR
[P] JMP, JMPTRUE, JMPFALSE, JMPNULL, JMPNOTNULL
[P] RETURN, RETNIL, SETARG, GETUP, SETUP, DISRUPT, THROW
[P] LENGTH (array + imm-ASCII fast path only; text/blob fallback is [G])
[N] EQ_TEXT..GE_TEXT (js_string_compare_value — no allocation)
[N] LNOT (JS_ToBool — no allocation)
[G] ADD..USHR, NEG, INC, DEC, EQ..GE (legacy: reg_vm_binop)
[G] EQ_TOL, NEQ_TOL (tolerance comparison, may fall back)
[G] CONCAT (JS_ConcatString)
[G] GETFIELD, SETFIELD, GETINDEX, SETINDEX (property access)
[G] LOAD_FIELD, STORE_FIELD, LOAD_INDEX, STORE_INDEX
[G] LOAD_DYNAMIC, STORE_DYNAMIC
[G] GETNAME, GETINTRINSIC, GETENV, SET_VAR
[G] NEWOBJECT, NEWRECORD, NEWARRAY (object/array creation)
[G] CLOSURE (js_new_register_function)
[G] FRAME, GOFRAME (alloc_frame_register)
[G] INVOKE, GOINVOKE (function calls)
[G] PUSH (JS_ArrayPush), POP (JS_ArrayPop)
[G] DELETE, DELETEINDEX (JS_DeleteProperty)
[G] HASPROP, IN (JS_HasProperty)
[G] REGEXP (js_regexp_constructor)
============================================================ */
typedef enum MachOpcode {
/* === Legacy opcodes (used by existing .mach files) === */
@@ -494,22 +563,6 @@ typedef enum MachOpcode {
/* === New mcode-derived opcodes (1:1 mapping to mcode IR) === */
/* Typed integer arithmetic (ABC) */
MACH_ADD_INT, /* R(A) = R(B) + R(C) — int, overflow → float */
MACH_SUB_INT, /* R(A) = R(B) - R(C) — int */
MACH_MUL_INT, /* R(A) = R(B) * R(C) — int */
MACH_DIV_INT, /* R(A) = R(B) / R(C) — int */
MACH_MOD_INT, /* R(A) = R(B) % R(C) — int */
MACH_NEG_INT, /* R(A) = -R(B) — int (AB) */
/* Typed float arithmetic (ABC) */
MACH_ADD_FLOAT, /* R(A) = R(B) + R(C) — float */
MACH_SUB_FLOAT, /* R(A) = R(B) - R(C) — float */
MACH_MUL_FLOAT, /* R(A) = R(B) * R(C) — float */
MACH_DIV_FLOAT, /* R(A) = R(B) / R(C) — float */
MACH_MOD_FLOAT, /* R(A) = R(B) % R(C) — float */
MACH_NEG_FLOAT, /* R(A) = -R(B) — float (AB) */
/* Text */
MACH_CONCAT, /* R(A) = R(B) ++ R(C) — string concatenation */
@@ -550,7 +603,6 @@ typedef enum MachOpcode {
MACH_IS_TEXT, /* R(A) = is_text(R(B)) */
MACH_IS_BOOL, /* R(A) = is_bool(R(B)) */
MACH_IS_NULL, /* R(A) = is_null(R(B)) */
MACH_TYPEOF, /* R(A) = typeof(R(B)) */
/* Logical (mcode-style) */
MACH_NOT, /* R(A) = !R(B) — boolean not (AB) */
@@ -592,7 +644,13 @@ typedef enum MachOpcode {
/* Misc */
MACH_IN, /* R(A) = (R(B) in R(C)) — has property (ABC) */
/* Extended type checks (AB) */
MACH_IS_ARRAY, /* R(A) = is_array(R(B)) */
MACH_IS_FUNC, /* R(A) = is_function(R(B)) */
MACH_IS_RECORD, /* R(A) = is_object(R(B)) */
MACH_IS_STONE, /* R(A) = is_stone(R(B)) */
MACH_LENGTH, /* R(A) = length(R(B)) — array/text/blob length */
MACH_IS_PROXY, /* R(A) = is_function(R(B)) && R(B).length == 2 */
MACH_OP_COUNT
@@ -658,18 +716,6 @@ static const char *mach_opcode_names[MACH_OP_COUNT] = {
[MACH_NEQ_TOL] = "neq_tol",
[MACH_NOP] = "nop",
/* Mcode-derived */
[MACH_ADD_INT] = "add_int",
[MACH_SUB_INT] = "sub_int",
[MACH_MUL_INT] = "mul_int",
[MACH_DIV_INT] = "div_int",
[MACH_MOD_INT] = "mod_int",
[MACH_NEG_INT] = "neg_int",
[MACH_ADD_FLOAT] = "add_float",
[MACH_SUB_FLOAT] = "sub_float",
[MACH_MUL_FLOAT] = "mul_float",
[MACH_DIV_FLOAT] = "div_float",
[MACH_MOD_FLOAT] = "mod_float",
[MACH_NEG_FLOAT] = "neg_float",
[MACH_CONCAT] = "concat",
[MACH_EQ_INT] = "eq_int",
[MACH_NE_INT] = "ne_int",
@@ -697,7 +743,6 @@ static const char *mach_opcode_names[MACH_OP_COUNT] = {
[MACH_IS_TEXT] = "is_text",
[MACH_IS_BOOL] = "is_bool",
[MACH_IS_NULL] = "is_null",
[MACH_TYPEOF] = "typeof",
[MACH_NOT] = "not",
[MACH_AND] = "and",
[MACH_OR] = "or",
@@ -721,7 +766,12 @@ static const char *mach_opcode_names[MACH_OP_COUNT] = {
[MACH_DISRUPT] = "disrupt",
[MACH_SET_VAR] = "set_var",
[MACH_IN] = "in",
/* Extended type checks */
[MACH_IS_ARRAY] = "is_array",
[MACH_IS_FUNC] = "is_func",
[MACH_IS_RECORD] = "is_record",
[MACH_IS_STONE] = "is_stone",
[MACH_LENGTH] = "length",
[MACH_IS_PROXY] = "is_proxy",
};
@@ -754,37 +804,17 @@ typedef struct JSCodeRegister {
} JSCodeRegister;
/* Frame for closures - used by link-time relocation model where closures
reference outer frames via (depth, slot) addressing.
Stores function as JSValue to survive GC movements. */
/* Unified frame struct — used by the register VM and closures.
All fields are JSValues so the GC can scan them uniformly. */
typedef struct JSFrame {
objhdr_t header; /* OBJ_FRAME, cap56 = slot count */
JSValue function; /* JSValue for GC safety (use JS_VALUE_GET_FUNCTION) */
JSValue caller; /* JSValue for GC safety (unused currently) */
uint32_t return_pc;
JSValue function; /* JSFunction, function object being invoked */
JSValue caller; /* JSFrame, the frame that called this one */
JSValue address; /* return PC stored as JS_NewInt32 */
JSValue slots[]; /* [this][args][captured][locals][temps] */
} JSFrame;
/* Execution state returned by vm_execute_frame */
typedef enum {
VM_EXEC_NORMAL, /* Continue executing current frame */
VM_EXEC_RETURN, /* Frame returned, pop and resume caller */
VM_EXEC_CALL, /* Need to push new frame for call */
VM_EXEC_EXCEPTION, /* Exception thrown, unwind frames */
} VMExecState;
/* Call info for frame push */
typedef struct {
JSValue func_obj;
JSValue this_obj;
int argc;
JSValue *argv;
const uint8_t *ret_pc;
int ret_sp_offset;
int call_argc;
int call_has_this;
int is_tail_call;
} VMCallInfo;
typedef JSFrame JSFrameRegister;
static inline objhdr_t objhdr_set_s (objhdr_t h, bool s) {
return s ? (h | OBJHDR_S_MASK) : (h & ~OBJHDR_S_MASK);
@@ -815,6 +845,50 @@ static inline objhdr_t *chase(JSValue v) {
return oh;
}
/* Resolve a forward pointer in-place. After rec_resize the old record
gets a forward header; any JSValue slot still pointing at it must be
updated to follow the chain to the live copy. */
static inline void mach_resolve_forward(JSValue *slot) {
  /* Non-pointer values have no object header to inspect. */
  if (!JS_IsPtr(*slot))
    return;

  objhdr_t hdr = *(objhdr_t *)JS_VALUE_GET_PTR(*slot);

  /* Anything that is not a forward header is left untouched. */
  if (objhdr_type(hdr) != OBJ_FORWARD)
    return;

  /* Rewrite the slot with the forward target (a single hop is applied). */
  *slot = JS_MKPTR(objhdr_fwd_ptr(hdr));
}
/* Inline type checks — use these in the VM dispatch loop to avoid
function call overhead. The public API (JS_IsArray etc. in quickjs.h)
remains non-inline for external callers; those wrappers live in runtime.c. */
/* True when `v` is a pointer-tagged value, as reported by JS_IsPtr.
   The other mist_is_* checks use this as their guard before reading
   an object header. */
static inline JS_BOOL mist_is_gc_object(JSValue v) {
return JS_IsPtr(v);
}
/* True when `v` is a pointer value whose chased header has type OBJ_ARRAY. */
static inline JS_BOOL mist_is_array(JSValue v) {
  if (!mist_is_gc_object(v))
    return 0;
  return objhdr_type(*chase(v)) == OBJ_ARRAY;
}
/* True when `v` is a pointer value whose chased header has type OBJ_RECORD. */
static inline JS_BOOL mist_is_record(JSValue v) {
  if (!mist_is_gc_object(v))
    return 0;
  return objhdr_type(*chase(v)) == OBJ_RECORD;
}
/* True when `v` is a pointer value whose chased header has type OBJ_FUNCTION. */
static inline JS_BOOL mist_is_function(JSValue v) {
  if (!mist_is_gc_object(v))
    return 0;
  return objhdr_type(*chase(v)) == OBJ_FUNCTION;
}
/* True when `v` is text: either an immediate ASCII value, or a pointer value
   whose chased header has type OBJ_TEXT. */
static inline JS_BOOL mist_is_text(JSValue v) {
  if (MIST_IsImmediateASCII(v))
    return 1;
  if (!mist_is_gc_object(v))
    return 0;
  return objhdr_type(*chase(v)) == OBJ_TEXT;
}
/* True when `v` is a pointer value whose chased header has type OBJ_BLOB. */
static inline JS_BOOL mist_is_blob(JSValue v) {
  if (!mist_is_gc_object(v))
    return 0;
  return objhdr_type(*chase(v)) == OBJ_BLOB;
}
/* True when `v` is stone: every non-pointer value counts as stone, and a
   pointer value is stone when the S bit of its chased header is set. */
static inline JS_BOOL mist_is_stone(JSValue v) {
  if (!mist_is_gc_object(v))
    return 1;
  return objhdr_s(*chase(v)) ? 1 : 0;
}
/* Intrinsic array type - tagged as JS_TAG_PTR with mist_hdr type OBJ_ARRAY */
typedef struct JSArray {
@@ -823,7 +897,8 @@ typedef struct JSArray {
JSValue values[]; /* inline flexible array member */
} JSArray;
/* JSBlob - binary data per memory.md */
/* JSBlob — not allocated on GC heap (blobs use JSRecord + opaque).
Struct kept for reference; gc_object_size/gc_scan_object do not handle OBJ_BLOB. */
typedef struct JSBlob {
objhdr_t mist_hdr;
word_t length;
@@ -846,7 +921,7 @@ typedef slot JSRecordEntry;
typedef struct JSRecord {
objhdr_t mist_hdr;
struct JSRecord *proto;
JSValue proto; /* prototype as JSValue (JS_NULL if none) */
word_t len; /* number of entries */
slot slots[]; /* slots[0] reserved: key low32=class_id, key high32=rec_id, val=opaque */
} JSRecord;
@@ -991,6 +1066,13 @@ static JS_BOOL JSText_equal_ascii (const JSText *text, JSValue imm) {
enough to call the interrupt callback often. */
#define JS_INTERRUPT_COUNTER_INIT 10000
/* Auto-rooted C call argv — GC updates values in-place.
   Each record describes one argv array belonging to a C call; records are
   linked through `prev` so nested calls form a stack. */
typedef struct CCallRoot {
JSValue *argv; /* points to C-stack-local array */
int argc; /* number of JSValues in argv */
struct CCallRoot *prev; /* stack for nesting (C -> JS -> C -> ...) */
} CCallRoot;
struct JSContext {
JSRuntime *rt;
@@ -1000,19 +1082,24 @@ struct JSContext {
uint8_t *heap_end; /* end of block */
size_t current_block_size; /* current block size (64KB initially) */
size_t next_block_size; /* doubles if <10% recovered after GC */
int gc_poor_streak; /* consecutive poor-recovery GC cycles */
/* Stone arena - permanent immutable allocations */
uint8_t *stone_base; /* stone arena base */
uint8_t *stone_free; /* stone arena bump pointer */
uint8_t *stone_end; /* stone arena end */
/* GC stats (lightweight, always on) */
uint64_t gc_count; /* number of GC cycles */
uint64_t gc_bytes_copied; /* total bytes copied across all GCs */
/* Stone text intern table */
void *st_pages; /* stone page list for large allocations */
uint32_t *st_text_hash; /* hash table (slot -> id) */
JSText **st_text_array; /* array of JSText pointers indexed by id */
uint32_t st_text_size; /* hash table size (power of 2) */
uint32_t st_text_count; /* number of interned texts */
uint32_t st_text_resize; /* threshold for resize */
/* Constant text pool — compilation constants */
uint8_t *ct_base; /* pool base */
uint8_t *ct_free; /* pool bump pointer */
uint8_t *ct_end; /* pool end */
/* Constant text intern table */
void *ct_pages; /* page list for large allocations */
uint32_t *ct_hash; /* hash table (slot -> id) */
JSText **ct_array; /* array of JSText pointers indexed by id */
uint32_t ct_size; /* hash table size (power of 2) */
uint32_t ct_count; /* number of interned texts */
uint32_t ct_resize_threshold; /* threshold for resize */
uint16_t binary_object_count;
int binary_object_size;
@@ -1022,6 +1109,8 @@ struct JSContext {
JSGCRef *top_gc_ref; /* used to reference temporary GC roots (stack top) */
JSGCRef *last_gc_ref; /* used to reference temporary GC roots (list) */
JSLocalRef *top_local_ref; /* for JS_LOCAL macro - GC updates C locals through pointers */
CCallRoot *c_call_root; /* stack of auto-rooted C call argv arrays */
int class_count; /* size of class_array and class_proto */
JSClass *class_array;
@@ -1057,9 +1146,6 @@ struct JSContext {
// todo: want this, but should be a simple increment/decrement counter while frames are pushed
size_t stack_depth;
size_t stack_limit;
/* Parser state (for GC to scan cpool during parsing) */
struct JSFunctionDef *current_parse_fd;
};
/* ============================================================
@@ -1084,22 +1170,22 @@ static inline const char *JS_KeyGetStr (JSContext *ctx, char *buf, size_t buf_si
/* ============================================================
Stone Arena Functions
Constant Text Pool Functions
============================================================ */
/* Stone page for large allocations */
typedef struct StonePage {
struct StonePage *next;
/* Constant text page for large allocations */
typedef struct CTPage {
struct CTPage *next;
size_t size;
uint8_t data[];
} StonePage;
} CTPage;
/* Initial stone text table size */
#define ST_TEXT_INITIAL_SIZE 256
/* Initial constant text table size */
#define CT_INITIAL_SIZE 256
/* Allocate from stone arena (permanent, immutable memory) */
/* Allocate from constant text pool */
/* Resize the stone text intern hash table */
/* Resize the constant text intern hash table */
/* Realloc with slack reporting (for bump allocator)
WARNING: This function is NOT GC-safe! The caller must protect the source
@@ -1115,11 +1201,30 @@ static int ctx_gc (JSContext *ctx, int allow_grow, size_t alloc_size);
/* JS_MarkValue - mark a value during GC traversal.
With copying GC, this is a no-op as we discover live objects by tracing. */
/* Helper to check if a pointer is in stone memory */
static inline int is_stone_ptr (JSContext *ctx, void *ptr) {
return (uint8_t *)ptr >= ctx->stone_base && (uint8_t *)ptr < ctx->stone_end;
/* Helper to check if a pointer is in constant text pool memory */
/* Return 1 when `ptr` lies inside constant text pool memory, else 0.
   Two places are searched: the primary pool range [ct_base, ct_end) and the
   data area of every overflow page linked from ctx->ct_pages. */
static inline int is_ct_ptr (JSContext *ctx, void *ptr) {
  uint8_t *addr = (uint8_t *)ptr;

  /* Primary pool. */
  if (addr >= ctx->ct_base && addr < ctx->ct_end)
    return 1;

  /* Overflow pages. */
  for (CTPage *pg = (CTPage *)ctx->ct_pages; pg; pg = pg->next) {
    if (addr >= pg->data && addr < pg->data + pg->size)
      return 1;
  }
  return 0;
}
#ifdef HEAP_CHECK
/* HEAP_CHECK variant of chase(): resolve `v` to its object header and verify
   that the header lives in memory this context owns. Accepted locations are
   the used part of the GC heap [heap_base, heap_free) and constant text pool
   memory as defined by is_ct_ptr, which covers the primary pool and its
   overflow pages. Any other address is reported through heap_check_fail.
   The resolved header is returned in all cases. */
static inline objhdr_t *heap_check_chase(JSContext *ctx, JSValue v) {
  objhdr_t *oh = chase(v);
  uint8_t *p = (uint8_t *)oh;
  int in_gc_heap = (p >= ctx->heap_base && p < ctx->heap_free);
  /* Use is_ct_ptr rather than a bare ct_base/ct_end range test so objects
     placed on overflow pages are not flagged as stale pointers. */
  if (!in_gc_heap && !is_ct_ptr(ctx, oh))
    heap_check_fail(oh, ctx);
  return oh;
}
#endif
/* Intern a UTF-32 string as a stone text, returning a JSValue string */
/* Create a stoned, interned key from a UTF-8 C string.
@@ -1153,11 +1258,9 @@ typedef struct JSRegExp {
#define obj_is_stone(rec) objhdr_s ((rec)->mist_hdr)
#define obj_set_stone(rec) ((rec)->mist_hdr = objhdr_set_s ((rec)->mist_hdr, true))
#define JS_VALUE_GET_RECORD(v) ((JSRecord *)chase (v))
/* Get the prototype of the record that p points to, as a JSRecord pointer
* (NULL when it has no prototype). */
#define JS_OBJ_GET_PROTO(p) ((JSRecord *)((JSRecord *)(p))->proto)
#define JS_OBJ_GET_PROTO(p) (JS_IsNull(((JSRecord *)(p))->proto) ? NULL : (JSRecord *)JS_VALUE_GET_PTR(((JSRecord *)(p))->proto))
/* Initial capacity for new records (mask = 7, 8 slots total) */
#define JS_RECORD_INITIAL_MASK 7
@@ -1226,11 +1329,6 @@ typedef struct JSFunction {
uint8_t cproto;
int16_t magic;
} cfunc;
struct {
struct JSFunctionBytecode *function_bytecode;
JSValue outer_frame; /* JSFrame JSValue, lexical parent for closures */
JSValue env_record; /* stone record, module environment */
} func;
struct {
JSCodeRegister *code; /* compiled register code (off-heap) */
JSValue env_record; /* stone record, module environment */
@@ -1239,150 +1337,11 @@ typedef struct JSFunction {
} u;
} JSFunction;
typedef struct JSClosureVar {
uint8_t is_local : 1;
uint8_t is_arg : 1;
uint8_t is_const : 1;
uint8_t is_lexical : 1;
uint8_t var_kind : 4; /* see JSVarKindEnum */
/* 8 bits available */
uint16_t var_idx; /* is_local = TRUE: index to a normal variable of the
parent function. otherwise: index to a closure
variable of the parent function */
JSValue var_name;
} JSClosureVar;
#define ARG_SCOPE_INDEX 1
#define ARG_SCOPE_END (-2)
typedef struct JSVarScope {
int parent; /* index into fd->scopes of the enclosing scope */
int first; /* index into fd->vars of the last variable in this scope */
} JSVarScope;
typedef enum {
/* XXX: add more variable kinds here instead of using bit fields */
JS_VAR_NORMAL,
JS_VAR_FUNCTION_DECL, /* lexical var with function declaration */
JS_VAR_NEW_FUNCTION_DECL, /* lexical var with async/generator
function declaration */
JS_VAR_CATCH,
JS_VAR_FUNCTION_NAME, /* function expression name */
} JSVarKindEnum;
/* XXX: could use a different structure in bytecode functions to save
memory */
typedef struct JSVarDef {
JSValue var_name;
/* index into fd->scopes of this variable lexical scope */
int scope_level;
/* during compilation:
- if scope_level = 0: scope in which the variable is defined
- if scope_level != 0: index into fd->vars of the next
variable in the same or enclosing lexical scope
in a bytecode function:
index into fd->vars of the next
variable in the same or enclosing lexical scope
*/
int scope_next;
uint8_t is_const : 1;
uint8_t is_lexical : 1;
uint8_t is_captured : 1;
uint8_t var_kind : 4; /* see JSVarKindEnum */
/* only used during compilation: function pool index for lexical
variables with var_kind =
JS_VAR_FUNCTION_DECL/JS_VAR_NEW_FUNCTION_DECL or scope level of
the definition of the 'var' variables (they have scope_level =
0) */
int func_pool_idx : 24; /* only used during compilation : index in
the constant pool for hoisted function
definition */
} JSVarDef;
/* for the encoding of the pc2line table */
#define PC2LINE_BASE (-1)
#define PC2LINE_RANGE 5
#define PC2LINE_OP_FIRST 1
#define PC2LINE_DIFF_PC_MAX ((255 - PC2LINE_OP_FIRST) / PC2LINE_RANGE)
typedef struct JSFunctionBytecode {
objhdr_t header; /* must come first */
uint8_t js_mode;
uint8_t has_prototype : 1; /* true if a prototype field is necessary */
uint8_t has_simple_parameter_list : 1;
uint8_t func_kind : 2;
uint8_t has_debug : 1;
uint8_t read_only_bytecode : 1;
uint8_t is_direct_or_indirect_eval
: 1; /* used by JS_GetScriptOrModuleName() */
/* XXX: 10 bits available */
uint8_t *byte_code_buf; /* (self pointer) */
int byte_code_len;
JSValue func_name;
JSVarDef *vardefs; /* arguments + local variables (arg_count + var_count)
(self pointer) */
JSClosureVar
*closure_var; /* list of variables in the closure (self pointer) */
uint16_t arg_count;
uint16_t var_count;
uint16_t defined_arg_count; /* for length function property */
uint16_t stack_size; /* maximum stack size */
JSValue *cpool; /* constant pool (self pointer) */
int cpool_count;
int closure_var_count;
struct {
/* debug info, move to separate structure to save memory? */
JSValue filename;
int source_len;
int pc2line_len;
uint8_t *pc2line_buf;
char *source;
} debug;
} JSFunctionBytecode;
/* New simplified compiled unit structure for Phase 1+ simplification.
Replaces JSFunctionBytecode with a simpler model:
- No closure machinery (uses outer_frame chain at runtime)
- Free variables resolved at link time against env + globals
- Nested functions stored as separate units in cpool */
typedef struct JSCompiledUnit {
objhdr_t header; /* must come first */
/* Bytecode (self pointer) */
uint8_t *byte_code_buf;
int byte_code_len;
/* Constants - strings, numbers, nested unit refs (self pointer) */
JSValue *cpool;
int cpool_count;
/* Stack requirements */
uint16_t local_count; /* total local slots (args + vars) */
uint16_t stack_size; /* operand stack depth */
/* Flags */
uint8_t has_debug : 1;
uint8_t read_only_bytecode : 1;
/* Debug info (optional - only present if has_debug) */
struct {
JSValue filename;
int source_len;
int pc2line_len;
uint8_t *pc2line_buf;
char *source;
} debug;
} JSCompiledUnit;
/* ============================================================
Context-Neutral Module Format (Phase 2+)
Struct definitions are in quickjs.h
============================================================ */
typedef struct JSProperty {
JSValue value;
} JSProperty;
#define JS_PROP_INITIAL_SIZE 2
#define JS_PROP_INITIAL_HASH_SIZE 4 /* must be a power of two */
#define JS_ARRAY_INITIAL_SIZE 2
@@ -1395,7 +1354,6 @@ typedef struct JSProperty {
#endif
JSValue js_call_c_function (JSContext *ctx, JSValue func_obj, JSValue this_obj, int argc, JSValue *argv);
JSValue js_call_bound_function (JSContext *ctx, JSValue func_obj, JSValue this_obj, int argc, JSValue *argv);
JSValue JS_CallInternal (JSContext *ctx, JSValue func_obj, JSValue this_obj, int argc, JSValue *argv, int flags);
JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code, JSValue this_obj, int argc, JSValue *argv, JSValue env, JSValue outer_frame);
int JS_DeleteProperty (JSContext *ctx, JSValue obj, JSValue prop);
@@ -1410,17 +1368,9 @@ void js_dump_value_write (void *opaque, const char *buf, size_t len);
void js_regexp_finalizer (JSRuntime *rt, JSValue val);
JSValue js_new_function (JSContext *ctx, JSFunctionKind kind);
/* Forward declarations for intrinsics (now declared in quickjs.h) */
/* Forward declaration - helper to set cap in objhdr */
static inline objhdr_t objhdr_set_cap56 (objhdr_t h, uint64_t cap);
/* JS_VALUE_GET_STRING is an alias for getting JSText from a string value */
/* Note: Uses chase() for GC safety - already defined at line 293 */
/* JS_ThrowMemoryError is an alias for JS_ThrowOutOfMemory */
#define JS_ThrowMemoryError(ctx) JS_ThrowOutOfMemory(ctx)
/* GC-SAFE: JS_SetPropertyInternal: same as JS_SetProperty but doesn't check stone.
Internal use only. May trigger GC if record needs to resize. */
@@ -1600,118 +1550,6 @@ static inline __exception int js_poll_interrupts (JSContext *ctx) {
}
}
/* === Token enum (shared by parser, tokenizer, AST) === */
enum {
TOK_NUMBER = -128,
TOK_STRING,
TOK_TEMPLATE,
TOK_IDENT,
TOK_REGEXP,
/* warning: order matters (see js_parse_assign_expr) */
TOK_MUL_ASSIGN,
TOK_DIV_ASSIGN,
TOK_MOD_ASSIGN,
TOK_PLUS_ASSIGN,
TOK_MINUS_ASSIGN,
TOK_SHL_ASSIGN,
TOK_SAR_ASSIGN,
TOK_SHR_ASSIGN,
TOK_AND_ASSIGN,
TOK_XOR_ASSIGN,
TOK_OR_ASSIGN,
TOK_POW_ASSIGN,
TOK_LAND_ASSIGN,
TOK_LOR_ASSIGN,
TOK_DEC,
TOK_INC,
TOK_SHL,
TOK_SAR,
TOK_SHR,
TOK_LT,
TOK_LTE,
TOK_GT,
TOK_GTE,
TOK_EQ,
TOK_STRICT_EQ,
TOK_NEQ,
TOK_STRICT_NEQ,
TOK_LAND,
TOK_LOR,
TOK_POW,
TOK_ARROW,
TOK_ERROR,
TOK_PRIVATE_NAME,
TOK_EOF,
/* whitespace/comment tokens for tokenizer */
TOK_COMMENT,
TOK_NEWLINE,
TOK_SPACE,
/* keywords: WARNING: same order as atoms */
TOK_NULL, /* must be first */
TOK_FALSE,
TOK_TRUE,
TOK_IF,
TOK_ELSE,
TOK_RETURN,
TOK_GO,
TOK_VAR,
TOK_DEF,
TOK_THIS,
TOK_DELETE,
TOK_IN,
TOK_DO,
TOK_WHILE,
TOK_FOR,
TOK_BREAK,
TOK_CONTINUE,
TOK_DISRUPT,
TOK_DISRUPTION,
TOK_FUNCTION,
TOK_DEBUGGER,
TOK_WITH,
/* FutureReservedWord */
TOK_CLASS,
TOK_CONST,
TOK_ENUM,
TOK_EXPORT,
TOK_EXTENDS,
TOK_IMPORT,
TOK_SUPER,
/* FutureReservedWords when parsing strict mode code */
TOK_IMPLEMENTS,
TOK_INTERFACE,
TOK_LET,
TOK_PRIVATE,
TOK_PROTECTED,
TOK_PUBLIC,
TOK_STATIC,
TOK_YIELD,
TOK_AWAIT, /* must be last */
TOK_OF, /* only used for js_parse_skip_parens_token() */
};
#define TOK_FIRST_KEYWORD TOK_NULL
#define TOK_LAST_KEYWORD TOK_AWAIT
/* unicode code points */
#define CP_NBSP 0x00a0
#define CP_BOM 0xfeff
#define CP_LS 0x2028
#define CP_PS 0x2029
/* === Line/column cache === */
typedef struct {
/* last source position */
const uint8_t *ptr;
int line_num;
int col_num;
const uint8_t *buf_start;
} GetLineColCache;
/* === PPretext (parser pretext, system-malloc, used by cell_js.c parser) === */
typedef struct PPretext {
uint32_t *data;
@@ -1719,8 +1557,6 @@ typedef struct PPretext {
int cap;
} PPretext;
#define JS_CALL_FLAG_COPY_ARGV (1 << 1)
extern JSClassID js_class_id_alloc;
/* === Forward declarations for functions split across modules === */
@@ -1728,16 +1564,8 @@ extern JSClassID js_class_id_alloc;
/* runtime.c — line/column, GC, and VM dispatch */
int ctx_gc (JSContext *ctx, int allow_grow, size_t alloc_size);
JSValue JS_CallInternal (JSContext *ctx, JSValue func_obj, JSValue this_obj, int argc, JSValue *argv, int flags);
int get_line_col (int *pcol_num, const uint8_t *buf, size_t len);
int get_line_col_cached (GetLineColCache *s, int *pcol_num, const uint8_t *ptr);
/* runtime.c exports */
JSValue JS_ThrowStackOverflow (JSContext *ctx);
int JS_DefineObjectName (JSContext *ctx, JSValue obj, JSValue name);
int JS_DefineObjectNameComputed (JSContext *ctx, JSValue obj, JSValue str);
int js_method_set_properties (JSContext *ctx, JSValue func_obj, JSValue name, int flags, JSValue home_obj);
JSValue JS_GetPropertyValue (JSContext *ctx, JSValue this_obj, JSValue prop);
__exception int JS_CopyDataProperties (JSContext *ctx, JSValue target, JSValue source, JSValue excluded, BOOL setprop);
int js_string_compare_value (JSContext *ctx, JSValue op1, JSValue op2, BOOL eq_only);
int js_string_compare_value_nocase (JSContext *ctx, JSValue op1, JSValue op2);
JSValue js_regexp_constructor (JSContext *ctx, JSValue this_val, int argc, JSValue *argv);
@@ -1746,20 +1574,15 @@ int JS_HasPropertyKey (JSContext *ctx, JSValue obj, JSValue key);
void *js_realloc_rt (void *ptr, size_t size);
char *js_strdup_rt (const char *str);
JSValue JS_ConcatString (JSContext *ctx, JSValue op1, JSValue op2);
__exception int js_post_inc_slow (JSContext *ctx, JSValue *sp, OPCodeEnum op);
no_inline int js_not_slow (JSContext *ctx, JSValue *sp);
no_inline int js_relational_slow (JSContext *ctx, JSValue *sp, OPCodeEnum op);
__exception int js_operator_in (JSContext *ctx, JSValue *sp);
__exception int js_operator_delete (JSContext *ctx, JSValue *sp);
JSText *pretext_init (JSContext *ctx, int capacity);
JSText *pretext_putc (JSContext *ctx, JSText *s, uint32_t c);
JSText *pretext_concat_value (JSContext *ctx, JSText *s, JSValue v);
JSValue js_new_blob (JSContext *ctx, blob *b);
/* Functions from header region (defined in runtime.c) */
void *js_realloc (JSContext *ctx, void *ptr, size_t size);
void *st_alloc (JSContext *ctx, size_t bytes, size_t align);
void st_free_all (JSContext *ctx);
int st_text_resize (JSContext *ctx);
void *ct_alloc (JSContext *ctx, size_t bytes, size_t align);
void ct_free_all (JSContext *ctx);
int ct_resize (JSContext *ctx);
JSValue intern_text_to_value (JSContext *ctx, const uint32_t *utf32, uint32_t len);
JSValue js_key_new (JSContext *ctx, const char *str);
JSValue js_key_new_len (JSContext *ctx, const char *str, size_t len);
@@ -1776,19 +1599,6 @@ uint64_t get_text_hash (JSText *text);
void pack_utf32_to_words (const uint32_t *utf32, uint32_t len, uint64_t *packed);
int text_equal (JSText *a, const uint64_t *packed_b, uint32_t len_b);
/* Locate slot `slot` in the frame found `depth` levels out from frame_val.
 *
 * Each step outward goes from the current frame to the outer_frame recorded
 * in that frame's function (fn->u.func.outer_frame). A depth of zero or less
 * selects frame_val's own frame.
 *
 * Returns a pointer into the selected frame's slots array, or NULL when
 * frame_val, or any outer frame reached on the way, is null. `slot` is not
 * range-checked here.
 */
static inline JSValue *get_upvalue_ptr (JSValue frame_val, int depth, int slot) {
  JSFrame *cur;
  int remaining;

  if (JS_IsNull (frame_val))
    return NULL;

  cur = JS_VALUE_GET_FRAME (frame_val);
  for (remaining = depth; remaining > 0; remaining--) {
    JSFunction *owner = JS_VALUE_GET_FUNCTION (cur->function);
    JSValue outer = owner->u.func.outer_frame;

    if (JS_IsNull (outer))
      return NULL;
    cur = JS_VALUE_GET_FRAME (outer);
  }
  return &cur->slots[slot];
}
void print_backtrace (JSContext *ctx, const char *filename, int line_num, int col_num);
JSValue JS_ThrowError2 (JSContext *ctx, JSErrorEnum error_num, const char *fmt, va_list ap, BOOL add_backtrace);
JSValue gc_copy_value (JSContext *ctx, JSValue v, uint8_t *from_base, uint8_t *from_end, uint8_t *to_base, uint8_t **to_free, uint8_t *to_end);
@@ -1800,10 +1610,6 @@ PPretext *ppretext_append_jsvalue (PPretext *p, JSValue str);
PPretext *ppretext_append_int (PPretext *p, int n);
JSValue js_atof (JSContext *ctx, const char *str, const char **pp, int radix, int flags);
/* Defines from runtime section needed by cell_js.c */
#define DEFINE_GLOBAL_LEX_VAR (1 << 7)
#define DEFINE_GLOBAL_FUNC_VAR (1 << 6)
#define ATOD_INT_ONLY (1 << 0)
/* accept 0o and 0b prefixes in addition to the 0x prefix if radix = 0 */
#define ATOD_ACCEPT_BIN_OCT (1 << 2)
@@ -1820,14 +1626,6 @@ JSValue js_atof (JSContext *ctx, const char *str, const char **pp, int radix, in
/* accept -0x1 */
#define ATOD_ACCEPT_PREFIX_AFTER_SIGN (1 << 10)
#define GLOBAL_VAR_OFFSET 0x40000000
#define ARGUMENT_VAR_OFFSET 0x20000000
/* Inline functions from runtime section needed by cell_js.c */
/* Initialise DynBuf `s` by forwarding to dbuf_init2() with the context's
   runtime (ctx->rt) as the second argument and NULL as the third.
   NOTE(review): presumably those are the allocator opaque pointer and the
   realloc callback, with NULL selecting a default - confirm against
   dbuf_init2(). */
static inline void js_dbuf_init (JSContext *ctx, DynBuf *s) {
dbuf_init2 (s, ctx->rt, NULL);
}
static inline int to_digit (int c) {
if (c >= '0' && c <= '9') return c - '0';
else if (c >= 'A' && c <= 'Z') return c - 'A' + 10;
@@ -1835,54 +1633,6 @@ static inline int to_digit (int c) {
else return 36;
}
/* Append `v` to `s` in unsigned LEB128 form: seven payload bits per byte,
 * least-significant group first, with bit 0x80 set on every byte except the
 * final one. Zero is written as a single 0x00 byte.
 */
static inline void dbuf_put_leb128 (DynBuf *s, uint32_t v) {
  /* every group that still has payload above it carries the continuation bit */
  while (v > 0x7f) {
    dbuf_putc (s, (v & 0x7f) | 0x80);
    v >>= 7;
  }
  /* last group: fits in seven bits, continuation bit clear */
  dbuf_putc (s, v);
}
/* Append the signed value `v1` to `s`: the value is first folded into an
 * unsigned one (doubled, then XORed with an all-ones mask when negative, so
 * the sign ends up in bit 0) and the result is written with
 * dbuf_put_leb128().
 */
static inline void dbuf_put_sleb128 (DynBuf *s, int32_t v1) {
  uint32_t bits = v1;
  /* 0 for non-negative input, 0xffffffff for negative input */
  uint32_t sign_mask = 0u - (bits >> 31);

  dbuf_put_leb128 (s, (bits << 1) ^ sign_mask);
}
/* Decode one unsigned LEB128 value from the byte range [buf, buf_end).
 *
 * At most five bytes are examined. On success the decoded value is stored in
 * *pval and the number of bytes consumed is returned. If the range ends
 * before a terminating byte (one without bit 0x80) is seen, or five bytes go
 * by without one, *pval is set to 0 and -1 is returned.
 */
static inline int get_leb128 (uint32_t *pval, const uint8_t *buf, const uint8_t *buf_end) {
  const uint8_t *cur = buf;
  uint32_t acc = 0;
  uint32_t shift;

  /* five groups of seven bits: shifts 0, 7, 14, 21, 28 */
  for (shift = 0; shift < 5 * 7; shift += 7) {
    uint32_t byte;

    if (unlikely (cur >= buf_end))
      break;
    byte = *cur++;
    acc |= (byte & 0x7f) << shift;
    if ((byte & 0x80) == 0) {
      *pval = acc;
      return cur - buf;
    }
  }

  /* truncated or over-long encoding */
  *pval = 0;
  return -1;
}
/* Decode one signed value from [buf, buf_end): reads an unsigned value with
 * get_leb128() and unfolds it (bit 0 is the sign, the remaining bits the
 * magnitude pattern).
 *
 * Returns the byte count reported by get_leb128() and stores the result in
 * *pval; when get_leb128() fails, stores 0 and returns -1.
 */
static inline int get_sleb128 (int32_t *pval, const uint8_t *buf, const uint8_t *buf_end) {
  uint32_t folded;
  int consumed = get_leb128 (&folded, buf, buf_end);

  if (consumed >= 0) {
    *pval = (folded >> 1) ^ -(folded & 1);
    return consumed;
  }

  *pval = 0;
  return -1;
}
no_inline int js_realloc_array (JSContext *ctx, void **parray, int elem_size, int *psize, int req_size);
static inline int js_resize_array (JSContext *ctx, void **parray, int elem_size, int *psize, int req_size) {
if (unlikely (req_size > *psize))
@@ -1897,7 +1647,6 @@ JSValue js_key_from_string (JSContext *ctx, JSValue val);
/* mach.c exports */
JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code, JSValue this_obj, int argc, JSValue *argv, JSValue env, JSValue outer_frame);
JSFrameRegister *alloc_frame_register(JSContext *ctx, int slot_count);
cJSON *mach_find_scope_record(cJSON *scopes, int function_nr);
int reg_vm_check_interrupt(JSContext *ctx);

View File

@@ -92,7 +92,6 @@ static inline int objhdr_s (objhdr_t h) { return (h & OBJHDR_S_MASK) != 0; }
typedef struct JSRuntime JSRuntime; // the entire VM
typedef struct JSContext JSContext; // Each actor
typedef struct JSClass JSClass;
typedef struct JSFunctionBytecode JSFunctionBytecode;
typedef uint32_t JSClassID;
/* Forward declaration - JSGCRef moved after JSValue definition */
@@ -147,10 +146,22 @@ typedef struct JSGCRef {
struct JSGCRef *prev;
} JSGCRef;
/* JSLocalRef - GC updates C locals through pointers (OCaml-style).
   One node per registered C local; nodes are handed to JS_PushLocalRef(). */
typedef struct JSLocalRef {
JSValue *ptr; /* address of the C local JSValue this node refers to */
struct JSLocalRef *prev; /* NOTE(review): presumably the previously pushed node (linked stack) - confirm */
} JSLocalRef;
/* stack of JSGCRef */
JSValue *JS_PushGCRef(JSContext *ctx, JSGCRef *ref);
JSValue JS_PopGCRef(JSContext *ctx, JSGCRef *ref);
/* JS_FRAME/JS_ROOT/JS_LOCAL helpers (for use from cell.h macros) */
JSGCRef *JS_GetGCFrame(JSContext *ctx);
JSLocalRef *JS_GetLocalFrame(JSContext *ctx);
void JS_PushLocalRef(JSContext *ctx, JSLocalRef *ref);
void JS_RestoreFrame(JSContext *ctx, JSGCRef *gc_frame, JSLocalRef *local_frame);
/* Helpers around JS_PushGCRef()/JS_PopGCRef() for a local JSValue `v`.
   Both macros token-paste `v ## _ref`, so the caller must have a JSGCRef
   variable named <v>_ref in scope, and `v` must be a plain identifier.
   JS_PUSH_VALUE pushes <v>_ref and then copies v into <v>_ref.val;
   JS_POP_VALUE assigns the value returned by JS_PopGCRef() back to v. */
#define JS_PUSH_VALUE(ctx, v) do { JS_PushGCRef(ctx, &v ## _ref); v ## _ref.val = v; } while (0)
#define JS_POP_VALUE(ctx, v) v = JS_PopGCRef(ctx, &v ## _ref)
@@ -284,13 +295,6 @@ JS_IsShortFloat (JSValue v) {
#define JS_TRUE ((JSValue)(JS_TAG_BOOL | (1 << 5)))
#define JS_EXCEPTION ((JSValue)JS_TAG_EXCEPTION)
/* flags for object properties - simplified model:
- No per-property writable/configurable (use stone() for immutability)
- Text keys are enumerable, object keys are not */
#define JS_PROP_TMASK (3 << 4) /* mask for NORMAL, VARREF */
#define JS_PROP_NORMAL (0 << 4)
#define JS_PROP_VARREF (2 << 4) /* used internally for closures */
#ifndef JS_DEFAULT_STACK_SIZE
#define JS_DEFAULT_STACK_SIZE (1024 * 1024)
#endif
@@ -306,49 +310,35 @@ typedef JSValue JSCFunctionData (JSContext *ctx, JSValue this_val,
int argc, JSValue *argv, int magic,
JSValue *data);
JSValue JS_Stone (JSContext *ctx, JSValue this_val);
/* ============================================================
1. Runtime / Context Lifecycle
============================================================ */
JSRuntime *JS_NewRuntime (void);
/* info lifetime must exceed that of rt */
void JS_FreeRuntime (JSRuntime *rt);
void JS_SetMemoryLimit (JSRuntime *rt, size_t limit);
void JS_SetPoolSize (JSRuntime *rt, size_t initial, size_t cap);
JSContext *JS_NewContext (JSRuntime *rt);
JSContext *JS_NewContextWithHeapSize (JSRuntime *rt, size_t heap_size);
void JS_FreeContext (JSContext *s);
void *JS_GetContextOpaque (JSContext *ctx);
void JS_SetContextOpaque (JSContext *ctx, void *opaque);
JSRuntime *JS_GetRuntime (JSContext *ctx);
/* use 0 to disable maximum stack size check */
void JS_SetMaxStackSize (JSContext *ctx, size_t stack_size);
/* should be called when changing thread to update the stack top value
used to check stack overflow. */
void JS_UpdateStackTop (JSContext *ctx);
void JS_FreeRuntime (JSRuntime *rt);
/* return != 0 if the JS code needs to be interrupted */
typedef int JSInterruptHandler (JSRuntime *rt, void *opaque);
void JS_SetInterruptHandler (JSContext *ctx, JSInterruptHandler *cb,
void *opaque);
JS_BOOL JS_IsLiveObject (JSRuntime *rt, JSValue obj);
JSContext *JS_NewContext (JSRuntime *rt);
void JS_FreeContext (JSContext *s);
JSContext *JS_DupContext (JSContext *ctx);
JSContext *JS_GetContext (JSRuntime *rt);
void *JS_GetContextOpaque (JSContext *ctx);
void JS_SetContextOpaque (JSContext *ctx, void *opaque);
JSRuntime *JS_GetRuntime (JSContext *ctx);
void JS_SetClassProto (JSContext *ctx, JSClassID class_id, JSValue obj);
JSValue JS_GetClassProto (JSContext *ctx, JSClassID class_id);
JSContext *JS_NewContextWithHeapSize (JSRuntime *rt, size_t heap_size);
/* Memory statistics record. It is passed to JS_ComputeMemoryUsage() through
   a non-const pointer and to JS_DumpMemoryUsage() through a const one.
   NOTE(review): judging by the names, *_count fields are tallies and *_size
   fields are byte totals - confirm against the implementation. */
typedef struct JSMemoryUsage {
int64_t malloc_size, malloc_limit, memory_used_size;
int64_t malloc_count;
int64_t memory_used_count;
int64_t str_count, str_size;
int64_t obj_count, obj_size;
int64_t prop_count, prop_size;
int64_t shape_count, shape_size;
int64_t js_func_count, js_func_size, js_func_code_size;
int64_t js_func_pc2line_count, js_func_pc2line_size;
int64_t c_func_count, array_count;
int64_t fast_array_count, fast_array_elements;
int64_t binary_object_count, binary_object_size;
} JSMemoryUsage;
void JS_ComputeMemoryUsage (JSRuntime *rt, JSMemoryUsage *s);
void JS_DumpMemoryUsage (FILE *fp, const JSMemoryUsage *s, JSRuntime *rt);
/* Class system */
typedef void JSClassFinalizer (JSRuntime *rt, JSValue val);
typedef JSValue JSClassCall (JSContext *ctx, JSValue func_obj,
JSValue this_val, int argc,
@@ -361,6 +351,7 @@ typedef struct JSClassDef {
} JSClassDef;
#define JS_INVALID_CLASS_ID 0
extern JSClassID js_class_id_alloc;
JSClassID JS_NewClassID (JSClassID *pclass_id);
/* Returns the class ID if `v` is an object, otherwise returns
* JS_INVALID_CLASS_ID. */
@@ -368,22 +359,12 @@ JSClassID JS_GetClassID (JSValue v);
int JS_NewClass (JSContext *ctx, JSClassID class_id,
const JSClassDef *class_def);
int JS_IsRegisteredClass (JSContext *ctx, JSClassID class_id);
extern JSClassID js_class_id_alloc;
void JS_SetClassProto (JSContext *ctx, JSClassID class_id, JSValue obj);
JSValue JS_GetClassProto (JSContext *ctx, JSClassID class_id);
/* ============================================================
Copying GC - No Reference Counting Needed
============================================================
With a copying GC, reference counting is not needed since all live
objects are discovered by tracing from roots. These macros make
existing DupValue/FreeValue calls into no-ops.
2. Value Creation and Type Checks
============================================================ */
#define JS_DupValue(ctx, v) (v)
#define JS_FreeValue(ctx, v) ((void)0)
#define JS_DupValueRT(rt, v) (v)
#define JS_FreeValueRT(rt, v) ((void)0)
/* value handling */
static inline JSValue
JS_NewBool (JSContext *ctx, JS_BOOL val) {
@@ -435,6 +416,7 @@ JS_NewFloat64 (JSContext *ctx, double d) {
return __JS_NewFloat64 (ctx, d);
}
/* Inline type checks (immediate tags) */
static inline JS_BOOL JS_IsNumber (JSValue v) {
int tag = JS_VALUE_GET_TAG (v);
return tag == JS_TAG_INT || JS_TAG_IS_FLOAT64 (tag);
@@ -452,7 +434,7 @@ static inline JS_BOOL JS_IsException (JSValue v) {
return (JS_VALUE_GET_TAG (v) == JS_TAG_EXCEPTION);
}
/* Immediate String Helpers */
/* Immediate ASCII string helpers */
#define MIST_ASCII_MAX_LEN 7
static inline JS_BOOL
@@ -480,17 +462,102 @@ static inline JSValue MIST_TryNewImmediateASCII (const char *str, size_t len) {
return v;
}
/* Heap object type checks (non-inline — see mist_is_* in quickjs-internal.h
for inline versions used by the VM dispatch loop) */
JS_BOOL JS_IsArray(JSValue v);
JS_BOOL JS_IsRecord(JSValue v);
#define JS_IsObject JS_IsRecord
JS_BOOL JS_IsFunction(JSValue v);
JS_BOOL JS_IsBlob(JSValue v);
JS_BOOL JS_IsText(JSValue v);
static JS_BOOL JS_IsStone(JSValue v);
JS_BOOL JS_IsStone(JSValue v);
// Fundamental
/* ============================================================
3. GC References
============================================================
With a copying GC, reference counting is not needed since all live
objects are discovered by tracing from roots. These macros make
existing DupValue/FreeValue calls into no-ops.
============================================================ */
#define JS_DupValue(ctx, v) (v)
#define JS_FreeValue(ctx, v) ((void)0)
#define JS_DupValueRT(rt, v) (v)
#define JS_FreeValueRT(rt, v) ((void)0)
/* ============================================================
4. Property Access
============================================================ */
JSValue JS_GetProperty (JSContext *ctx, JSValue this_obj, JSValue prop);
int JS_SetProperty (JSContext *ctx, JSValue this_obj, JSValue prop, JSValue val);
JSValue JS_GetPropertyStr (JSContext *ctx, JSValue this_obj, const char *prop);
int JS_SetPropertyStr (JSContext *ctx, JSValue this_obj, const char *prop, JSValue val);
JSValue JS_GetPropertyNumber (JSContext *ctx, JSValue this_obj, int idx);
JSValue JS_SetPropertyNumber (JSContext *ctx, JSValue obj, int idx, JSValue val);
JSValue JS_GetPrototype (JSContext *ctx, JSValue val);
JSValue JS_GetOwnPropertyNames (JSContext *ctx, JSValue obj);
int JS_GetLength (JSContext *ctx, JSValue obj, int64_t *pres);
void JS_SetOpaque (JSValue obj, void *opaque);
void *JS_GetOpaque (JSValue obj, JSClassID class_id);
void *JS_GetOpaque2 (JSContext *ctx, JSValue obj, JSClassID class_id);
void *JS_GetAnyOpaque (JSValue obj, JSClassID *class_id);
/* ============================================================
5. Object / Array / String Creation
============================================================ */
JSValue JS_NewObjectProtoClass (JSContext *ctx, JSValue proto, JSClassID class_id);
JSValue JS_NewObjectClass (JSContext *ctx, int class_id);
JSValue JS_NewObjectProto (JSContext *ctx, JSValue proto);
JSValue JS_NewObject (JSContext *ctx);
JSValue JS_NewObjectCap (JSContext *ctx, uint32_t n);
JSValue JS_NewArray (JSContext *ctx);
JSValue JS_NewArrayLen (JSContext *ctx, uint32_t len);
JSValue JS_NewArrayCap (JSContext *ctx, uint32_t cap);
JSValue JS_NewArrayFrom (JSContext *ctx, int count, JSValue *values);
int JS_ArrayPush (JSContext *ctx, JSValue *arr_ptr, JSValue val);
JSValue JS_ArrayPop (JSContext *ctx, JSValue obj);
JSValue JS_NewStringLen (JSContext *ctx, const char *str1, size_t len1);
/* Build a string value from the NUL-terminated C string `str`: measures it
   with strlen() and forwards to JS_NewStringLen(). `str` must not be NULL
   (strlen() is called on it unconditionally). */
static inline JSValue JS_NewString (JSContext *ctx, const char *str) {
  size_t len = strlen (str);

  return JS_NewStringLen (ctx, str, len);
}
/* ============================================================
6. Type Conversion
============================================================ */
int JS_ToBool (JSContext *ctx, JSValue val); /* return -1 for JS_EXCEPTION */
int JS_ToInt32 (JSContext *ctx, int32_t *pres, JSValue val);
/* Unsigned front end for JS_ToInt32(): the destination `pres` is passed on
   reinterpreted as an int32_t pointer, and JS_ToInt32()'s return value is
   returned unchanged. */
static inline int JS_ToUint32 (JSContext *ctx, uint32_t *pres, JSValue val) {
  int32_t *signed_out = (int32_t *)pres;

  return JS_ToInt32 (ctx, signed_out, val);
}
int JS_ToInt64 (JSContext *ctx, int64_t *pres, JSValue val);
int JS_ToFloat64 (JSContext *ctx, double *pres, JSValue val);
JSValue JS_ToString (JSContext *ctx, JSValue val);
JSValue JS_ToPropertyKey (JSContext *ctx, JSValue val);
const char *JS_ToCStringLen2 (JSContext *ctx, size_t *plen, JSValue val1, JS_BOOL cesu8);
/* Shorthand for JS_ToCStringLen2() with its `cesu8` argument set to 0.
   `plen` is forwarded as given; the returned pointer is whatever
   JS_ToCStringLen2() returns. */
static inline const char * JS_ToCStringLen (JSContext *ctx, size_t *plen, JSValue val1) {
  const char *cstr = JS_ToCStringLen2 (ctx, plen, val1, 0);

  return cstr;
}
/* Shorthand for JS_ToCStringLen2() with a NULL `plen` and `cesu8` set to 0;
   the returned pointer is whatever JS_ToCStringLen2() returns. */
static inline const char * JS_ToCString (JSContext *ctx, JSValue val1) {
  const char *cstr = JS_ToCStringLen2 (ctx, NULL, val1, 0);

  return cstr;
}
void JS_FreeCString (JSContext *ctx, const char *ptr);
JS_BOOL JS_StrictEq (JSContext *ctx, JSValue op1, JSValue op2);
/* ============================================================
7. Error Handling
============================================================ */
JSValue JS_Throw (JSContext *ctx, JSValue obj);
JSValue JS_GetException (JSContext *ctx);
JS_BOOL JS_HasException (JSContext *ctx);
@@ -506,45 +573,28 @@ JSValue __js_printf_like (2, 3)
JS_ThrowInternalError (JSContext *ctx, const char *fmt, ...);
JSValue JS_ThrowOutOfMemory (JSContext *ctx);
// TODO: rename this to just "eq"
JS_BOOL JS_StrictEq (JSContext *ctx, JSValue op1, JSValue op2);
/* ============================================================
8. Function Creation and Invocation
============================================================ */
int JS_ToBool (JSContext *ctx, JSValue val); /* return -1 for JS_EXCEPTION */
int JS_ToInt32 (JSContext *ctx, int32_t *pres, JSValue val);
/* Unsigned front end for JS_ToInt32(): the destination `pres` is passed on
   reinterpreted as an int32_t pointer, and JS_ToInt32()'s return value is
   returned unchanged. */
static inline int JS_ToUint32 (JSContext *ctx, uint32_t *pres, JSValue val) {
  int32_t *signed_out = (int32_t *)pres;

  return JS_ToInt32 (ctx, signed_out, val);
}
int JS_ToInt64 (JSContext *ctx, int64_t *pres, JSValue val);
int JS_ToFloat64 (JSContext *ctx, double *pres, JSValue val);
/* return an exception if 'val' is a Number */
JSValue JS_Call (JSContext *ctx, JSValue func_obj, JSValue this_obj, int argc, JSValue *argv);
JSValue JS_Stone (JSContext *ctx, JSValue this_val);
JSValue JS_NewStringLen (JSContext *ctx, const char *str1, size_t len1);
/* Build a string value from the NUL-terminated C string `str`: measures it
   with strlen() and forwards to JS_NewStringLen(). `str` must not be NULL
   (strlen() is called on it unconditionally). */
static inline JSValue JS_NewString (JSContext *ctx, const char *str) {
  size_t len = strlen (str);

  return JS_NewStringLen (ctx, str, len);
}
JSValue JS_ToString (JSContext *ctx, JSValue val);
JSValue JS_ToPropertyKey (JSContext *ctx, JSValue val);
const char *JS_ToCStringLen2 (JSContext *ctx, size_t *plen, JSValue val1, JS_BOOL cesu8);
/* Shorthand for JS_ToCStringLen2() with its `cesu8` argument set to 0.
   `plen` is forwarded as given; the returned pointer is whatever
   JS_ToCStringLen2() returns. */
static inline const char * JS_ToCStringLen (JSContext *ctx, size_t *plen, JSValue val1) {
  const char *cstr = JS_ToCStringLen2 (ctx, plen, val1, 0);

  return cstr;
}
/* Shorthand for JS_ToCStringLen2() with a NULL `plen` and `cesu8` set to 0;
   the returned pointer is whatever JS_ToCStringLen2() returns. */
static inline const char * JS_ToCString (JSContext *ctx, JSValue val1) {
  const char *cstr = JS_ToCStringLen2 (ctx, NULL, val1, 0);

  return cstr;
}
void JS_FreeCString (JSContext *ctx, const char *ptr);
/* JSON */
/* 'buf' must be zero terminated i.e. buf[buf_len] = '\0'. */
JSValue JS_ParseJSON (JSContext *ctx, const char *buf, size_t buf_len,
const char *filename);
#define JS_PARSE_JSON_EXT (1 << 0) /* allow extended JSON */
JSValue JS_ParseJSON2 (JSContext *ctx, const char *buf, size_t buf_len,
const char *filename, int flags);
JSValue JS_JSONStringify (JSContext *ctx, JSValue obj,
JSValue replacer, JSValue space0);
JSValue JS_NewObjectProtoClass (JSContext *ctx, JSValue proto, JSClassID class_id);
JSValue JS_NewObjectClass (JSContext *ctx, int class_id);
JSValue JS_NewObjectProto (JSContext *ctx, JSValue proto);
JSValue JS_NewObject (JSContext *ctx);
/* ============================================================
9. Intrinsic Wrappers (JS_Cell* / JS_Array*)
============================================================ */
JSValue JS_NewArray (JSContext *ctx);
JSValue JS_NewArrayLen (JSContext *ctx, uint32_t len);
int JS_ArrayPush (JSContext *ctx, JSValue *arr_ptr, JSValue val);
JSValue JS_ArrayPop (JSContext *ctx, JSValue obj);
/* Intrinsic array operations - signatures match internal functions */
/* Intrinsic array operations */
JSValue JS_Array (JSContext *ctx, JSValue arg0, JSValue arg1, JSValue arg2, JSValue arg3);
JSValue JS_ArrayFilter (JSContext *ctx, JSValue arr, JSValue fn);
JSValue JS_ArraySort (JSContext *ctx, JSValue arr, JSValue selector);
@@ -552,9 +602,7 @@ JSValue JS_ArrayFind (JSContext *ctx, JSValue arr, JSValue target_or_fn, JSValue
JSValue JS_ArrFor (JSContext *ctx, JSValue arr, JSValue fn, JSValue reverse, JSValue exit_val);
JSValue JS_ArrayReduce (JSContext *ctx, JSValue arr, JSValue fn, JSValue initial, JSValue reverse);
/* Cell intrinsic functions - C API wrappers */
/* Core functions */
/* Core cell functions */
JSValue JS_CellStone (JSContext *ctx, JSValue val);
JSValue JS_CellLength (JSContext *ctx, JSValue val);
JSValue JS_CellReverse (JSContext *ctx, JSValue val);
@@ -567,7 +615,7 @@ JSValue JS_CellModulo (JSContext *ctx, JSValue a, JSValue b);
JSValue JS_CellNeg (JSContext *ctx, JSValue val);
JSValue JS_CellNot (JSContext *ctx, JSValue val);
/* Text functions */
/* Text cell functions */
JSValue JS_CellText (JSContext *ctx, JSValue val);
JSValue JS_CellLower (JSContext *ctx, JSValue text);
JSValue JS_CellUpper (JSContext *ctx, JSValue text);
@@ -578,7 +626,7 @@ JSValue JS_CellSearch (JSContext *ctx, JSValue text, JSValue pattern, JSValue fr
JSValue JS_CellExtract (JSContext *ctx, JSValue text, JSValue from, JSValue to);
JSValue JS_CellCharacter (JSContext *ctx, JSValue codepoint);
/* Number functions */
/* Number cell functions */
JSValue JS_CellNumber (JSContext *ctx, JSValue val);
JSValue JS_CellAbs (JSContext *ctx, JSValue num);
JSValue JS_CellSign (JSContext *ctx, JSValue num);
@@ -592,56 +640,20 @@ JSValue JS_CellMin (JSContext *ctx, JSValue a, JSValue b);
JSValue JS_CellMax (JSContext *ctx, JSValue a, JSValue b);
JSValue JS_CellRemainder (JSContext *ctx, JSValue a, JSValue b);
/* Object functions */
/* Object cell functions */
JSValue JS_CellObject (JSContext *ctx, JSValue proto, JSValue props);
/* Format function */
/* Format */
JSValue JS_CellFormat (JSContext *ctx, JSValue text, JSValue collection, JSValue transformer);
/* Helper functions */
JSValue JS_NewArrayFrom (JSContext *ctx, int count, JSValue *values);
/* Output helpers */
void JS_PrintText (JSContext *ctx, JSValue val);
void JS_PrintTextLn (JSContext *ctx, JSValue val);
void JS_PrintFormatted (JSContext *ctx, const char *fmt, int count, JSValue *values);
JSValue JS_GetProperty (JSContext *ctx, JSValue this_obj, JSValue prop);
int JS_SetProperty (JSContext *ctx, JSValue this_obj, JSValue prop, JSValue val);
// For records
JSValue JS_GetPropertyStr (JSContext *ctx, JSValue this_obj, const char *prop);
int JS_SetPropertyStr (JSContext *ctx, JSValue this_obj, const char *prop, JSValue val);
// Must be an array
JSValue JS_GetPropertyNumber (JSContext *ctx, JSValue this_obj, int idx);
JSValue JS_SetPropertyNumber (JSContext *ctx, JSValue obj, int idx, JSValue val);
JSValue JS_GetPrototype (JSContext *ctx, JSValue val);
/* Get property keys as array of text */
JSValue JS_GetOwnPropertyNames (JSContext *ctx, JSValue obj);
JSValue JS_Call (JSContext *ctx, JSValue func_obj, JSValue this_obj, int argc, JSValue *argv);
void JS_SetOpaque (JSValue obj, void *opaque);
void *JS_GetOpaque (JSValue obj, JSClassID class_id);
void *JS_GetOpaque2 (JSContext *ctx, JSValue obj, JSClassID class_id);
void *JS_GetAnyOpaque (JSValue obj, JSClassID *class_id);
/* 'buf' must be zero terminated i.e. buf[buf_len] = '\0'. */
JSValue JS_ParseJSON (JSContext *ctx, const char *buf, size_t buf_len,
const char *filename);
#define JS_PARSE_JSON_EXT (1 << 0) /* allow extended JSON */
JSValue JS_ParseJSON2 (JSContext *ctx, const char *buf, size_t buf_len,
const char *filename, int flags);
JSValue JS_JSONStringify (JSContext *ctx, JSValue obj,
JSValue replacer, JSValue space0);
/* return != 0 if the JS code needs to be interrupted */
typedef int JSInterruptHandler (JSRuntime *rt, void *opaque);
void JS_SetInterruptHandler (JSContext *ctx, JSInterruptHandler *cb,
void *opaque);
/* C function definition */
/* ============================================================
10. C Function Definition
============================================================ */
typedef enum JSCFunctionEnum {
JS_CFUNC_generic,
JS_CFUNC_generic_magic,
@@ -901,7 +913,27 @@ typedef struct JSCFunctionListEntry {
int JS_SetPropertyFunctionList (JSContext *ctx, JSValue obj,
const JSCFunctionListEntry *tab, int len);
/* debug value output */
/* ============================================================
11. Debug / Dump Utilities
============================================================ */
/* Memory statistics record. It is passed to JS_ComputeMemoryUsage() through
   a non-const pointer and to JS_DumpMemoryUsage() through a const one.
   NOTE(review): judging by the names, *_count fields are tallies and *_size
   fields are byte totals - confirm against the implementation. */
typedef struct JSMemoryUsage {
int64_t malloc_size, malloc_limit, memory_used_size;
int64_t malloc_count;
int64_t memory_used_count;
int64_t str_count, str_size;
int64_t obj_count, obj_size;
int64_t prop_count, prop_size;
int64_t shape_count, shape_size;
int64_t js_func_count, js_func_size, js_func_code_size;
int64_t js_func_pc2line_count, js_func_pc2line_size;
int64_t c_func_count, array_count;
int64_t fast_array_count, fast_array_elements;
int64_t binary_object_count, binary_object_size;
} JSMemoryUsage;
void JS_ComputeMemoryUsage (JSRuntime *rt, JSMemoryUsage *s);
void JS_DumpMemoryUsage (FILE *fp, const JSMemoryUsage *s, JSRuntime *rt);
typedef struct {
JS_BOOL show_hidden : 8; /* only show enumerable properties */
@@ -957,7 +989,9 @@ JSValue js_debugger_fn_info (JSContext *ctx, JSValue fn);
JSValue js_debugger_fn_bytecode (JSContext *js, JSValue fn);
void *js_debugger_val_address (JSContext *js, JSValue val);
/* Memory allocation functions (bump allocator) */
/* ============================================================
12. Memory Allocation
============================================================ */
void *js_malloc (JSContext *ctx, size_t size);
void *js_mallocz (JSContext *ctx, size_t size);
void *js_realloc (JSContext *ctx, void *ptr, size_t size);
@@ -969,17 +1003,13 @@ void *js_malloc_rt (size_t size);
void *js_mallocz_rt (size_t size);
void js_free_rt (void *ptr);
/* ============================================================
13. Compilation and Bytecode
============================================================ */
struct cJSON;
/* Compiled bytecode (context-free, serializable) */
typedef struct MachCode MachCode;
/* Compile AST cJSON tree to context-free MachCode. */
MachCode *JS_CompileMachTree(struct cJSON *ast);
/* Compile AST JSON string to context-free MachCode. */
MachCode *JS_CompileMach(const char *ast_json);
/* Free a compiled MachCode tree. */
void JS_FreeMachCode(MachCode *mc);
@@ -995,6 +1025,9 @@ struct JSCodeRegister *JS_LoadMachCode(JSContext *ctx, MachCode *mc, JSValue env
/* Deserialize and execute pre-compiled MACH binary bytecode. */
JSValue JS_RunMachBin(JSContext *ctx, const uint8_t *data, size_t size, JSValue env);
/* Parse mcode JSON IR, compile, and execute via register VM. */
JSValue JS_RunMachMcode(JSContext *ctx, const char *json_str, size_t len, JSValue env);
/* Dump disassembly of pre-compiled MACH binary bytecode. */
void JS_DumpMachBin(JSContext *ctx, const uint8_t *data, size_t size, JSValue env);

File diff suppressed because it is too large Load Diff

View File

@@ -1,3 +1,7 @@
// streamline.ce — run the full compile + optimize pipeline, output JSON
//
// Usage: ./cell --core . streamline.ce <file.ce|file.cm>
var fd = use("fd")
var json = use("json")
var tokenize = use("tokenize")

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More