aot compile vm_suite
This commit is contained in:
87
build.cm
87
build.cm
@@ -576,6 +576,93 @@ Build.compile_native = function(src_path, target, buildtype, pkg) {
|
||||
return dylib_path
|
||||
}
|
||||
|
||||
// Compile already-optimized mcode IR to a native shared library via QBE.
//
// Use this when the caller already holds the optimized IR: it avoids calling
// mcode a second time and hitting module-level state pollution.
//
//   optimized - optimized IR, handed straight to qbe_emit
//   src_path  - path of the originating source file; its contents key the
//               build cache and its basename names the installed library
//   opts      - optional record:
//                 target    - toolchain key (defaults to the host target)
//                 buildtype - accepted for parity with compile_native; not
//                             used by this function
//                 pkg       - package used to derive the exported symbol name
//                             and the install directory
//
// Returns the path of the library inside the build directory. Disrupts when
// the target has no toolchain entry or when qbe, the assembler step, the
// runtime-stub compile, or the link step exits non-zero.
Build.compile_native_ir = function(optimized, src_path, opts) {
  var _target = (opts && opts.target) || Build.detect_host_target()
  var pkg = opts && opts.pkg
  var qbe_rt_path = null
  var native_stem = null
  var native_install_dir = null
  var native_install_path = null
  var sym_name = null
  var il = null
  var rc = 0

  var tc = toolchains[_target]
  if (tc == null) {
    // Fail with a readable message instead of tripping over tc.system below.
    print('No toolchain for target: ' + _target)
    disrupt
  }
  var dylib_ext = tc.system == 'windows' ? '.dll' : (tc.system == 'darwin' ? '.dylib' : '.so')
  var cc = tc.c

  // Look in the build cache before generating any IL: the cache key depends
  // only on the source text and the target, so a hit needs no QBE work at all.
  var src = text(fd.slurp(src_path))
  var hash = content_hash(src + '\n' + _target + '\nnative')
  var build_dir = get_build_dir()
  ensure_dir(build_dir)

  var dylib_path = build_dir + '/' + hash + '.' + _target + dylib_ext
  if (fd.is_file(dylib_path)) {
    return dylib_path
  }

  // Cache miss: lower the IR to QBE IL.
  var qbe_macros = use('qbe')
  var qbe_emit = use('qbe_emit')
  if (pkg) {
    sym_name = shop.c_symbol_for_file(pkg, fd.basename(src_path))
  }
  il = qbe_emit(optimized, qbe_macros, sym_name)
  il = qbe_insert_dead_labels(il)

  var tmp = '/tmp/cell_native_' + hash
  var ssa_path = tmp + '.ssa'
  var s_path = tmp + '.s'
  var o_path = tmp + '.o'
  // The runtime-stub object is built with the target's compiler, so it must be
  // cached per target; a single shared name would link a stale object built
  // for a different target.
  var rt_o_path = '/tmp/cell_qbe_rt.' + _target + '.o'

  fd.slurpwrite(ssa_path, stone(blob(il)))

  // IL -> assembly
  rc = os.system('qbe -o ' + s_path + ' ' + ssa_path)
  if (rc != 0) {
    print('QBE compilation failed for: ' + src_path)
    disrupt
  }

  // assembly -> object
  rc = os.system(cc + ' -c ' + s_path + ' -o ' + o_path)
  if (rc != 0) {
    print('Assembly failed for: ' + src_path)
    disrupt
  }

  // Build the QBE runtime stubs once per target and reuse the object afterwards.
  if (!fd.is_file(rt_o_path)) {
    qbe_rt_path = shop.get_package_dir('core') + '/qbe_rt.c'
    rc = os.system(cc + ' -c ' + qbe_rt_path + ' -o ' + rt_o_path + ' -fPIC')
    if (rc != 0) {
      print('QBE runtime stubs compilation failed')
      disrupt
    }
  }

  // Link as a shared library; symbols left undefined here are expected to be
  // resolved when the library is loaded.
  var link_cmd = cc + ' -shared -fPIC'
  if (tc.system == 'darwin') {
    link_cmd = link_cmd + ' -undefined dynamic_lookup'
  } else if (tc.system == 'linux') {
    link_cmd = link_cmd + ' -Wl,--allow-shlib-undefined'
  }
  link_cmd = link_cmd + ' ' + o_path + ' ' + rt_o_path + ' -o ' + dylib_path

  rc = os.system(link_cmd)
  if (rc != 0) {
    print('Linking native dylib failed for: ' + src_path)
    disrupt
  }

  log.console('Built native: ' + fd.basename(dylib_path))

  // When building for a package, also copy the library into the package's lib directory.
  if (pkg) {
    native_stem = fd.basename(src_path)
    native_install_dir = shop.get_lib_dir() + '/' + shop.lib_name_for_package(pkg)
    ensure_dir(native_install_dir)
    native_install_path = native_install_dir + '/' + native_stem + dylib_ext
    fd.slurpwrite(native_install_path, fd.slurp(dylib_path))
  }

  return dylib_path
}
|
||||
|
||||
// ============================================================================
|
||||
// Module table generation (for static builds)
|
||||
// ============================================================================
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// compare_aot.ce — compile a .cm module via both paths and compare results
|
||||
// compare_aot.ce — compile a .ce/.cm file via both paths and compare results
|
||||
//
|
||||
// Usage:
|
||||
// cell --dev compare_aot.ce <module.cm>
|
||||
// cell --dev compare_aot.ce <file.ce>
|
||||
|
||||
var build = use('build')
|
||||
var fd_mod = use('fd')
|
||||
@@ -9,17 +9,20 @@ var os = use('os')
|
||||
var json = use('json')
|
||||
|
||||
// Render a value for printing: null becomes the text "null", anything else
// is JSON-encoded.
var show = function(v) {
  return v == null ? "null" : json.encode(v)
}
|
||||
|
||||
if (length(args) < 1) {
|
||||
print('usage: cell --dev compare_aot.ce <module.cm>')
|
||||
print('usage: cell --dev compare_aot.ce <file>')
|
||||
return
|
||||
}
|
||||
|
||||
var file = args[0]
|
||||
if (!fd_mod.is_file(file)) {
|
||||
if (!ends_with(file, '.cm') && fd_mod.is_file(file + '.cm'))
|
||||
if (!ends_with(file, '.ce') && fd_mod.is_file(file + '.ce'))
|
||||
file = file + '.ce'
|
||||
else if (!ends_with(file, '.cm') && fd_mod.is_file(file + '.cm'))
|
||||
file = file + '.cm'
|
||||
else {
|
||||
print('file not found: ' + file)
|
||||
@@ -43,50 +46,68 @@ var folded = fold(ast)
|
||||
var compiled = mcode_mod(folded)
|
||||
var optimized = streamline_mod(compiled)
|
||||
|
||||
// --- Interpreted (mach VM) ---
|
||||
print('--- interpreted ---')
|
||||
var mcode_json = json.encode(optimized)
|
||||
var mach_blob = mach_compile_mcode_bin(abs, mcode_json)
|
||||
var result_interp = mach_load(mach_blob, stone({}))
|
||||
print('result: ' + show(result_interp))
|
||||
|
||||
// --- Native (AOT via QBE) ---
|
||||
print('\n--- native ---')
|
||||
var dylib_path = build.compile_native(abs, null, null, null)
|
||||
print('dylib: ' + dylib_path)
|
||||
|
||||
var handle = os.dylib_open(dylib_path)
|
||||
if (!handle) {
|
||||
print('failed to open dylib')
|
||||
return
|
||||
}
|
||||
|
||||
// Build env with runtime functions. Must include starts_with etc. because
|
||||
// the GC can lose global object properties after compaction.
|
||||
// Shared env for both paths — only non-intrinsic runtime functions.
|
||||
// Intrinsics (starts_with, ends_with, logical, some, every, etc.) live on
|
||||
// the stoned global and are found via GETINTRINSIC/cell_rt_get_intrinsic.
|
||||
var env = stone({
|
||||
logical: logical,
|
||||
some: some,
|
||||
every: every,
|
||||
starts_with: starts_with,
|
||||
ends_with: ends_with,
|
||||
log: log,
|
||||
fallback: fallback,
|
||||
parallel: parallel,
|
||||
race: race,
|
||||
sequence: sequence
|
||||
sequence: sequence,
|
||||
use
|
||||
})
|
||||
|
||||
var result_native = os.native_module_load(handle, env)
|
||||
print('result: ' + show(result_native))
|
||||
// --- Interpreted (mach VM) ---
|
||||
var result_interp = null
|
||||
var interp_ok = false
|
||||
// Run the optimized IR on the mach VM with the shared env, storing the value
// in result_interp and printing it.
var run_interp = function() {
  print('--- interpreted ---')
  var blob_bin = mach_compile_mcode_bin(abs, json.encode(optimized))
  result_interp = mach_load(blob_bin, env)
  interp_ok = true
  print('result: ' + show(result_interp))
} disruption {
  // NOTE(review): a disruption is also recorded as interp_ok = true, so this
  // run is never reported as failed; result_interp keeps its previous value.
  // Presumably intentional (a disruption counts as an outcome) - confirm.
  interp_ok = true
  print('(disruption escaped from interpreted run)')
}
|
||||
run_interp()
|
||||
|
||||
// --- Native (AOT via QBE) ---
|
||||
var result_native = null
|
||||
var native_ok = false
|
||||
// Compile the same optimized IR to a native library, load it with the shared
// env, and store the module's value in result_native.
var run_native = function() {
  print('\n--- native ---')
  var lib_path = build.compile_native_ir(optimized, abs, null)
  print('dylib: ' + lib_path)
  var lib = os.dylib_open(lib_path)
  if (!lib) {
    // native_ok stays false: this is the only path reported as a failed run.
    print('failed to open dylib')
    return
  }
  result_native = os.native_module_load(lib, env)
  native_ok = true
  print('result: ' + show(result_native))
} disruption {
  // NOTE(review): as with the interpreted run, a disruption is still recorded
  // as native_ok = true - presumably intentional, confirm.
  native_ok = true
  print('(disruption escaped from native run)')
}
|
||||
run_native()
|
||||
|
||||
// --- Comparison ---
|
||||
print('\n--- comparison ---')
|
||||
var s_interp = show(result_interp)
|
||||
var s_native = show(result_native)
|
||||
if (s_interp == s_native) {
|
||||
print('MATCH')
|
||||
if (interp_ok && native_ok) {
|
||||
if (s_interp == s_native) {
|
||||
print('MATCH')
|
||||
} else {
|
||||
print('MISMATCH')
|
||||
print(' interp: ' + s_interp)
|
||||
print(' native: ' + s_native)
|
||||
}
|
||||
} else {
|
||||
print('MISMATCH')
|
||||
print(' interp: ' + s_interp)
|
||||
print(' native: ' + s_native)
|
||||
if (!interp_ok) print('interpreted run failed')
|
||||
if (!native_ok) print('native run failed')
|
||||
}
|
||||
|
||||
24
qbe_emit.cm
24
qbe_emit.cm
@@ -101,6 +101,7 @@ var qbe_emit = function(ir, qbe, export_name) {
|
||||
var chk = null
|
||||
var pat_label = null
|
||||
var flg_label = null
|
||||
var in_handler = false
|
||||
|
||||
// Function signature: (ctx, frame_ptr) → JSValue
|
||||
emit(`export function l $${name}(l %ctx, l %fp) {`)
|
||||
@@ -127,7 +128,17 @@ var qbe_emit = function(ir, qbe, export_name) {
|
||||
}
|
||||
|
||||
// Emit IL that reloads %fp and checks for a pending exception in one runtime
// call. The result of cell_rt_refresh_fp_checked is compared against 0: when
// it is 0 the generated code jumps to @disruption_handler if this function
// has a handler and we are not already emitting inside it, otherwise it
// returns 15 (the same value the disrupt op returns). On the non-zero path
// execution continues at the freshly emitted @<tmp>_ok label.
//
// Only the checked call is emitted; also emitting the plain
// cell_rt_refresh_fp call first would just add a redundant runtime call whose
// result is immediately overwritten.
var refresh_fp = function() {
  emit(` %fp =l call $cell_rt_refresh_fp_checked(l %ctx)`)
  var exc = fresh()
  emit(` %${exc} =w ceql %fp, 0`)
  if (has_handler && !in_handler) {
    emit(` jnz %${exc}, @disruption_handler, @${exc}_ok`)
  } else {
    emit(` jnz %${exc}, @${exc}_exc, @${exc}_ok`)
    emit(`@${exc}_exc`)
    emit(` ret 15`)
  }
  emit(`@${exc}_ok`)
}
|
||||
|
||||
// Walk instructions
|
||||
@@ -143,7 +154,10 @@ var qbe_emit = function(ir, qbe, export_name) {
|
||||
emit(" jmp @disruption_handler")
|
||||
}
|
||||
emit("@disruption_handler")
|
||||
emit(" call $cell_rt_clear_exception(l %ctx)")
|
||||
emit(` %fp =l call $cell_rt_refresh_fp(l %ctx)`)
|
||||
last_was_term = false
|
||||
in_handler = true
|
||||
}
|
||||
i = i + 1
|
||||
|
||||
@@ -949,7 +963,7 @@ var qbe_emit = function(ir, qbe, export_name) {
|
||||
}
|
||||
if (op == "disrupt") {
|
||||
emit(` call $cell_rt_disrupt(l %ctx)`)
|
||||
if (has_handler) {
|
||||
if (has_handler && !in_handler) {
|
||||
emit(" jmp @disruption_handler")
|
||||
} else {
|
||||
emit(` ret 15`)
|
||||
@@ -1012,11 +1026,7 @@ var qbe_emit = function(ir, qbe, export_name) {
|
||||
}
|
||||
emit("@disrupt")
|
||||
emit(` call $cell_rt_disrupt(l %ctx)`)
|
||||
if (has_handler) {
|
||||
emit(" jmp @disruption_handler")
|
||||
} else {
|
||||
emit(` ret 15`)
|
||||
}
|
||||
emit(` ret 15`)
|
||||
|
||||
emit("}")
|
||||
emit("")
|
||||
|
||||
@@ -2084,6 +2084,7 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
|
||||
env = fn->u.reg.env_record;
|
||||
pc = code->disruption_pc;
|
||||
ctx->disruption_reported = FALSE;
|
||||
ctx->current_exception = JS_NULL;
|
||||
break;
|
||||
}
|
||||
if (JS_IsNull(frame->caller)) {
|
||||
@@ -2123,7 +2124,7 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
|
||||
}
|
||||
ctx->disruption_reported = TRUE;
|
||||
}
|
||||
result = JS_Throw(ctx, JS_NULL);
|
||||
result = JS_Throw(ctx, JS_TRUE);
|
||||
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
|
||||
goto done;
|
||||
}
|
||||
|
||||
@@ -282,12 +282,25 @@ void cell_rt_set_native_env(JSContext *ctx, JSValue env) {
|
||||
}
|
||||
|
||||
/* Look up an intrinsic by name for natively compiled code.
 *
 * Lookup order:
 *   1. the native env, when one has been set (runtime-provided functions
 *      like log); a non-null property wins;
 *   2. a linear scan over the global object's slots.
 *
 * Returns the value found. If the name is found in neither place a
 * ReferenceError is thrown and JS_EXCEPTION is returned.
 *
 * The previous unconditional `return JS_GetPropertyStr(ctx, ctx->global_obj,
 * name)` is removed: it made the scan and the error path below unreachable.
 */
JSValue cell_rt_get_intrinsic(JSContext *ctx, const char *name) {
  /* Check native env first (runtime-provided functions like log) */
  if (g_has_native_env) {
    JSValue v = JS_GetPropertyStr(ctx, g_native_env_ref.val, name);
    if (!JS_IsNull(v))
      return v;
  }

  /* Linear scan of global object — avoids hash mismatch issues with
     stoned records whose keys may be in cold storage */
  JSValue gobj = ctx->global_obj;
  if (JS_IsRecord(gobj)) {
    JSRecord *rec = (JSRecord *)chase(gobj);
    uint64_t mask = objhdr_cap56(rec->mist_hdr);
    /* Slots are scanned from index 1 up to and including mask. */
    for (uint64_t i = 1; i <= mask; i++) {
      if (js_key_equal_str(rec->slots[i].key, name))
        return rec->slots[i].val;
    }
  }

  JS_ThrowReferenceError(ctx, "'%s' is not defined", name);
  return JS_EXCEPTION;
}
|
||||
|
||||
/* --- Closure access ---
|
||||
@@ -349,8 +362,36 @@ JSValue *cell_rt_enter_frame(JSContext *ctx, int64_t nr_slots) {
|
||||
|
||||
/* Return the slot array of the frame stored at the top of the AOT GC-ref
 * stack (g_aot_gc_refs[g_aot_depth - 1]).
 *
 * The stack depth is validated BEFORE the array is indexed, and the frame
 * pointer is declared exactly once; an earlier unchecked declaration indexed
 * the array with g_aot_depth - 1 ahead of the depth check. Both failure
 * conditions are treated as internal bugs: a diagnostic goes to stderr and
 * the process aborts.
 */
JSValue *cell_rt_refresh_fp(JSContext *ctx) {
  (void)ctx;
  if (g_aot_depth <= 0) {
    fprintf(stderr, "[BUG] cell_rt_refresh_fp: g_aot_depth=%d\n", g_aot_depth);
    abort();
  }
  JSValue val = g_aot_gc_refs[g_aot_depth - 1].val;
  JSFrameRegister *frame = (JSFrameRegister *)JS_VALUE_GET_PTR(val);
  if (!frame) {
    fprintf(stderr, "[BUG] cell_rt_refresh_fp: frame is NULL at depth=%d val=%lld\n",
            g_aot_depth, (long long)val);
    abort();
  }
  return (JSValue *)frame->slots;
}
|
||||
|
||||
/* Combined refresh + exception check in a single call.
|
||||
Returns the refreshed fp, or NULL if there is a pending exception.
|
||||
This avoids QBE register-allocation issues from two consecutive calls. */
|
||||
JSValue *cell_rt_refresh_fp_checked(JSContext *ctx) {
|
||||
if (JS_HasException(ctx))
|
||||
return NULL;
|
||||
if (g_aot_depth <= 0) {
|
||||
fprintf(stderr, "[BUG] cell_rt_refresh_fp_checked: g_aot_depth=%d\n", g_aot_depth);
|
||||
abort();
|
||||
}
|
||||
JSValue val = g_aot_gc_refs[g_aot_depth - 1].val;
|
||||
JSFrameRegister *frame = (JSFrameRegister *)JS_VALUE_GET_PTR(val);
|
||||
if (!frame) {
|
||||
fprintf(stderr, "[BUG] cell_rt_refresh_fp_checked: frame is NULL\n");
|
||||
abort();
|
||||
}
|
||||
return (JSValue *)frame->slots;
|
||||
}
|
||||
|
||||
@@ -464,12 +505,13 @@ JSValue cell_rt_frame(JSContext *ctx, JSValue fn, int64_t nargs) {
|
||||
}
|
||||
|
||||
/* Store val into slot idx of the frame referenced by frame_val.
 * Does nothing when frame_val is JS_EXCEPTION or JS_NULL, i.e. when there is
 * no frame to write into. idx is not range-checked here.
 */
void cell_rt_setarg(JSValue frame_val, int64_t idx, JSValue val) {
  JSFrameRegister *fr;

  if (frame_val == JS_EXCEPTION || frame_val == JS_NULL)
    return;

  fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val);
  fr->slots[idx] = val;
}
|
||||
|
||||
JSValue cell_rt_invoke(JSContext *ctx, JSValue frame_val) {
|
||||
if (frame_val == JS_EXCEPTION) return JS_EXCEPTION;
|
||||
JSFrameRegister *fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val);
|
||||
int nr_slots = (int)objhdr_cap56(fr->header);
|
||||
int c_argc = (nr_slots >= 2) ? nr_slots - 2 : 0;
|
||||
@@ -580,6 +622,15 @@ JSValue cell_rt_or(JSContext *ctx, JSValue left, JSValue right) {
|
||||
return JS_ToBool(ctx, left) ? left : right;
|
||||
}
|
||||
|
||||
/* --- Exception checking ---
|
||||
After potentially-throwing runtime calls, QBE-generated code needs to
|
||||
check for pending exceptions and branch to the disruption handler. */
|
||||
|
||||
/* Clear a pending exception, if any, by fetching it from the context and
 * discarding the fetched value. No-op when nothing is pending.
 */
void cell_rt_clear_exception(JSContext *ctx) {
  if (!JS_HasException(ctx))
    return;
  JS_GetException(ctx);
}
|
||||
|
||||
/* --- Disruption --- */
|
||||
|
||||
void cell_rt_disrupt(JSContext *ctx) {
|
||||
@@ -630,6 +681,10 @@ JSValue cell_rt_native_module_load(JSContext *ctx, void *dl_handle, JSValue env)
|
||||
return JS_ThrowTypeError(ctx, "frame allocation failed");
|
||||
}
|
||||
|
||||
/* Clear any stale exception left by a previous interpreted run */
|
||||
if (JS_HasException(ctx))
|
||||
JS_GetException(ctx);
|
||||
|
||||
JSValue result = fn(ctx, fp);
|
||||
cell_rt_leave_frame(ctx); /* safe — closures have independent GC refs */
|
||||
g_current_dl_handle = prev_handle;
|
||||
|
||||
@@ -3076,7 +3076,7 @@ JSValue JS_ThrowError2 (JSContext *ctx, JSErrorEnum error_num, const char *fmt,
|
||||
if (add_backtrace) {
|
||||
print_backtrace (ctx, NULL, 0, 0);
|
||||
}
|
||||
return JS_Throw (ctx, JS_NULL);
|
||||
return JS_Throw (ctx, JS_TRUE);
|
||||
}
|
||||
|
||||
static JSValue JS_ThrowError (JSContext *ctx, JSErrorEnum error_num, const char *fmt, va_list ap) {
|
||||
|
||||
458
streamline.cm
458
streamline.cm
@@ -1020,6 +1020,461 @@ var streamline = function(ir, log) {
|
||||
return null
|
||||
}
|
||||
|
||||
// =========================================================
|
||||
// Pass: compress_slots — linear-scan register allocation
|
||||
// Reuses slots with non-overlapping live ranges to reduce
|
||||
// nr_slots. Mirrors mcode_compress_regs from mach.c.
|
||||
// Works across all functions for captured-slot tracking.
|
||||
// =========================================================
|
||||
|
||||
// Operand positions that hold slot references, for the ops whose layout is
// not "every numeric operand is a slot". Ops missing from this table are
// handled generically by scanning their operands for numbers.
var slot_idx_special = {
  // Closure variable access: only [1] is a slot of this function
  // ([2] is the slot in the ancestor's frame, [3] the nesting level).
  get: [1],
  put: [1],

  // Ops with a single destination slot at [1]; remaining operands are not
  // slots (for "function", [2] is an index into the function table).
  access: [1],
  int: [1],
  function: [1],
  regexp: [1],
  true: [1],
  false: [1],
  null: [1],
  record: [1],
  array: [1],

  // Call sequence
  invoke: [1, 2],
  tail_invoke: [1, 2],
  goinvoke: [1],
  setarg: [1, 3],
  frame: [1, 2],
  goframe: [1, 2],

  // Control flow: label operands are not slots
  jump: [],
  disrupt: [],
  jump_true: [1],
  jump_false: [1],
  jump_not_null: [1],
  return: [1]
}
|
||||
|
||||
// Return the operand positions of `instr` that hold slot references.
// Ops listed in slot_idx_special return their table entry as-is (the shared
// array, not a copy). For every other op, each numeric operand from position
// 1 up to, but excluding, the last two elements counts as a slot reference.
var get_slot_refs = function(instr) {
  var known = slot_idx_special[instr[0]]
  var positions = null
  var pos = 0
  var stop = 0
  if (known != null) return known
  positions = []
  stop = length(instr) - 2
  pos = 1
  while (pos < stop) {
    if (is_number(instr[pos])) positions[] = pos
    pos = pos + 1
  }
  return positions
}
|
||||
|
||||
// Compress the slots of a single function with a linear-scan allocator.
//
//   func           - function record; reads instructions, nr_slots and
//                    nr_args. On success its instructions are rewritten in
//                    place and nr_slots is lowered.
//   captured_slots - slot numbers of this function that nested functions
//                    reach through get/put (may be null)
//
// Returns the remap table (old slot -> new slot, indexed by old slot) when
// the slot count shrank, or null when nothing was changed.
//
// Slot 0 and the argument slots are pinned: they keep their numbers and are
// live for the whole function. Captured slots may be renumbered but are also
// treated as live for the whole function, so they never share a physical
// slot with anything else.
var compress_one_fn = function(func, captured_slots) {
  var instructions = func.instructions
  var nr_slots = func.nr_slots
  var nr_args = func.nr_args != null ? func.nr_args : 0
  var n = 0
  var pinned = 0
  var first_ref = null
  var last_ref = null
  var i = 0
  var j = 0
  var k = 0
  var s = 0
  var instr = null
  var refs = null
  var op = null
  var target = null
  var tpos = null
  var changed = false
  var label_map = null
  var live_slots = null
  var live_first = null
  var live_last = null
  var cnt = 0
  var key_s = 0
  var key_f = 0
  var key_l = 0
  var remap = null
  var pool = null
  var next_phys = 0
  var active_phys = null
  var active_last = null
  var phys = 0
  var mi = 0
  var old_val = 0
  var still_phys = null
  var still_last = null
  var rest_pool = null

  if (instructions == null || !is_number(nr_slots) || nr_slots <= 1) return null
  n = length(instructions)
  pinned = 1 + nr_args

  // Every slot is pinned: nothing can be compressed, and the pin loop below
  // would otherwise write past the end of the live-range arrays.
  if (pinned >= nr_slots) return null

  // ---- Step 1: live ranges (first and last referencing instruction) ----
  first_ref = array(nr_slots, -1)
  last_ref = array(nr_slots, -1)

  // Pin this + args for the whole function
  k = 0
  while (k < pinned) {
    first_ref[k] = 0
    last_ref[k] = n
    k = k + 1
  }

  // Record every slot reference in instruction order
  i = 0
  while (i < n) {
    instr = instructions[i]
    if (is_array(instr)) {
      refs = get_slot_refs(instr)
      j = 0
      while (j < length(refs)) {
        s = instr[refs[j]]
        if (is_number(s) && s >= 0 && s < nr_slots) {
          if (first_ref[s] < 0) first_ref[s] = i
          last_ref[s] = i
        }
        j = j + 1
      }
    }
    i = i + 1
  }

  // Pin captured slots AFTER the scan so the scan cannot overwrite the range.
  // The range always starts at 0: a nested function may read or write the
  // slot before this function's own first reference to it, so starting the
  // range later could let the slot share a physical slot with a temp that is
  // still live at that point.
  if (captured_slots != null) {
    k = 0
    while (k < length(captured_slots)) {
      s = captured_slots[k]
      if (s >= 0 && s < nr_slots) {
        first_ref[s] = 0
        last_ref[s] = n
      }
      k = k + 1
    }
  }

  // ---- Step 1b: extend ranges across backward jumps (loops) ----
  // Text entries in the instruction list are labels, except "_nop_" markers.
  label_map = {}
  i = 0
  while (i < n) {
    instr = instructions[i]
    if (is_text(instr) && !starts_with(instr, "_nop_")) {
      label_map[instr] = i
    }
    i = i + 1
  }

  // Iterate to a fixed point: extending one range can make it overlap
  // another loop that then has to extend it further.
  changed = true
  while (changed) {
    changed = false
    i = 0
    while (i < n) {
      instr = instructions[i]
      target = null
      if (is_array(instr)) {
        op = instr[0]
        if (op == "jump") {
          target = instr[1]
        } else if (op == "jump_true" || op == "jump_false" || op == "jump_not_null") {
          target = instr[2]
        }
      }
      tpos = null
      if (target != null && is_text(target)) {
        tpos = label_map[target]
      }
      if (tpos != null && tpos < i) {
        // Backward jump from i to tpos: a slot defined before the loop and
        // last used inside it must stay live until the jump itself.
        s = pinned
        while (s < nr_slots) {
          if (first_ref[s] >= 0 && first_ref[s] < tpos && last_ref[s] >= tpos && last_ref[s] < i) {
            last_ref[s] = i
            changed = true
          }
          s = s + 1
        }
      }
      i = i + 1
    }
  }

  // ---- Step 2: collect live intervals and sort by (first_ref, slot) ----
  live_slots = []
  live_first = []
  live_last = []
  s = pinned
  while (s < nr_slots) {
    if (first_ref[s] >= 0) {
      live_slots[] = s
      live_first[] = first_ref[s]
      live_last[] = last_ref[s]
    }
    s = s + 1
  }

  // Insertion sort over the three parallel arrays
  cnt = length(live_slots)
  i = 1
  while (i < cnt) {
    key_s = live_slots[i]
    key_f = live_first[i]
    key_l = live_last[i]
    j = i - 1
    while (j >= 0 && (live_first[j] > key_f || (live_first[j] == key_f && live_slots[j] > key_s))) {
      live_slots[j + 1] = live_slots[j]
      live_first[j + 1] = live_first[j]
      live_last[j + 1] = live_last[j]
      j = j - 1
    }
    live_slots[j + 1] = key_s
    live_first[j + 1] = key_f
    live_last[j + 1] = key_l
    i = i + 1
  }

  // ---- Linear-scan allocation ----
  // remap starts as the identity so pinned slots keep their numbers.
  remap = array(nr_slots)
  s = 0
  while (s < nr_slots) {
    remap[s] = s
    s = s + 1
  }

  pool = []
  next_phys = pinned
  active_phys = []
  active_last = []

  i = 0
  while (i < cnt) {
    // Expire intervals that ended strictly before this one starts; their
    // physical slots go back to the free pool.
    still_phys = []
    still_last = []
    j = 0
    while (j < length(active_phys)) {
      if (active_last[j] < live_first[i]) {
        pool[] = active_phys[j]
      } else {
        still_phys[] = active_phys[j]
        still_last[] = active_last[j]
      }
      j = j + 1
    }
    active_phys = still_phys
    active_last = still_last

    // Take the lowest free physical slot, or open a new one
    if (length(pool) > 0) {
      mi = 0
      j = 1
      while (j < length(pool)) {
        if (pool[j] < pool[mi]) mi = j
        j = j + 1
      }
      phys = pool[mi]
      rest_pool = []
      j = 0
      while (j < length(pool)) {
        if (j != mi) rest_pool[] = pool[j]
        j = j + 1
      }
      pool = rest_pool
    } else {
      phys = next_phys
      next_phys = next_phys + 1
    }

    remap[live_slots[i]] = phys
    active_phys[] = phys
    active_last[] = live_last[i]
    i = i + 1
  }

  // Physical slots are handed out contiguously from `pinned`, and each one
  // was first assigned to some live slot, so next_phys is exactly the new
  // slot count. No gain means no rewrite.
  if (next_phys >= nr_slots) return null

  // ---- Step 3: rewrite slot operands in place ----
  i = 0
  while (i < n) {
    instr = instructions[i]
    if (is_array(instr)) {
      refs = get_slot_refs(instr)
      j = 0
      while (j < length(refs)) {
        old_val = instr[refs[j]]
        if (is_number(old_val) && old_val >= 0 && old_val < nr_slots) {
          instr[refs[j]] = remap[old_val]
        }
        j = j + 1
      }
    }
    i = i + 1
  }

  func.nr_slots = next_phys
  return remap
}
|
||||
|
||||
// Record `parent_idx` as the parent of every function created by a
// "function" instruction in `instrs` (the created function's index is
// operand [2]). Indices outside [0, func_count) are ignored.
var cs_mark_children = function(instrs, parent_idx, parent_of, func_count) {
  var pos = 0
  var instr = null
  var child_idx = 0
  if (instrs == null) return null
  while (pos < length(instrs)) {
    instr = instrs[pos]
    if (is_array(instr) && instr[0] == "function") {
      child_idx = instr[2]
      if (child_idx >= 0 && child_idx < func_count) {
        parent_of[child_idx] = parent_idx
      }
    }
    pos = pos + 1
  }
  return null
}

// Walk `level` steps up the parent chain starting at function index `fi`.
// Returns the ancestor's index, or -1 when the chain ends before `level`
// steps were taken.
var cs_find_ancestor = function(parent_of, fi, level) {
  var ancestor = fi
  var steps = 0
  while (steps < level && ancestor >= 0) {
    ancestor = parent_of[ancestor]
    steps = steps + 1
  }
  return ancestor
}

// Compress slots across the whole module.
//
// Functions are identified by their index in ir.functions; the index
// func_count stands for ir.main. The pass:
//   1. derives each function's parent from the "function" instructions,
//   2. collects, per function, the slots that nested functions reach via
//      get/put (operand [2] = slot in the ancestor, operand [3] = level),
//   3. compresses every function and then main, keeping the remap tables,
//   4. rewrites operand [2] of every get/put using the remap table of the
//      ancestor it refers to.
//
// Mutates ir in place and always returns null.
var compress_slots = function(ir) {
  if (ir == null || ir.main == null) return null
  var functions = ir.functions != null ? ir.functions : []
  var func_count = length(functions)
  var parent_of = null
  var captured = null
  var remaps = null
  var remap_sizes = null
  var instrs = null
  var instr = null
  var parent_slot = 0
  var ancestor = 0
  var caps = null
  var found = false
  var anc_remap = null
  var old_slot = 0
  var fi = 0
  var i = 0
  var k = 0

  // parent_of[i] = parent index (func_count = main), -1 = unknown.
  // It has one extra entry for main itself, so walking past main yields -1
  // instead of reading beyond the end of the array.
  parent_of = array(func_count + 1, -1)

  // Main first, then each function: a later scan overrides an earlier one.
  cs_mark_children(ir.main.instructions, func_count, parent_of, func_count)
  fi = 0
  while (fi < func_count) {
    cs_mark_children(functions[fi].instructions, fi, parent_of, func_count)
    fi = fi + 1
  }

  // Captured slots per function (index func_count = main), without duplicates
  captured = array(func_count + 1)
  i = 0
  while (i < func_count + 1) {
    captured[i] = []
    i = i + 1
  }

  fi = 0
  while (fi < func_count) {
    instrs = functions[fi].instructions
    if (instrs != null) {
      i = 0
      while (i < length(instrs)) {
        instr = instrs[i]
        if (is_array(instr) && (instr[0] == "get" || instr[0] == "put")) {
          parent_slot = instr[2]
          ancestor = cs_find_ancestor(parent_of, fi, instr[3])
          if (ancestor >= 0) {
            caps = captured[ancestor]
            found = false
            k = 0
            while (k < length(caps) && !found) {
              if (caps[k] == parent_slot) found = true
              k = k + 1
            }
            if (!found) caps[] = parent_slot
          }
        }
        i = i + 1
      }
    }
    fi = fi + 1
  }

  // Compress each function and keep its remap table. remap_sizes holds the
  // slot count from BEFORE compression: that is the valid index range of the
  // remap table.
  remaps = array(func_count + 1)
  remap_sizes = array(func_count + 1, 0)

  fi = 0
  while (fi < func_count) {
    remap_sizes[fi] = functions[fi].nr_slots
    remaps[fi] = compress_one_fn(functions[fi], captured[fi])
    fi = fi + 1
  }
  remap_sizes[func_count] = ir.main.nr_slots
  remaps[func_count] = compress_one_fn(ir.main, captured[func_count])

  // Fix get/put parent-slot operands using the ancestor's remap table.
  // Ancestors that were not compressed have no table and need no fix.
  fi = 0
  while (fi < func_count) {
    instrs = functions[fi].instructions
    if (instrs != null) {
      i = 0
      while (i < length(instrs)) {
        instr = instrs[i]
        if (is_array(instr) && (instr[0] == "get" || instr[0] == "put")) {
          ancestor = cs_find_ancestor(parent_of, fi, instr[3])
          if (ancestor >= 0 && remaps[ancestor] != null) {
            anc_remap = remaps[ancestor]
            old_slot = instr[2]
            if (old_slot >= 0 && old_slot < remap_sizes[ancestor]) {
              instr[2] = anc_remap[old_slot]
            }
          }
        }
        i = i + 1
      }
    }
    fi = fi + 1
  }

  return null
}
|
||||
|
||||
// =========================================================
|
||||
// Compose all passes
|
||||
// =========================================================
|
||||
@@ -1090,6 +1545,9 @@ var streamline = function(ir, log) {
|
||||
}
|
||||
}
|
||||
|
||||
// Compress slots across all functions (must run after per-function passes)
|
||||
compress_slots(ir)
|
||||
|
||||
return ir
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user