From 5caa5d1288c6d59629b0a8dbbd4aaadb2613f5a5 Mon Sep 17 00:00:00 2001 From: John Alanbrook Date: Sat, 21 Feb 2026 01:21:26 -0600 Subject: [PATCH] fix merge error --- boot_miscompile_bad.cm | 74 ++++++++++++++++++++++++++++++++++ mcode.cm | 32 ++++----------- parse.cm | 8 ++++ source/mach.c | 4 +- source/runtime.c | 2 +- streamline.cm | 90 ++++++++++++++++++++++++++++++++++++++++-- 6 files changed, 180 insertions(+), 30 deletions(-) create mode 100644 boot_miscompile_bad.cm diff --git a/boot_miscompile_bad.cm b/boot_miscompile_bad.cm new file mode 100644 index 00000000..9e33f0bd --- /dev/null +++ b/boot_miscompile_bad.cm @@ -0,0 +1,74 @@ +// boot_miscompile_bad.cm — Documents a boot compiler miscompilation bug. +// +// BUG SUMMARY: +// The boot compiler's optimizer (likely compress_slots, eliminate_moves, +// or infer_param_types) miscompiles a specific pattern when it appears +// inside streamline.cm. The pattern: an array-loaded value used as a +// dynamic index for another array store, inside a guarded block: +// +// sv = instr[j] +// if (is_number(sv) && sv >= 0 && sv < nr_slots) { +// last_ref[sv] = i // <-- miscompiled: sv reads wrong slot +// } +// +// The bug is CONTEXT-DEPENDENT on streamline.cm's exact function/closure +// structure. A standalone module with the same pattern does NOT trigger it. +// The boot optimizer's cross-function analysis (infer_param_types, type +// propagation, etc.) makes different decisions in the full streamline.cm +// context, leading to the miscompilation. 
+// +// SYMPTOMS: +// - 'log' is not defined (comparison error path fires on non-comparable values) +// - array index must be a number (store_dynamic with corrupted index) +// - The reported error line has NO reference to 'log' — the reference comes +// from the error-reporting code path of the < operator +// - Non-deterministic: different error messages on different runs +// - NOT a GC bug: persists with --heap 4GB +// - NOT slot overflow: function has only 85 raw slots (the limit is 255) +// +// TO REPRODUCE: +// In streamline.cm, replace the build_slot_liveness definition with +// this version (raw operand scanning instead of get_slot_refs): +// +// var build_slot_liveness = function(instructions, nr_slots) { +// var last_ref = array(nr_slots, -1) +// var n = length(instructions) +// var i = 0 +// var j = 0 +// var limit = 0 +// var sv = 0 +// var instr = null +// +// while (i < n) { +// instr = instructions[i] +// if (is_array(instr)) { +// j = 1 +// limit = length(instr) - 2 +// while (j < limit) { +// sv = instr[j] +// if (is_number(sv) && sv >= 0 && sv < nr_slots) { +// last_ref[sv] = i +// } +// j = j + 1 +// } +// } +// i = i + 1 +// } +// return last_ref +// } +// +// Then: rm -rf .cell/build && ./cell --dev vm_suite +// +// WORKAROUND: +// Use get_slot_refs(instr) to iterate only over known slot-reference +// positions. This produces different IR that the boot optimizer handles +// correctly, and is also more semantically correct. +// +// FIXING: +// To find the root cause, compare the boot-compiled bytecodes of +// build_slot_liveness (in the full streamline.cm context) vs the +// source-compiled bytecodes. Use disasm.ce with --optimized to see +// what the source compiler produces. The boot-compiled bytecodes +// would need a C-level MachCode dump to inspect. 
+ +return null diff --git a/mcode.cm b/mcode.cm index e885357a..b0e3e4f1 100644 --- a/mcode.cm +++ b/mcode.cm @@ -339,10 +339,6 @@ var mcode = function(ast) { return t == "num" || t == "int" } - var slot_is_int = function(slot) { - return s_slot_types[text(slot)] == "int" - } - var slot_is_text = function(slot) { return s_slot_types[text(slot)] == "text" } @@ -351,6 +347,10 @@ var mcode = function(ast) { s_slot_types[text(slot)] = typ } + var propagate_slot = function(dest, src) { + s_slot_types[text(dest)] = s_slot_types[text(src)] + } + // emit_add_decomposed: emit type-dispatched add (text → concat, num → add) // reads _bp_dest, _bp_left, _bp_right, _bp_ln, _bp_rn from closure var emit_add_decomposed = function() { @@ -444,12 +444,6 @@ var mcode = function(ast) { var t0 = 0 var t1 = 0 - // Known-int fast path - if ((is_known_int(_bp_ln) || slot_is_int(left)) - && (is_known_int(_bp_rn) || slot_is_int(right))) { - emit_3("eq_int", dest, left, right) - return null - } // Known-num fast path if ((is_known_number(_bp_ln) || slot_is_num(left)) && (is_known_number(_bp_rn) || slot_is_num(right))) { @@ -536,12 +530,6 @@ var mcode = function(ast) { var t0 = 0 var t1 = 0 - // Known-int fast path - if ((is_known_int(_bp_ln) || slot_is_int(left)) - && (is_known_int(_bp_rn) || slot_is_int(right))) { - emit_3("ne_int", dest, left, right) - return null - } // Known-num fast path if ((is_known_number(_bp_ln) || slot_is_num(left)) && (is_known_number(_bp_rn) || slot_is_num(right))) { @@ -636,24 +624,17 @@ var mcode = function(ast) { var right = _bp_right var t0 = 0 var t1 = 0 - var left_is_int = is_known_int(_bp_ln) || slot_is_int(left) var left_is_num = is_known_number(_bp_ln) || slot_is_num(left) var left_is_text = is_known_text(_bp_ln) || slot_is_text(left) - var right_is_int = is_known_int(_bp_rn) || slot_is_int(right) var right_is_num = is_known_number(_bp_rn) || slot_is_num(right) var right_is_text = is_known_text(_bp_rn) || slot_is_text(right) var not_num = null var done = 
null var err = null - // Both known int - if (left_is_int && right_is_int) { - emit_3(int_op, dest, left, right) - return null - } // Both known number if (left_is_num && right_is_num) { - emit_3(float_op, dest, left, right) + emit_3(poly_op, dest, left, right) return null } // Both known text @@ -1645,6 +1626,7 @@ var mcode = function(ast) { local = find_var(name) if (local >= 0) { emit_2("move", local, dest) + propagate_slot(local, dest) } } else if (level > 0) { _lv = level - 1 @@ -1744,9 +1726,11 @@ var mcode = function(ast) { if (level == 0 || level == -1) { slot = find_var(name) if (slot >= 0) { + mark_slot(slot, null) val_slot = gen_expr(right, slot) if (val_slot != slot) { emit_2("move", slot, val_slot) + propagate_slot(slot, val_slot) } return val_slot } diff --git a/parse.cm b/parse.cm index 01e8f577..cb70ea66 100644 --- a/parse.cm +++ b/parse.cm @@ -1627,6 +1627,10 @@ var parse = function(tokens, src, filename, tokenizer) { if (r.v != null) { left_node.level = r.level left_node.function_nr = r.def_function_nr + if (r.level > 0) { + r.v.nr_uses = r.v.nr_uses + 1 + r.v.closure = 1 + } } else { left_node.level = -1 } @@ -1718,6 +1722,10 @@ var parse = function(tokens, src, filename, tokenizer) { if (r.v != null) { operand.level = r.level operand.function_nr = r.def_function_nr + if (r.level > 0) { + r.v.nr_uses = r.v.nr_uses + 1 + r.v.closure = 1 + } } else { operand.level = -1 } diff --git a/source/mach.c b/source/mach.c index 7cf3b86b..2d3bb32d 100644 --- a/source/mach.c +++ b/source/mach.c @@ -3008,10 +3008,10 @@ static MachCode *mcode_lower_func(cJSON *fobj, const char *filename) { if (s.nr_slots > 255) { cJSON *nm_chk = cJSON_GetObjectItemCaseSensitive(fobj, "name"); const char *fn_name = nm_chk ? cJSON_GetStringValue(nm_chk) : ""; - fprintf(stderr, "ERROR: function '%s' has %d slots (max 255). " + fprintf(stderr, "FATAL: function '%s' has %d slots (max 255). 
" "Ensure the streamline optimizer ran before mach compilation.\n", fn_name, s.nr_slots); - return NULL; + abort(); } int dis_raw = (int)cJSON_GetNumberValue( cJSON_GetObjectItemCaseSensitive(fobj, "disruption_pc")); diff --git a/source/runtime.c b/source/runtime.c index c5ec4e94..acc23613 100644 --- a/source/runtime.c +++ b/source/runtime.c @@ -1370,7 +1370,7 @@ JSValue gc_copy_value (JSContext *ctx, JSValue v, uint8_t *from_base, uint8_t *f /* Frame shortening: returned frames (caller == JS_NULL) only need [this][args][closure_locals] — shrink during copy. */ - if (0 && type == OBJ_FRAME) { + if (type == OBJ_FRAME) { JSFrame *f = (JSFrame *)hdr_ptr; if (JS_IsNull (f->caller) && JS_IsPtr (f->function)) { /* fn may be forwarded, but kind (offset 18) and u.cell.code (offset 24) diff --git a/streamline.cm b/streamline.cm index c290fe93..7ce577ec 100644 --- a/streamline.cm +++ b/streamline.cm @@ -1305,6 +1305,85 @@ var streamline = function(ir, log) { push: 2, setarg: 3, put: 1 } + // Build last_ref liveness array for a function's instructions. + // Returns array where last_ref[slot] = last instruction index referencing that slot. + // Uses get_slot_refs to only visit actual slot reference positions. 
+ var build_slot_liveness = function(instructions, nr_slots) { + var last_ref = array(nr_slots, -1) + var n = length(instructions) + var refs = null + var i = 0 + var j = 0 + var s = 0 + var instr = null + var label_map = null + var changed = false + var op = null + var target = null + var tpos = 0 + + // Scan instructions for slot references + while (i < n) { + instr = instructions[i] + if (is_array(instr)) { + refs = get_slot_refs(instr) + j = 0 + while (j < length(refs)) { + s = instr[refs[j]] + if (is_number(s) && s >= 0 && s < nr_slots) { + last_ref[s] = i + } + j = j + 1 + } + } + i = i + 1 + } + + // Extend for backward jumps (loops) + label_map = {} + i = 0 + while (i < n) { + instr = instructions[i] + if (is_text(instr) && !starts_with(instr, "_nop_")) { + label_map[instr] = i + } + i = i + 1 + } + changed = true + while (changed) { + changed = false + i = 0 + while (i < n) { + instr = instructions[i] + if (is_array(instr)) { + target = null + op = instr[0] + if (op == "jump") { + target = instr[1] + } else if (op == "jump_true" || op == "jump_false" || op == "jump_not_null") { + target = instr[2] + } + if (target != null && is_text(target)) { + tpos = label_map[target] + if (tpos != null && tpos < i) { + s = 0 + while (s < nr_slots) { + if (last_ref[s] >= 0 && last_ref[s] >= tpos && last_ref[s] < i) { + last_ref[s] = i + changed = true + } + s = s + 1 + } + } + } + } + i = i + 1 + } + } + + return last_ref + } + var insert_stone_text = function(func, log) { var instructions = func.instructions var nr_slots = func.nr_slots @@ -1320,6 +1399,7 @@ var streamline = function(ir, log) { var slot = 0 var nc = 0 var shift = 0 + var last_ref = null if (instructions == null || length(instructions) == 0) { return null @@ -1329,10 +1409,13 @@ var streamline = function(ir, log) { events = log.events } + // Build liveness info (in separate function to stay under slot limit) + last_ref = build_slot_liveness(instructions, nr_slots) + // Walk instructions, tracking types, 
inserting stone_text + n = length(instructions) slot_types = array(nr_slots, T_UNKNOWN) result = [] - n = length(instructions) i = 0 while (i < n) { instr = instructions[i] @@ -1353,9 +1436,10 @@ var streamline = function(ir, log) { } } } else if (op == "move") { - // Conservatively stone source before move if provably text + // Stone source before move only if source is provably text + // AND source slot is still live after this instruction slot = instr[2] - if (is_number(slot) && slot_is(slot_types, slot, T_TEXT)) { + if (is_number(slot) && slot_is(slot_types, slot, T_TEXT) && last_ref[slot] > i) { result[] = ["stone_text", slot] nc = nc + 1 if (is_number(dpc) && i < dpc) shift = shift + 1