fix merge error

This commit is contained in:
2026-02-21 01:21:26 -06:00
parent 3ebd98fc00
commit 5caa5d1288
6 changed files with 180 additions and 30 deletions

74
boot_miscompile_bad.cm Normal file
View File

@@ -0,0 +1,74 @@
// boot_miscompile_bad.cm — Documents a boot compiler miscompilation bug.
//
// BUG SUMMARY:
// The boot compiler's optimizer (likely compress_slots, eliminate_moves,
// or infer_param_types) miscompiles a specific pattern when it appears
// inside streamline.cm. The pattern: an array-loaded value used as a
// dynamic index for another array store, inside a guarded block:
//
// sv = instr[j]
// if (is_number(sv) && sv >= 0 && sv < nr_slots) {
// last_ref[sv] = i // <-- miscompiled: sv reads wrong slot
// }
//
// The bug is CONTEXT-DEPENDENT on streamline.cm's exact function/closure
// structure. A standalone module with the same pattern does NOT trigger it.
// The boot optimizer's cross-function analysis (infer_param_types, type
// propagation, etc.) makes different decisions in the full streamline.cm
// context, leading to the miscompilation.
//
// SYMPTOMS:
// - 'log' is not defined (comparison error path fires on non-comparable values)
// - array index must be a number (store_dynamic with corrupted index)
// - Error line has NO reference to 'log' — the reference comes from the
// error-reporting code path of the < operator
// - Non-deterministic: different error messages on different runs
// - NOT a GC bug: persists with --heap 4GB
// - NOT slot overflow: function has only 85 raw slots
//
// TO REPRODUCE:
// In streamline.cm, replace the build_slot_liveness function body with
// this version (raw operand scanning instead of get_slot_refs):
//
// var build_slot_liveness = function(instructions, nr_slots) {
// var last_ref = array(nr_slots, -1)
// var n = length(instructions)
// var i = 0
// var j = 0
// var limit = 0
// var sv = 0
// var instr = null
//
// while (i < n) {
// instr = instructions[i]
// if (is_array(instr)) {
// j = 1
// limit = length(instr) - 2
// while (j < limit) {
// sv = instr[j]
// if (is_number(sv) && sv >= 0 && sv < nr_slots) {
// last_ref[sv] = i
// }
// j = j + 1
// }
// }
// i = i + 1
// }
// return last_ref
// }
//
// Then: rm -rf .cell/build && ./cell --dev vm_suite
//
// WORKAROUND:
// Use get_slot_refs(instr) to iterate only over known slot-reference
// positions. This produces different IR that the boot optimizer handles
// correctly, and is also more semantically correct.
//
// FIXING:
// To find the root cause, compare the boot-compiled bytecodes of
// build_slot_liveness (in the full streamline.cm context) vs the
// source-compiled bytecodes. Use disasm.ce with --optimized to see
// what the source compiler produces. The boot-compiled bytecodes
// would need a C-level MachCode dump to inspect.
return null

View File

@@ -339,10 +339,6 @@ var mcode = function(ast) {
return t == "num" || t == "int"
}
// True when the type recorded for `slot` in s_slot_types is "int".
// Entries of s_slot_types are keyed by text(slot).
var slot_is_int = function(slot) {
  var recorded = s_slot_types[text(slot)]
  return recorded == "int"
}
// True when the type recorded for `slot` in s_slot_types is "text".
// Entries of s_slot_types are keyed by text(slot).
var slot_is_text = function(slot) {
  var key = text(slot)
  return s_slot_types[key] == "text"
}
@@ -351,6 +347,10 @@ var mcode = function(ast) {
s_slot_types[text(slot)] = typ
}
// Copy whatever type is recorded for slot `src` in s_slot_types onto slot
// `dest` (both keyed by text(slot)). If nothing is recorded for `src`, the
// entry for `dest` receives that same absent value.
var propagate_slot = function(dest, src) {
  var dest_key = text(dest)
  s_slot_types[dest_key] = s_slot_types[text(src)]
}
// emit_add_decomposed: emit type-dispatched add (text → concat, num → add)
// reads _bp_dest, _bp_left, _bp_right, _bp_ln, _bp_rn from closure
var emit_add_decomposed = function() {
@@ -444,12 +444,6 @@ var mcode = function(ast) {
var t0 = 0
var t1 = 0
// Known-int fast path
if ((is_known_int(_bp_ln) || slot_is_int(left))
&& (is_known_int(_bp_rn) || slot_is_int(right))) {
emit_3("eq_int", dest, left, right)
return null
}
// Known-num fast path
if ((is_known_number(_bp_ln) || slot_is_num(left))
&& (is_known_number(_bp_rn) || slot_is_num(right))) {
@@ -536,12 +530,6 @@ var mcode = function(ast) {
var t0 = 0
var t1 = 0
// Known-int fast path
if ((is_known_int(_bp_ln) || slot_is_int(left))
&& (is_known_int(_bp_rn) || slot_is_int(right))) {
emit_3("ne_int", dest, left, right)
return null
}
// Known-num fast path
if ((is_known_number(_bp_ln) || slot_is_num(left))
&& (is_known_number(_bp_rn) || slot_is_num(right))) {
@@ -636,24 +624,17 @@ var mcode = function(ast) {
var right = _bp_right
var t0 = 0
var t1 = 0
var left_is_int = is_known_int(_bp_ln) || slot_is_int(left)
var left_is_num = is_known_number(_bp_ln) || slot_is_num(left)
var left_is_text = is_known_text(_bp_ln) || slot_is_text(left)
var right_is_int = is_known_int(_bp_rn) || slot_is_int(right)
var right_is_num = is_known_number(_bp_rn) || slot_is_num(right)
var right_is_text = is_known_text(_bp_rn) || slot_is_text(right)
var not_num = null
var done = null
var err = null
// Both known int
if (left_is_int && right_is_int) {
emit_3(int_op, dest, left, right)
return null
}
// Both known number
if (left_is_num && right_is_num) {
emit_3(float_op, dest, left, right)
emit_3(poly_op, dest, left, right)
return null
}
// Both known text
@@ -1645,6 +1626,7 @@ var mcode = function(ast) {
local = find_var(name)
if (local >= 0) {
emit_2("move", local, dest)
propagate_slot(local, dest)
}
} else if (level > 0) {
_lv = level - 1
@@ -1744,9 +1726,11 @@ var mcode = function(ast) {
if (level == 0 || level == -1) {
slot = find_var(name)
if (slot >= 0) {
mark_slot(slot, null)
val_slot = gen_expr(right, slot)
if (val_slot != slot) {
emit_2("move", slot, val_slot)
propagate_slot(slot, val_slot)
}
return val_slot
}

View File

@@ -1627,6 +1627,10 @@ var parse = function(tokens, src, filename, tokenizer) {
if (r.v != null) {
left_node.level = r.level
left_node.function_nr = r.def_function_nr
if (r.level > 0) {
r.v.nr_uses = r.v.nr_uses + 1
r.v.closure = 1
}
} else {
left_node.level = -1
}
@@ -1718,6 +1722,10 @@ var parse = function(tokens, src, filename, tokenizer) {
if (r.v != null) {
operand.level = r.level
operand.function_nr = r.def_function_nr
if (r.level > 0) {
r.v.nr_uses = r.v.nr_uses + 1
r.v.closure = 1
}
} else {
operand.level = -1
}

View File

@@ -3008,10 +3008,10 @@ static MachCode *mcode_lower_func(cJSON *fobj, const char *filename) {
if (s.nr_slots > 255) {
cJSON *nm_chk = cJSON_GetObjectItemCaseSensitive(fobj, "name");
const char *fn_name = nm_chk ? cJSON_GetStringValue(nm_chk) : "<anonymous>";
fprintf(stderr, "ERROR: function '%s' has %d slots (max 255). "
fprintf(stderr, "FATAL: function '%s' has %d slots (max 255). "
"Ensure the streamline optimizer ran before mach compilation.\n",
fn_name, s.nr_slots);
return NULL;
abort();
}
int dis_raw = (int)cJSON_GetNumberValue(
cJSON_GetObjectItemCaseSensitive(fobj, "disruption_pc"));

View File

@@ -1370,7 +1370,7 @@ JSValue gc_copy_value (JSContext *ctx, JSValue v, uint8_t *from_base, uint8_t *f
/* Frame shortening: returned frames (caller == JS_NULL) only need
[this][args][closure_locals] — shrink during copy. */
if (0 && type == OBJ_FRAME) {
if (type == OBJ_FRAME) {
JSFrame *f = (JSFrame *)hdr_ptr;
if (JS_IsNull (f->caller) && JS_IsPtr (f->function)) {
/* fn may be forwarded, but kind (offset 18) and u.cell.code (offset 24)

View File

@@ -1305,6 +1305,85 @@ var streamline = function(ir, log) {
push: 2, setarg: 3, put: 1
}
// Build the last_ref liveness array for a function's instructions.
// Returns an array of nr_slots entries where last_ref[slot] is the index of
// the last instruction up to which that slot must be considered referenced.
// Entries are created as -1 (assuming array(n, v) fills with v) and are only
// overwritten for slots that are actually referenced.
//
// Slot operands are located with get_slot_refs(instr), so only actual slot
// reference positions are visited. Keep it that way: boot_miscompile_bad.cm
// documents that scanning the raw operands here instead gets miscompiled by
// the boot compiler.
//
// instructions: instruction list. Array entries are instructions with the op
//               name at index 0; text entries are recorded as jump labels
//               unless they start with "_nop_".
// nr_slots:     number of slots; operand values outside [0, nr_slots) are
//               ignored.
var build_slot_liveness = function(instructions, nr_slots) {
var last_ref = array(nr_slots, -1)
var n = length(instructions)
var refs = null
var i = 0
var j = 0
var s = 0
var instr = null
var label_map = null
var changed = false
var op = null
var target = null
var tpos = 0
// Pass 1: scan instructions for slot references. Instructions are visited in
// order, so the final write to last_ref[s] is the highest referencing index.
while (i < n) {
instr = instructions[i]
if (is_array(instr)) {
// refs holds the operand positions of instr that contain slot numbers
refs = get_slot_refs(instr)
j = 0
while (j < length(refs)) {
s = instr[refs[j]]
// Skip operands that are not numbers or fall outside the slot range
if (is_number(s) && s >= 0 && s < nr_slots) {
last_ref[s] = i
}
j = j + 1
}
}
i = i + 1
}
// Pass 2: extend for backward jumps (loops).
// First map every label (text entry not starting with "_nop_") to its index.
label_map = {}
i = 0
while (i < n) {
instr = instructions[i]
if (is_text(instr) && !starts_with(instr, "_nop_")) {
label_map[instr] = i
}
i = i + 1
}
// Iterate to a fixpoint: extending one loop can push a last_ref into the
// range of another backward jump, so repeat until a full sweep changes nothing.
changed = true
while (changed) {
changed = false
i = 0
while (i < n) {
instr = instructions[i]
if (is_array(instr)) {
target = null
op = instr[0]
// The label operand is at index 1 for "jump" and at index 2 for the
// conditional jumps handled here; other ops leave target as null.
if (op == "jump") {
target = instr[1]
} else if (op == "jump_true" || op == "jump_false" || op == "jump_not_null") {
target = instr[2]
}
if (target != null && is_text(target)) {
tpos = label_map[target]
// Only backward jumps (label positioned before the jump) form a loop
if (tpos != null && tpos < i) {
// Any slot whose last reference lies in [tpos, i) may be read again on
// the next trip around the loop, so keep it live up to the jump itself.
s = 0
while (s < nr_slots) {
if (last_ref[s] >= 0 && last_ref[s] >= tpos && last_ref[s] < i) {
last_ref[s] = i
changed = true
}
s = s + 1
}
}
}
}
i = i + 1
}
}
return last_ref
}
var insert_stone_text = function(func, log) {
var instructions = func.instructions
var nr_slots = func.nr_slots
@@ -1320,6 +1399,7 @@ var streamline = function(ir, log) {
var slot = 0
var nc = 0
var shift = 0
var last_ref = null
if (instructions == null || length(instructions) == 0) {
return null
@@ -1329,10 +1409,13 @@ var streamline = function(ir, log) {
events = log.events
}
// Build liveness info (in separate function to stay under slot limit)
last_ref = build_slot_liveness(instructions, nr_slots)
// Walk instructions, tracking types, inserting stone_text
n = length(instructions)
slot_types = array(nr_slots, T_UNKNOWN)
result = []
n = length(instructions)
i = 0
while (i < n) {
instr = instructions[i]
@@ -1353,9 +1436,10 @@ var streamline = function(ir, log) {
}
}
} else if (op == "move") {
// Conservatively stone source before move if provably text
// Stone source before move only if source is provably text
// AND source slot is still live after this instruction
slot = instr[2]
if (is_number(slot) && slot_is(slot_types, slot, T_TEXT)) {
if (is_number(slot) && slot_is(slot_types, slot, T_TEXT) && last_ref[slot] > i) {
result[] = ["stone_text", slot]
nc = nc + 1
if (is_number(dpc) && i < dpc) shift = shift + 1