fix merge error
This commit is contained in:
74
boot_miscompile_bad.cm
Normal file
74
boot_miscompile_bad.cm
Normal file
@@ -0,0 +1,74 @@
|
||||
// boot_miscompile_bad.cm — Documents a boot compiler miscompilation bug.
|
||||
//
|
||||
// BUG SUMMARY:
|
||||
// The boot compiler's optimizer (likely compress_slots, eliminate_moves,
|
||||
// or infer_param_types) miscompiles a specific pattern when it appears
|
||||
// inside streamline.cm. The pattern: an array-loaded value used as a
|
||||
// dynamic index for another array store, inside a guarded block:
|
||||
//
|
||||
// sv = instr[j]
|
||||
// if (is_number(sv) && sv >= 0 && sv < nr_slots) {
|
||||
//       last_ref[sv] = i     // <-- miscompiled: sv is read from the wrong slot
|
||||
// }
|
||||
//
|
||||
// The bug is CONTEXT-DEPENDENT on streamline.cm's exact function/closure
|
||||
// structure. A standalone module with the same pattern does NOT trigger it.
|
||||
// The boot optimizer's cross-function analysis (infer_param_types, type
|
||||
// propagation, etc.) makes different decisions in the full streamline.cm
|
||||
// context, leading to the miscompilation.
|
||||
//
|
||||
// SYMPTOMS:
|
||||
// - 'log' is not defined (comparison error path fires on non-comparable values)
|
||||
// - array index must be a number (store_dynamic with corrupted index)
|
||||
// - Error line has NO reference to 'log' — the reference comes from the
|
||||
// error-reporting code path of the < operator
|
||||
// - Non-deterministic: different error messages on different runs
|
||||
// - NOT a GC bug: persists with --heap 4GB
|
||||
//   - NOT slot overflow: function has only 85 raw slots (the limit is 255)
|
||||
//
|
||||
// TO REPRODUCE:
|
||||
// In streamline.cm, replace the build_slot_liveness function body with
|
||||
// this version (raw operand scanning instead of get_slot_refs):
|
||||
//
|
||||
// var build_slot_liveness = function(instructions, nr_slots) {
|
||||
// var last_ref = array(nr_slots, -1)
|
||||
// var n = length(instructions)
|
||||
// var i = 0
|
||||
// var j = 0
|
||||
// var limit = 0
|
||||
// var sv = 0
|
||||
// var instr = null
|
||||
//
|
||||
// while (i < n) {
|
||||
// instr = instructions[i]
|
||||
// if (is_array(instr)) {
|
||||
// j = 1
|
||||
// limit = length(instr) - 2
|
||||
// while (j < limit) {
|
||||
// sv = instr[j]
|
||||
// if (is_number(sv) && sv >= 0 && sv < nr_slots) {
|
||||
// last_ref[sv] = i
|
||||
// }
|
||||
// j = j + 1
|
||||
// }
|
||||
// }
|
||||
// i = i + 1
|
||||
// }
|
||||
// return last_ref
|
||||
// }
|
||||
//
|
||||
// Then: rm -rf .cell/build && ./cell --dev vm_suite
|
||||
//
|
||||
// WORKAROUND:
|
||||
// Use get_slot_refs(instr) to iterate only over known slot-reference
|
||||
// positions. This produces different IR that the boot optimizer handles
|
||||
// correctly, and is also more semantically correct.
|
||||
//
|
||||
// FINDING THE ROOT CAUSE:
|
||||
// To find the root cause, compare the boot-compiled bytecodes of
|
||||
// build_slot_liveness (in the full streamline.cm context) vs the
|
||||
// source-compiled bytecodes. Use disasm.ce with --optimized to see
|
||||
// what the source compiler produces. The boot-compiled bytecodes
|
||||
// would need a C-level MachCode dump to inspect.
|
||||
|
||||
// The module's only statement: it returns null, so the file carries nothing
// but the bug write-up in the comments above.
return null
|
||||
32
mcode.cm
32
mcode.cm
@@ -339,10 +339,6 @@ var mcode = function(ast) {
|
||||
return t == "num" || t == "int"
|
||||
}
|
||||
|
||||
// slot_is_int(slot): reports whether the type recorded for `slot` in
// s_slot_types is "int". The table is looked up with text(slot) as the key.
var slot_is_int = function(slot) {
|
||||
return s_slot_types[text(slot)] == "int"
|
||||
}
|
||||
|
||||
// slot_is_text(slot): reports whether the type recorded for `slot` in
// s_slot_types is "text". The table is looked up with text(slot) as the key.
var slot_is_text = function(slot) {
|
||||
return s_slot_types[text(slot)] == "text"
|
||||
}
|
||||
@@ -351,6 +347,10 @@ var mcode = function(ast) {
|
||||
s_slot_types[text(slot)] = typ
|
||||
}
|
||||
|
||||
// propagate_slot(dest, src): copies the s_slot_types entry of `src` onto `dest`
// (both keyed by text(slot)). The call sites visible in this diff invoke it
// right after emitting a "move" into dest.
// NOTE(review): if src has no entry, dest receives whatever the lookup yields
// (presumably null, i.e. unknown) — confirm.
var propagate_slot = function(dest, src) {
|
||||
s_slot_types[text(dest)] = s_slot_types[text(src)]
|
||||
}
|
||||
|
||||
// emit_add_decomposed: emit type-dispatched add (text → concat, num → add)
|
||||
// reads _bp_dest, _bp_left, _bp_right, _bp_ln, _bp_rn from closure
|
||||
var emit_add_decomposed = function() {
|
||||
@@ -444,12 +444,6 @@ var mcode = function(ast) {
|
||||
var t0 = 0
|
||||
var t1 = 0
|
||||
|
||||
// Known-int fast path
|
||||
if ((is_known_int(_bp_ln) || slot_is_int(left))
|
||||
&& (is_known_int(_bp_rn) || slot_is_int(right))) {
|
||||
emit_3("eq_int", dest, left, right)
|
||||
return null
|
||||
}
|
||||
// Known-num fast path
|
||||
if ((is_known_number(_bp_ln) || slot_is_num(left))
|
||||
&& (is_known_number(_bp_rn) || slot_is_num(right))) {
|
||||
@@ -536,12 +530,6 @@ var mcode = function(ast) {
|
||||
var t0 = 0
|
||||
var t1 = 0
|
||||
|
||||
// Known-int fast path
|
||||
if ((is_known_int(_bp_ln) || slot_is_int(left))
|
||||
&& (is_known_int(_bp_rn) || slot_is_int(right))) {
|
||||
emit_3("ne_int", dest, left, right)
|
||||
return null
|
||||
}
|
||||
// Known-num fast path
|
||||
if ((is_known_number(_bp_ln) || slot_is_num(left))
|
||||
&& (is_known_number(_bp_rn) || slot_is_num(right))) {
|
||||
@@ -636,24 +624,17 @@ var mcode = function(ast) {
|
||||
var right = _bp_right
|
||||
var t0 = 0
|
||||
var t1 = 0
|
||||
var left_is_int = is_known_int(_bp_ln) || slot_is_int(left)
|
||||
var left_is_num = is_known_number(_bp_ln) || slot_is_num(left)
|
||||
var left_is_text = is_known_text(_bp_ln) || slot_is_text(left)
|
||||
var right_is_int = is_known_int(_bp_rn) || slot_is_int(right)
|
||||
var right_is_num = is_known_number(_bp_rn) || slot_is_num(right)
|
||||
var right_is_text = is_known_text(_bp_rn) || slot_is_text(right)
|
||||
var not_num = null
|
||||
var done = null
|
||||
var err = null
|
||||
|
||||
// Both known int
|
||||
if (left_is_int && right_is_int) {
|
||||
emit_3(int_op, dest, left, right)
|
||||
return null
|
||||
}
|
||||
// Both known number
|
||||
if (left_is_num && right_is_num) {
|
||||
emit_3(float_op, dest, left, right)
|
||||
emit_3(poly_op, dest, left, right)
|
||||
return null
|
||||
}
|
||||
// Both known text
|
||||
@@ -1645,6 +1626,7 @@ var mcode = function(ast) {
|
||||
local = find_var(name)
|
||||
if (local >= 0) {
|
||||
emit_2("move", local, dest)
|
||||
propagate_slot(local, dest)
|
||||
}
|
||||
} else if (level > 0) {
|
||||
_lv = level - 1
|
||||
@@ -1744,9 +1726,11 @@ var mcode = function(ast) {
|
||||
if (level == 0 || level == -1) {
|
||||
slot = find_var(name)
|
||||
if (slot >= 0) {
|
||||
mark_slot(slot, null)
|
||||
val_slot = gen_expr(right, slot)
|
||||
if (val_slot != slot) {
|
||||
emit_2("move", slot, val_slot)
|
||||
propagate_slot(slot, val_slot)
|
||||
}
|
||||
return val_slot
|
||||
}
|
||||
|
||||
8
parse.cm
8
parse.cm
@@ -1627,6 +1627,10 @@ var parse = function(tokens, src, filename, tokenizer) {
|
||||
if (r.v != null) {
|
||||
left_node.level = r.level
|
||||
left_node.function_nr = r.def_function_nr
|
||||
if (r.level > 0) {
|
||||
r.v.nr_uses = r.v.nr_uses + 1
|
||||
r.v.closure = 1
|
||||
}
|
||||
} else {
|
||||
left_node.level = -1
|
||||
}
|
||||
@@ -1718,6 +1722,10 @@ var parse = function(tokens, src, filename, tokenizer) {
|
||||
if (r.v != null) {
|
||||
operand.level = r.level
|
||||
operand.function_nr = r.def_function_nr
|
||||
if (r.level > 0) {
|
||||
r.v.nr_uses = r.v.nr_uses + 1
|
||||
r.v.closure = 1
|
||||
}
|
||||
} else {
|
||||
operand.level = -1
|
||||
}
|
||||
|
||||
@@ -3008,10 +3008,10 @@ static MachCode *mcode_lower_func(cJSON *fobj, const char *filename) {
|
||||
if (s.nr_slots > 255) {
|
||||
cJSON *nm_chk = cJSON_GetObjectItemCaseSensitive(fobj, "name");
|
||||
const char *fn_name = nm_chk ? cJSON_GetStringValue(nm_chk) : "<anonymous>";
|
||||
fprintf(stderr, "ERROR: function '%s' has %d slots (max 255). "
|
||||
fprintf(stderr, "FATAL: function '%s' has %d slots (max 255). "
|
||||
"Ensure the streamline optimizer ran before mach compilation.\n",
|
||||
fn_name, s.nr_slots);
|
||||
return NULL;
|
||||
abort();
|
||||
}
|
||||
int dis_raw = (int)cJSON_GetNumberValue(
|
||||
cJSON_GetObjectItemCaseSensitive(fobj, "disruption_pc"));
|
||||
|
||||
@@ -1370,7 +1370,7 @@ JSValue gc_copy_value (JSContext *ctx, JSValue v, uint8_t *from_base, uint8_t *f
|
||||
|
||||
/* Frame shortening: returned frames (caller == JS_NULL) only need
|
||||
[this][args][closure_locals] — shrink during copy. */
|
||||
if (0 && type == OBJ_FRAME) {
|
||||
if (type == OBJ_FRAME) {
|
||||
JSFrame *f = (JSFrame *)hdr_ptr;
|
||||
if (JS_IsNull (f->caller) && JS_IsPtr (f->function)) {
|
||||
/* fn may be forwarded, but kind (offset 18) and u.cell.code (offset 24)
|
||||
|
||||
@@ -1305,6 +1305,85 @@ var streamline = function(ir, log) {
|
||||
push: 2, setarg: 3, put: 1
|
||||
}
|
||||
|
||||
// build_slot_liveness(instructions, nr_slots): build the last_ref liveness array
// for one function's instruction list.
//   instructions - the function's instruction list; array entries are scanned for
//                  operands, text entries are treated as labels (see pass 2)
//   nr_slots     - number of slots; only operands in 0..nr_slots-1 are recorded
|
||||
// Returns last_ref, where last_ref[slot] = index of the last instruction referencing
// that slot, extended to the jump instruction for slots referenced inside a loop.
// Entries are created with array(nr_slots, -1); presumably a slot that is never
// referenced keeps the value -1 (the code below tests last_ref[s] >= 0).
|
||||
// Uses get_slot_refs to only visit actual slot reference positions.
// Keep this form: boot_miscompile_bad.cm records that a variant which scanned the
// raw operands directly was miscompiled by the boot compiler.
|
||||
var build_slot_liveness = function(instructions, nr_slots) {
|
||||
var last_ref = array(nr_slots, -1)
|
||||
var n = length(instructions)
|
||||
var refs = null
|
||||
var i = 0
|
||||
var j = 0
|
||||
var s = 0
|
||||
var instr = null
|
||||
var label_map = null
|
||||
var changed = false
|
||||
var op = null
|
||||
var target = null
|
||||
var tpos = 0
|
||||
|
||||
// Pass 1: scan instructions for slot references. Because i only increases, each
// store leaves last_ref[s] at the latest instruction index seen for slot s.
|
||||
while (i < n) {
|
||||
instr = instructions[i]
|
||||
// Only array entries are scanned here.
if (is_array(instr)) {
|
||||
// refs holds operand positions within instr (each is used as instr[refs[j]]).
refs = get_slot_refs(instr)
|
||||
j = 0
|
||||
while (j < length(refs)) {
|
||||
s = instr[refs[j]]
|
||||
// Skip operands that are not numbers or fall outside 0..nr_slots-1.
if (is_number(s) && s >= 0 && s < nr_slots) {
|
||||
last_ref[s] = i
|
||||
}
|
||||
j = j + 1
|
||||
}
|
||||
}
|
||||
i = i + 1
|
||||
}
|
||||
|
||||
// Pass 2: extend liveness for backward jumps (loops).
// First map every label to its instruction index; a label is a text entry that
// does not start with "_nop_".
|
||||
label_map = {}
|
||||
i = 0
|
||||
while (i < n) {
|
||||
instr = instructions[i]
|
||||
if (is_text(instr) && !starts_with(instr, "_nop_")) {
|
||||
label_map[instr] = i
|
||||
}
|
||||
i = i + 1
|
||||
}
|
||||
// Then repeat full passes over the instructions until one makes no change.
changed = true
|
||||
while (changed) {
|
||||
changed = false
|
||||
i = 0
|
||||
while (i < n) {
|
||||
instr = instructions[i]
|
||||
if (is_array(instr)) {
|
||||
target = null
|
||||
op = instr[0]
|
||||
// "jump" carries its target label in operand 1; the conditional jumps
// handled here carry it in operand 2. Other ops leave target null.
if (op == "jump") {
|
||||
target = instr[1]
|
||||
} else if (op == "jump_true" || op == "jump_false" || op == "jump_not_null") {
|
||||
target = instr[2]
|
||||
}
|
||||
if (target != null && is_text(target)) {
|
||||
tpos = label_map[target]
|
||||
// tpos < i means the label precedes the jump, i.e. a backward jump.
// Targets missing from label_map (tpos == null) are ignored.
if (tpos != null && tpos < i) {
|
||||
s = 0
|
||||
while (s < nr_slots) {
|
||||
// A slot whose last reference lies in [tpos, i) is kept live up to
// the jump at i.
if (last_ref[s] >= 0 && last_ref[s] >= tpos && last_ref[s] < i) {
|
||||
last_ref[s] = i
|
||||
changed = true
|
||||
}
|
||||
s = s + 1
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
i = i + 1
|
||||
}
|
||||
}
|
||||
|
||||
return last_ref
|
||||
}
|
||||
|
||||
var insert_stone_text = function(func, log) {
|
||||
var instructions = func.instructions
|
||||
var nr_slots = func.nr_slots
|
||||
@@ -1320,6 +1399,7 @@ var streamline = function(ir, log) {
|
||||
var slot = 0
|
||||
var nc = 0
|
||||
var shift = 0
|
||||
var last_ref = null
|
||||
|
||||
if (instructions == null || length(instructions) == 0) {
|
||||
return null
|
||||
@@ -1329,10 +1409,13 @@ var streamline = function(ir, log) {
|
||||
events = log.events
|
||||
}
|
||||
|
||||
// Build liveness info (in separate function to stay under slot limit)
|
||||
last_ref = build_slot_liveness(instructions, nr_slots)
|
||||
|
||||
// Walk instructions, tracking types, inserting stone_text
|
||||
n = length(instructions)
|
||||
slot_types = array(nr_slots, T_UNKNOWN)
|
||||
result = []
|
||||
n = length(instructions)
|
||||
i = 0
|
||||
while (i < n) {
|
||||
instr = instructions[i]
|
||||
@@ -1353,9 +1436,10 @@ var streamline = function(ir, log) {
|
||||
}
|
||||
}
|
||||
} else if (op == "move") {
|
||||
// Conservatively stone source before move if provably text
|
||||
// Stone source before move only if source is provably text
|
||||
// AND source slot is still live after this instruction
|
||||
slot = instr[2]
|
||||
if (is_number(slot) && slot_is(slot_types, slot, T_TEXT)) {
|
||||
if (is_number(slot) && slot_is(slot_types, slot, T_TEXT) && last_ref[slot] > i) {
|
||||
result[] = ["stone_text", slot]
|
||||
nc = nc + 1
|
||||
if (is_number(dpc) && i < dpc) shift = shift + 1
|
||||
|
||||
Reference in New Issue
Block a user