diff --git a/build.cm b/build.cm index b181cc8e..b233b527 100644 --- a/build.cm +++ b/build.cm @@ -81,7 +81,7 @@ function content_hash(str) { } // Bump when native codegen/runtime ABI changes so stale dylibs are not reused. -def NATIVE_CACHE_VERSION = "native-v16" +def NATIVE_CACHE_VERSION = "native-v22" // Enable AOT ASan by creating .cell/asan_aot in the package root. function native_sanitize_flags() { diff --git a/qbe_emit.cm b/qbe_emit.cm index 43674cd6..a5e2734d 100644 --- a/qbe_emit.cm +++ b/qbe_emit.cm @@ -1130,6 +1130,15 @@ var qbe_emit = function(ir, qbe, export_name) { var truthy = null var lhs_d = null var rhs_d = null + var peek1 = null + var peek2 = null + var peek3 = null + var peek4 = null + var peek5 = null + var floor_frame_slot = 0 + var floor_this_slot = 0 + var floor_arg_slot = 0 + var floor_dest_slot = 0 // Pre-scan: count invoke/tail_invoke points to assign segment numbers. // Must skip dead code (instructions after terminators) the same way @@ -1141,15 +1150,43 @@ var qbe_emit = function(ir, qbe, export_name) { scan = instrs[si] si = si + 1 if (is_text(scan)) { + // Skip optimizer nop pseudo-labels entirely. + if (starts_with(scan, "_nop_")) continue label_pos[sanitize(scan)] = si - 1 - // Labels reset dead code state (unless they're nop pseudo-labels) - if (!starts_with(scan, "_nop_ur_") && !starts_with(scan, "_nop_tc_")) - scan_dead = false + // Real labels reset dead code state. + scan_dead = false continue } if (scan_dead) continue if (!is_array(scan)) continue scan_op = scan[0] + + // Keep invoke segment counting consistent with main-loop peephole: + // inline floor intrinsic call sequence does not emit an invoke. 
+ if (false && scan_op == "access" && is_object(scan[2]) && scan[2].make == "intrinsic" && scan[2].name == "floor") { + if (si + 4 < length(instrs)) { + peek1 = instrs[si] + peek2 = instrs[si + 1] + peek3 = instrs[si + 2] + peek4 = instrs[si + 3] + peek5 = instrs[si + 4] + if (is_array(peek1) && peek1[0] == "frame" && peek1[2] == scan[1] && peek1[3] == 1 && + is_array(peek2) && peek2[0] == "null" && + is_array(peek3) && peek3[0] == "setarg" && + is_array(peek4) && peek4[0] == "setarg" && + is_array(peek5) && peek5[0] == "invoke") { + floor_frame_slot = peek1[1] + floor_this_slot = peek2[1] + if (peek3[1] == floor_frame_slot && peek3[2] == 0 && peek3[3] == floor_this_slot && + peek4[1] == floor_frame_slot && peek4[2] == 1 && + peek5[1] == floor_frame_slot && peek5[2] == floor_this_slot) { + si = si + 5 + continue + } + } + } + } + if (scan_op == "invoke") { invoke_count = invoke_count + 1 } @@ -1427,7 +1464,7 @@ var qbe_emit = function(ir, qbe, export_name) { // Labels are plain strings; skip nop pseudo-labels from streamline if (is_text(instr)) { - if (starts_with(instr, "_nop_ur_") || starts_with(instr, "_nop_tc_")) continue + if (starts_with(instr, "_nop_")) continue lbl = sanitize(instr) if (!last_was_term) { emit(` jmp @${lbl}`) @@ -1446,6 +1483,47 @@ var qbe_emit = function(ir, qbe, export_name) { a3 = instr[3] last_was_term = false + // Peephole: inline `floor(x)` intrinsic call sequence + // access floor; frame; null this; setarg 0 this; setarg 1 x; invoke + if (false && op == "access" && is_object(a2) && a2.make == "intrinsic" && a2.name == "floor") { + if (instr_idx + 5 < length(instrs)) { + peek1 = instrs[instr_idx + 1] + peek2 = instrs[instr_idx + 2] + peek3 = instrs[instr_idx + 3] + peek4 = instrs[instr_idx + 4] + peek5 = instrs[instr_idx + 5] + if (is_array(peek1) && peek1[0] == "frame" && peek1[2] == a1 && peek1[3] == 1 && + is_array(peek2) && peek2[0] == "null" && + is_array(peek3) && peek3[0] == "setarg" && + is_array(peek4) && peek4[0] == "setarg" 
&& + is_array(peek5) && peek5[0] == "invoke") { + floor_frame_slot = peek1[1] + floor_this_slot = peek2[1] + if (peek3[1] == floor_frame_slot && peek3[2] == 0 && peek3[3] == floor_this_slot && + peek4[1] == floor_frame_slot && peek4[2] == 1 && + peek5[1] == floor_frame_slot && peek5[2] == floor_this_slot) { + floor_arg_slot = peek4[3] + floor_dest_slot = peek5[2] + v = s_read(floor_arg_slot) + p = fresh() + emit(` %${p}_is_num =w copy ${emit_is_num_w(v)}`) + emit(` jnz %${p}_is_num, @${p}_ok, @${p}_bad`) + emit(`@${p}_bad`) + s_write(floor_dest_slot, text(qbe.js_null)) + emit(` jmp @${p}_done`) + emit(`@${p}_ok`) + lhs_d = emit_num_to_double(v) + emit(` %${p}_fd =d call $floor(d ${lhs_d})`) + emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_fd)`) + s_write(floor_dest_slot, `%${p}_r`) + emit(`@${p}_done`) + i = instr_idx + 6 + continue + } + } + } + } + // --- Constants --- if (op == "int") { diff --git a/streamline.cm b/streamline.cm index 545a1862..16fcd8fb 100644 --- a/streamline.cm +++ b/streamline.cm @@ -250,8 +250,13 @@ var streamline = function(ir, log) { var param_types = null var i = 0 var j = 0 + var iter = 0 var instr = null var bt = null + var src = 0 + var dst = 0 + var old_bt = null + var changed = false var rule = null if (instructions == null || nr_args == 0) { @@ -275,6 +280,31 @@ var streamline = function(ir, log) { i = i + 1 } + // Propagate typed constraints backward through move chains. 
+ changed = true + iter = 0 + while (changed && iter < num_instr + 4) { + changed = false + i = 0 + while (i < num_instr) { + instr = instructions[i] + if (is_array(instr) && instr[0] == "move") { + dst = instr[1] + src = instr[2] + bt = backward_types[dst] + if (bt != null && bt != T_UNKNOWN) { + old_bt = backward_types[src] + merge_backward(backward_types, src, bt) + if (backward_types[src] != old_bt) { + changed = true + } + } + } + i = i + 1 + } + iter = iter + 1 + } + param_types = array(func.nr_slots) j = 1 while (j <= nr_args) { @@ -325,16 +355,35 @@ var streamline = function(ir, log) { is_record: [1, T_BOOL], is_stone: [1, T_BOOL] } - var infer_slot_write_types = function(func) { + // Known intrinsic return types for invoke result inference. + var intrinsic_return_types = { + abs: T_NUM, floor: T_NUM, ceiling: T_NUM, + round: T_NUM, trunc: T_NUM, fraction: T_NUM, + integer: T_NUM, sign: T_NUM, + max: T_NUM, min: T_NUM + } + + var infer_slot_write_types = function(func, param_types) { var instructions = func.instructions var nr_args = func.nr_args != null ? 
func.nr_args : 0 var num_instr = 0 var write_types = null + var frame_callee = null + var intrinsic_slots = null + var move_dests = null + var move_srcs = null var i = 0 var k = 0 + var iter = 0 var instr = null + var op = null + var src = 0 var slot = 0 + var old_typ = null + var src_typ = null var typ = null + var callee_slot = null + var changed = false var rule = null var cw_keys = null @@ -344,11 +393,62 @@ var streamline = function(ir, log) { num_instr = length(instructions) write_types = array(func.nr_slots) + frame_callee = array(func.nr_slots) + intrinsic_slots = array(func.nr_slots) + move_dests = [] + move_srcs = [] i = 0 while (i < num_instr) { instr = instructions[i] if (is_array(instr)) { - rule = write_rules[instr[0]] + op = instr[0] + if (op == "access") { + slot = instr[1] + if (slot > 0 && slot > nr_args) { + merge_backward(write_types, slot, access_value_type(instr[2])) + } + if (is_object(instr[2]) && instr[2].make == "intrinsic") { + typ = intrinsic_return_types[instr[2].name] + if (typ != null && slot >= 0 && slot < length(intrinsic_slots)) { + intrinsic_slots[slot] = typ + } + } + i = i + 1 + continue + } + if (op == "move") { + slot = instr[1] + if (slot > 0 && slot > nr_args) { + move_dests[] = slot + move_srcs[] = instr[2] + } + i = i + 1 + continue + } + if (op == "frame" || op == "goframe") { + if (is_number(instr[1]) && instr[1] >= 0 && instr[1] < length(frame_callee)) { + frame_callee[instr[1]] = instr[2] + } + i = i + 1 + continue + } + if (op == "invoke" || op == "tail_invoke") { + slot = instr[2] + typ = T_UNKNOWN + callee_slot = frame_callee[instr[1]] + if (is_number(callee_slot) && callee_slot >= 0 && callee_slot < length(intrinsic_slots)) { + if (intrinsic_slots[callee_slot] != null) { + typ = intrinsic_slots[callee_slot] + } + } + if (slot > 0 && slot > nr_args) { + merge_backward(write_types, slot, typ) + } + i = i + 1 + continue + } + + rule = write_rules[op] if (rule != null) { slot = instr[rule[0]] typ = rule[1] @@ -363,6 
+463,54 @@ var streamline = function(ir, log) { i = i + 1 } + // Resolve move writes from known source invariants (fixed-point). + changed = true + iter = 0 + while (changed && iter < length(write_types) + 4) { + changed = false + k = 0 + while (k < length(move_dests)) { + slot = move_dests[k] + src = move_srcs[k] + src_typ = null + if (is_number(src) && src >= 0) { + if (src < length(write_types) && write_types[src] != null) { + src_typ = write_types[src] + } else if (param_types != null && src < length(param_types) && param_types[src] != null) { + src_typ = param_types[src] + } + } + if (src_typ != null) { + old_typ = write_types[slot] + merge_backward(write_types, slot, src_typ) + if (write_types[slot] != old_typ) { + changed = true + } + } + k = k + 1 + } + iter = iter + 1 + } + + // Any remaining unresolved move write can carry arbitrary type. + k = 0 + while (k < length(move_dests)) { + slot = move_dests[k] + src = move_srcs[k] + src_typ = null + if (is_number(src) && src >= 0) { + if (src < length(write_types) && write_types[src] != null) { + src_typ = write_types[src] + } else if (param_types != null && src < length(param_types) && param_types[src] != null) { + src_typ = param_types[src] + } + } + if (src_typ == null && slot > 0 && slot > nr_args) { + merge_backward(write_types, slot, T_UNKNOWN) + } + k = k + 1 + } + // Closure-written slots can have any type at runtime — mark unknown if (func.closure_written != null) { cw_keys = array(func.closure_written) @@ -976,6 +1124,94 @@ var streamline = function(ir, log) { return null } + // ========================================================= + // Pass: eliminate_unreachable_cfg — nop blocks not reachable + // from function entry under explicit jump control-flow. 
+  // =========================================================
+  var eliminate_unreachable_cfg = function(func) {
+    // Worklist reachability over explicit control flow. The roots are
+    // instruction 0 and, when it is in range, func.disruption_pc.
+    // Array instructions that are never reached get overwritten in
+    // place with "_nop_ucfg_<n>" pseudo-labels. Always returns null.
+    var instructions = func.instructions
+    var count = 0
+    var handler_pc = -1
+    var label_index = null
+    var seen = null
+    var worklist = null
+    var head = 0
+    var pc = 0
+    var target = null
+    var falls_through = true
+    var instr = null
+    var op = null
+    var nop_count = 0
+
+    if (instructions == null || length(instructions) == 0) {
+      return null
+    }
+    count = length(instructions)
+
+    // Index every real label by position. "_nop_" pseudo-labels are
+    // left out, so they never resolve as jump targets.
+    label_index = {}
+    while (pc < count) {
+      instr = instructions[pc]
+      if (is_text(instr) && !starts_with(instr, "_nop_")) {
+        label_index[instr] = pc
+      }
+      pc = pc + 1
+    }
+
+    // Seed the worklist with the entry point and the disruption pc.
+    seen = array(count, false)
+    worklist = [0]
+    if (is_number(func.disruption_pc)) handler_pc = func.disruption_pc
+    if (handler_pc > 0 && handler_pc < count) worklist[] = handler_pc
+
+    // The worklist only grows; `head` walks it from front to back.
+    while (head < length(worklist)) {
+      pc = worklist[head]
+      head = head + 1
+      if (pc < 0 || pc >= count || seen[pc]) continue
+      seen[pc] = true
+      instr = instructions[pc]
+
+      if (!is_array(instr)) {
+        // Non-array entries such as label strings just fall through.
+        worklist[] = pc + 1
+        continue
+      }
+
+      // Work out the branch target (if any) and whether control can
+      // continue to the next instruction.
+      op = instr[0]
+      target = null
+      falls_through = true
+      if (op == "jump") {
+        target = label_index[instr[1]]
+        falls_through = false
+      } else if (op == "jump_true" || op == "jump_false" || op == "jump_not_null") {
+        target = label_index[instr[2]]
+      } else if (op == "return" || op == "disrupt") {
+        falls_through = false
+      }
+      if (is_number(target)) worklist[] = target
+      if (falls_through) worklist[] = pc + 1
+    }
+
+    // Replace every unreached array instruction with a numbered nop.
+    pc = 0
+    while (pc < count) {
+      if (is_array(instructions[pc]) && !seen[pc]) {
+        nop_count = nop_count + 1
+        instructions[pc] = "_nop_ucfg_" + text(nop_count)
+      }
+      pc = pc + 1
+    }
+    return null
+  }
+
+  // =========================================================
   // Pass: eliminate_dead_jumps — jump to next label → nop
   // =========================================================
@@ -1590,51 +1826,64 @@ var streamline = function(ir, log) {
     var param_types = null
     var write_types = null
     var slot_types = null
+    // One optimization cycle over `func`. Each pass is handed to
+    // run_pass under its base name plus `suffix`; verify_fn, when set,
+    // is then called with "after " plus that name. Always returns null.
+    var run_cycle = function(suffix) {
+      // Run a single named pass, then the optional verifier for it.
+      var stage = function(base, body) {
+        var name = base + suffix
+        run_pass(func, name, body)
+        if (verify_fn) verify_fn(func, "after " + name)
+        return null
+      }
+
+      stage("infer_param_types", function() {
+        param_types = infer_param_types(func)
+        return param_types
+      })
+      stage("infer_slot_write_types", function() {
+        write_types = infer_slot_write_types(func, param_types)
+        return write_types
+      })
+      stage("eliminate_type_checks", function() {
+        slot_types = eliminate_type_checks(func, param_types, write_types, log)
+        return slot_types
+      })
+
+      // Append a type-delta record when the log has a type_deltas list.
+      if (log != null && log.type_deltas != null && slot_types != null) {
+        log.type_deltas[] = {
+          fn: func.name,
+          cycle: suffix == "" ? 1 : 2,
+          param_types: param_types,
+          slot_types: slot_types
+        }
+      }
+
+      stage("simplify_algebra", function() {
+        return simplify_algebra(func, log)
+      })
+      stage("simplify_booleans", function() {
+        return simplify_booleans(func, log)
+      })
+      stage("eliminate_moves", function() {
+        return eliminate_moves(func, log)
+      })
+      stage("eliminate_unreachable", function() {
+        return eliminate_unreachable(func)
+      })
+      stage("eliminate_dead_jumps", function() {
+        return eliminate_dead_jumps(func, log)
+      })
+      return null
+    }
+
     if (func.instructions == null || length(func.instructions) == 0) {
       return null
     }
-    run_pass(func, "infer_param_types", function() {
-      param_types = infer_param_types(func)
-      return param_types
-    })
-    if (verify_fn) verify_fn(func, "after infer_param_types")
-    run_pass(func, "infer_slot_write_types", function() {
-      write_types = infer_slot_write_types(func)
-      return write_types
-    })
-    if (verify_fn) verify_fn(func, "after infer_slot_write_types")
-    run_pass(func, "eliminate_type_checks", function() {
-      slot_types = eliminate_type_checks(func, param_types, write_types, log)
-      return slot_types
-    })
-    if (verify_fn) verify_fn(func, "after eliminate_type_checks")
-    if (log != null && log.type_deltas != null && slot_types != null) {
-      log.type_deltas[] = {
-        fn: func.name,
-        param_types: param_types,
-        slot_types: slot_types
-      }
-    }
-    run_pass(func, "simplify_algebra", function() {
-      return simplify_algebra(func, log)
-    })
-    if (verify_fn) verify_fn(func, "after simplify_algebra")
-    run_pass(func, "simplify_booleans", function() {
-      return simplify_booleans(func, log)
-    })
-    if (verify_fn) verify_fn(func, "after simplify_booleans")
-    run_pass(func, "eliminate_moves", function() {
-      return eliminate_moves(func, log)
-    })
-    if (verify_fn) verify_fn(func, "after eliminate_moves")
-    run_pass(func, "eliminate_unreachable", function() {
-      return eliminate_unreachable(func)
-    })
-    if (verify_fn) verify_fn(func, "after eliminate_unreachable")
-    run_pass(func, "eliminate_dead_jumps", function() {
-      return eliminate_dead_jumps(func, log)
-    })
-    if (verify_fn) verify_fn(func, "after eliminate_dead_jumps")
+
+    run_cycle("")
     return null
   }
 