better streamline

This commit is contained in:
2026-02-18 20:56:15 -06:00
parent 621da78de9
commit a05d0e2525
3 changed files with 387 additions and 49 deletions

View File

@@ -81,7 +81,7 @@ function content_hash(str) {
}
// Bump when native codegen/runtime ABI changes so stale dylibs are not reused.
def NATIVE_CACHE_VERSION = "native-v16"
def NATIVE_CACHE_VERSION = "native-v22"
// Enable AOT ASan by creating .cell/asan_aot in the package root.
function native_sanitize_flags() {

View File

@@ -1130,6 +1130,15 @@ var qbe_emit = function(ir, qbe, export_name) {
var truthy = null
var lhs_d = null
var rhs_d = null
var peek1 = null
var peek2 = null
var peek3 = null
var peek4 = null
var peek5 = null
var floor_frame_slot = 0
var floor_this_slot = 0
var floor_arg_slot = 0
var floor_dest_slot = 0
// Pre-scan: count invoke/tail_invoke points to assign segment numbers.
// Must skip dead code (instructions after terminators) the same way
@@ -1141,15 +1150,43 @@ var qbe_emit = function(ir, qbe, export_name) {
scan = instrs[si]
si = si + 1
if (is_text(scan)) {
// Skip optimizer nop pseudo-labels entirely.
if (starts_with(scan, "_nop_")) continue
label_pos[sanitize(scan)] = si - 1
// Labels reset dead code state (unless they're nop pseudo-labels)
if (!starts_with(scan, "_nop_ur_") && !starts_with(scan, "_nop_tc_"))
scan_dead = false
// Real labels reset dead code state.
scan_dead = false
continue
}
if (scan_dead) continue
if (!is_array(scan)) continue
scan_op = scan[0]
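// Keep invoke segment counting consistent with the main-loop floor peephole:
// when that peephole fires, the inlined floor call sequence emits no invoke.
// NOTE: currently disabled via the leading `false &&` in the condition below.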
if (false && scan_op == "access" && is_object(scan[2]) && scan[2].make == "intrinsic" && scan[2].name == "floor") {
if (si + 4 < length(instrs)) {
peek1 = instrs[si]
peek2 = instrs[si + 1]
peek3 = instrs[si + 2]
peek4 = instrs[si + 3]
peek5 = instrs[si + 4]
if (is_array(peek1) && peek1[0] == "frame" && peek1[2] == scan[1] && peek1[3] == 1 &&
is_array(peek2) && peek2[0] == "null" &&
is_array(peek3) && peek3[0] == "setarg" &&
is_array(peek4) && peek4[0] == "setarg" &&
is_array(peek5) && peek5[0] == "invoke") {
floor_frame_slot = peek1[1]
floor_this_slot = peek2[1]
if (peek3[1] == floor_frame_slot && peek3[2] == 0 && peek3[3] == floor_this_slot &&
peek4[1] == floor_frame_slot && peek4[2] == 1 &&
peek5[1] == floor_frame_slot && peek5[2] == floor_this_slot) {
si = si + 5
continue
}
}
}
}
if (scan_op == "invoke") {
invoke_count = invoke_count + 1
}
@@ -1427,7 +1464,7 @@ var qbe_emit = function(ir, qbe, export_name) {
// Labels are plain strings; skip nop pseudo-labels from streamline
if (is_text(instr)) {
if (starts_with(instr, "_nop_ur_") || starts_with(instr, "_nop_tc_")) continue
if (starts_with(instr, "_nop_")) continue
lbl = sanitize(instr)
if (!last_was_term) {
emit(` jmp @${lbl}`)
@@ -1446,6 +1483,47 @@ var qbe_emit = function(ir, qbe, export_name) {
a3 = instr[3]
last_was_term = false
// Peephole (currently disabled via the leading `false &&` below):
// inline the `floor(x)` intrinsic call sequence
// access floor; frame; null this; setarg 0 this; setarg 1 x; invoke
if (false && op == "access" && is_object(a2) && a2.make == "intrinsic" && a2.name == "floor") {
if (instr_idx + 5 < length(instrs)) {
peek1 = instrs[instr_idx + 1]
peek2 = instrs[instr_idx + 2]
peek3 = instrs[instr_idx + 3]
peek4 = instrs[instr_idx + 4]
peek5 = instrs[instr_idx + 5]
if (is_array(peek1) && peek1[0] == "frame" && peek1[2] == a1 && peek1[3] == 1 &&
is_array(peek2) && peek2[0] == "null" &&
is_array(peek3) && peek3[0] == "setarg" &&
is_array(peek4) && peek4[0] == "setarg" &&
is_array(peek5) && peek5[0] == "invoke") {
floor_frame_slot = peek1[1]
floor_this_slot = peek2[1]
if (peek3[1] == floor_frame_slot && peek3[2] == 0 && peek3[3] == floor_this_slot &&
peek4[1] == floor_frame_slot && peek4[2] == 1 &&
peek5[1] == floor_frame_slot && peek5[2] == floor_this_slot) {
floor_arg_slot = peek4[3]
floor_dest_slot = peek5[2]
v = s_read(floor_arg_slot)
p = fresh()
emit(` %${p}_is_num =w copy ${emit_is_num_w(v)}`)
emit(` jnz %${p}_is_num, @${p}_ok, @${p}_bad`)
emit(`@${p}_bad`)
s_write(floor_dest_slot, text(qbe.js_null))
emit(` jmp @${p}_done`)
emit(`@${p}_ok`)
lhs_d = emit_num_to_double(v)
emit(` %${p}_fd =d call $floor(d ${lhs_d})`)
emit(` %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_fd)`)
s_write(floor_dest_slot, `%${p}_r`)
emit(`@${p}_done`)
i = instr_idx + 6
continue
}
}
}
}
// --- Constants ---
if (op == "int") {

View File

@@ -250,8 +250,13 @@ var streamline = function(ir, log) {
var param_types = null
var i = 0
var j = 0
var iter = 0
var instr = null
var bt = null
var src = 0
var dst = 0
var old_bt = null
var changed = false
var rule = null
if (instructions == null || nr_args == 0) {
@@ -275,6 +280,31 @@ var streamline = function(ir, log) {
i = i + 1
}
// Propagate typed constraints backward through move chains.
changed = true
iter = 0
while (changed && iter < num_instr + 4) {
changed = false
i = 0
while (i < num_instr) {
instr = instructions[i]
if (is_array(instr) && instr[0] == "move") {
dst = instr[1]
src = instr[2]
bt = backward_types[dst]
if (bt != null && bt != T_UNKNOWN) {
old_bt = backward_types[src]
merge_backward(backward_types, src, bt)
if (backward_types[src] != old_bt) {
changed = true
}
}
}
i = i + 1
}
iter = iter + 1
}
param_types = array(func.nr_slots)
j = 1
while (j <= nr_args) {
@@ -325,16 +355,35 @@ var streamline = function(ir, log) {
is_record: [1, T_BOOL], is_stone: [1, T_BOOL]
}
var infer_slot_write_types = function(func) {
// Result type recorded for a slot that an `access` loads with one of these
// named intrinsics; an invoke whose frame callee is such a slot gets this
// type for its result. Every intrinsic listed here maps to T_NUM.
var intrinsic_return_types = {
  abs: T_NUM,
  floor: T_NUM,
  ceiling: T_NUM,
  round: T_NUM,
  trunc: T_NUM,
  fraction: T_NUM,
  integer: T_NUM,
  sign: T_NUM,
  max: T_NUM,
  min: T_NUM
}
var infer_slot_write_types = function(func, param_types) {
var instructions = func.instructions
var nr_args = func.nr_args != null ? func.nr_args : 0
var num_instr = 0
var write_types = null
var frame_callee = null
var intrinsic_slots = null
var move_dests = null
var move_srcs = null
var i = 0
var k = 0
var iter = 0
var instr = null
var op = null
var src = 0
var slot = 0
var old_typ = null
var src_typ = null
var typ = null
var callee_slot = null
var changed = false
var rule = null
var cw_keys = null
@@ -344,11 +393,62 @@ var streamline = function(ir, log) {
num_instr = length(instructions)
write_types = array(func.nr_slots)
frame_callee = array(func.nr_slots)
intrinsic_slots = array(func.nr_slots)
move_dests = []
move_srcs = []
i = 0
while (i < num_instr) {
instr = instructions[i]
if (is_array(instr)) {
rule = write_rules[instr[0]]
op = instr[0]
if (op == "access") {
slot = instr[1]
if (slot > 0 && slot > nr_args) {
merge_backward(write_types, slot, access_value_type(instr[2]))
}
if (is_object(instr[2]) && instr[2].make == "intrinsic") {
typ = intrinsic_return_types[instr[2].name]
if (typ != null && slot >= 0 && slot < length(intrinsic_slots)) {
intrinsic_slots[slot] = typ
}
}
i = i + 1
continue
}
if (op == "move") {
slot = instr[1]
if (slot > 0 && slot > nr_args) {
move_dests[] = slot
move_srcs[] = instr[2]
}
i = i + 1
continue
}
if (op == "frame" || op == "goframe") {
if (is_number(instr[1]) && instr[1] >= 0 && instr[1] < length(frame_callee)) {
frame_callee[instr[1]] = instr[2]
}
i = i + 1
continue
}
if (op == "invoke" || op == "tail_invoke") {
slot = instr[2]
typ = T_UNKNOWN
callee_slot = frame_callee[instr[1]]
if (is_number(callee_slot) && callee_slot >= 0 && callee_slot < length(intrinsic_slots)) {
if (intrinsic_slots[callee_slot] != null) {
typ = intrinsic_slots[callee_slot]
}
}
if (slot > 0 && slot > nr_args) {
merge_backward(write_types, slot, typ)
}
i = i + 1
continue
}
rule = write_rules[op]
if (rule != null) {
slot = instr[rule[0]]
typ = rule[1]
@@ -363,6 +463,54 @@ var streamline = function(ir, log) {
i = i + 1
}
// Resolve move writes from known source invariants (fixed-point).
changed = true
iter = 0
while (changed && iter < length(write_types) + 4) {
changed = false
k = 0
while (k < length(move_dests)) {
slot = move_dests[k]
src = move_srcs[k]
src_typ = null
if (is_number(src) && src >= 0) {
if (src < length(write_types) && write_types[src] != null) {
src_typ = write_types[src]
} else if (param_types != null && src < length(param_types) && param_types[src] != null) {
src_typ = param_types[src]
}
}
if (src_typ != null) {
old_typ = write_types[slot]
merge_backward(write_types, slot, src_typ)
if (write_types[slot] != old_typ) {
changed = true
}
}
k = k + 1
}
iter = iter + 1
}
// Any remaining unresolved move write can carry arbitrary type.
k = 0
while (k < length(move_dests)) {
slot = move_dests[k]
src = move_srcs[k]
src_typ = null
if (is_number(src) && src >= 0) {
if (src < length(write_types) && write_types[src] != null) {
src_typ = write_types[src]
} else if (param_types != null && src < length(param_types) && param_types[src] != null) {
src_typ = param_types[src]
}
}
if (src_typ == null && slot > 0 && slot > nr_args) {
merge_backward(write_types, slot, T_UNKNOWN)
}
k = k + 1
}
// Closure-written slots can have any type at runtime — mark unknown
if (func.closure_written != null) {
cw_keys = array(func.closure_written)
@@ -976,6 +1124,94 @@ var streamline = function(ir, log) {
return null
}
// =========================================================
// Pass: eliminate_unreachable_cfg — nop blocks not reachable
// from function entry under explicit jump control-flow.
// =========================================================
var eliminate_unreachable_cfg = function(func) {
var instructions = func.instructions
var num_instr = 0
var disruption_pc = -1
var label_map = null
var reachable = null
var stack = null
var sp = 0
var idx = 0
var tgt = null
var instr = null
var op = null
var nc = 0
if (instructions == null || length(instructions) == 0) {
return null
}
num_instr = length(instructions)
if (is_number(func.disruption_pc)) {
disruption_pc = func.disruption_pc
}
label_map = {}
idx = 0
while (idx < num_instr) {
instr = instructions[idx]
if (is_text(instr) && !starts_with(instr, "_nop_")) {
label_map[instr] = idx
}
idx = idx + 1
}
reachable = array(num_instr, false)
stack = [0]
if (disruption_pc > 0 && disruption_pc < num_instr) {
stack[] = disruption_pc
}
sp = 0
while (sp < length(stack)) {
idx = stack[sp]
sp = sp + 1
if (idx < 0 || idx >= num_instr || reachable[idx]) {
continue
}
reachable[idx] = true
instr = instructions[idx]
if (!is_array(instr)) {
stack[] = idx + 1
continue
}
op = instr[0]
if (op == "jump") {
tgt = label_map[instr[1]]
if (is_number(tgt)) stack[] = tgt
continue
}
if (op == "jump_true" || op == "jump_false" || op == "jump_not_null") {
tgt = label_map[instr[2]]
if (is_number(tgt)) stack[] = tgt
stack[] = idx + 1
continue
}
if (op == "return" || op == "disrupt") {
continue
}
stack[] = idx + 1
}
idx = 0
while (idx < num_instr) {
if (!reachable[idx] && is_array(instructions[idx])) {
nc = nc + 1
instructions[idx] = "_nop_ucfg_" + text(nc)
}
idx = idx + 1
}
return null
}
// =========================================================
// Pass: eliminate_dead_jumps — jump to next label → nop
// =========================================================
@@ -1590,51 +1826,75 @@ var streamline = function(ir, log) {
var param_types = null
var write_types = null
var slot_types = null
var run_cycle = function(suffix) {
var name = null
name = "infer_param_types" + suffix
run_pass(func, name, function() {
param_types = infer_param_types(func)
return param_types
})
if (verify_fn) verify_fn(func, "after " + name)
name = "infer_slot_write_types" + suffix
run_pass(func, name, function() {
write_types = infer_slot_write_types(func, param_types)
return write_types
})
if (verify_fn) verify_fn(func, "after " + name)
name = "eliminate_type_checks" + suffix
run_pass(func, name, function() {
slot_types = eliminate_type_checks(func, param_types, write_types, log)
return slot_types
})
if (verify_fn) verify_fn(func, "after " + name)
if (log != null && log.type_deltas != null && slot_types != null) {
log.type_deltas[] = {
fn: func.name,
cycle: suffix == "" ? 1 : 2,
param_types: param_types,
slot_types: slot_types
}
}
name = "simplify_algebra" + suffix
run_pass(func, name, function() {
return simplify_algebra(func, log)
})
if (verify_fn) verify_fn(func, "after " + name)
name = "simplify_booleans" + suffix
run_pass(func, name, function() {
return simplify_booleans(func, log)
})
if (verify_fn) verify_fn(func, "after " + name)
name = "eliminate_moves" + suffix
run_pass(func, name, function() {
return eliminate_moves(func, log)
})
if (verify_fn) verify_fn(func, "after " + name)
name = "eliminate_unreachable" + suffix
run_pass(func, name, function() {
return eliminate_unreachable(func)
})
if (verify_fn) verify_fn(func, "after " + name)
name = "eliminate_dead_jumps" + suffix
run_pass(func, name, function() {
return eliminate_dead_jumps(func, log)
})
if (verify_fn) verify_fn(func, "after " + name)
return null
}
if (func.instructions == null || length(func.instructions) == 0) {
return null
}
run_pass(func, "infer_param_types", function() {
param_types = infer_param_types(func)
return param_types
})
if (verify_fn) verify_fn(func, "after infer_param_types")
run_pass(func, "infer_slot_write_types", function() {
write_types = infer_slot_write_types(func)
return write_types
})
if (verify_fn) verify_fn(func, "after infer_slot_write_types")
run_pass(func, "eliminate_type_checks", function() {
slot_types = eliminate_type_checks(func, param_types, write_types, log)
return slot_types
})
if (verify_fn) verify_fn(func, "after eliminate_type_checks")
if (log != null && log.type_deltas != null && slot_types != null) {
log.type_deltas[] = {
fn: func.name,
param_types: param_types,
slot_types: slot_types
}
}
run_pass(func, "simplify_algebra", function() {
return simplify_algebra(func, log)
})
if (verify_fn) verify_fn(func, "after simplify_algebra")
run_pass(func, "simplify_booleans", function() {
return simplify_booleans(func, log)
})
if (verify_fn) verify_fn(func, "after simplify_booleans")
run_pass(func, "eliminate_moves", function() {
return eliminate_moves(func, log)
})
if (verify_fn) verify_fn(func, "after eliminate_moves")
run_pass(func, "eliminate_unreachable", function() {
return eliminate_unreachable(func)
})
if (verify_fn) verify_fn(func, "after eliminate_unreachable")
run_pass(func, "eliminate_dead_jumps", function() {
return eliminate_dead_jumps(func, log)
})
if (verify_fn) verify_fn(func, "after eliminate_dead_jumps")
run_cycle("")
return null
}