better streamline

2026-02-18 20:56:15 -06:00
parent 621da78de9
commit a05d0e2525
3 changed files with 387 additions and 49 deletions
--- a/build.cm
+++ b/build.cm
@@ -81,7 +81,7 @@ function content_hash(str) {
 }

 // Bump when native codegen/runtime ABI changes so stale dylibs are not reused.
-def NATIVE_CACHE_VERSION = "native-v16"
+def NATIVE_CACHE_VERSION = "native-v22"

 // Enable AOT ASan by creating .cell/asan_aot in the package root.
 function native_sanitize_flags() {
--- a/qbe_emit.cm
+++ b/qbe_emit.cm
@@ -1130,6 +1130,15 @@ var qbe_emit = function(ir, qbe, export_name) {
    var truthy = null
    var lhs_d = null
    var rhs_d = null
+    var peek1 = null
+    var peek2 = null
+    var peek3 = null
+    var peek4 = null
+    var peek5 = null
+    var floor_frame_slot = 0
+    var floor_this_slot = 0
+    var floor_arg_slot = 0
+    var floor_dest_slot = 0

    // Pre-scan: count invoke/tail_invoke points to assign segment numbers.
    // Must skip dead code (instructions after terminators) the same way
@@ -1141,15 +1150,43 @@ var qbe_emit = function(ir, qbe, export_name) {
      scan = instrs[si]
      si = si + 1
      if (is_text(scan)) {
+        // Skip optimizer nop pseudo-labels entirely.
+        if (starts_with(scan, "_nop_")) continue
        label_pos[sanitize(scan)] = si - 1
-        // Labels reset dead code state (unless they're nop pseudo-labels)
-        if (!starts_with(scan, "_nop_ur_") && !starts_with(scan, "_nop_tc_"))
-          scan_dead = false
+        // Real labels reset dead code state.
+        scan_dead = false
        continue
      }
      if (scan_dead) continue
      if (!is_array(scan)) continue
      scan_op = scan[0]
+
+      // Mirror of the main-loop floor peephole (currently disabled via `false &&` in both places):
+      // an inlined floor call sequence emits no invoke, so it must not be counted as a segment here.
+      if (false && scan_op == "access" && is_object(scan[2]) && scan[2].make == "intrinsic" && scan[2].name == "floor") {
+        if (si + 4 < length(instrs)) {
+          peek1 = instrs[si]
+          peek2 = instrs[si + 1]
+          peek3 = instrs[si + 2]
+          peek4 = instrs[si + 3]
+          peek5 = instrs[si + 4]
+          if (is_array(peek1) && peek1[0] == "frame" && peek1[2] == scan[1] && peek1[3] == 1 &&
+              is_array(peek2) && peek2[0] == "null" &&
+              is_array(peek3) && peek3[0] == "setarg" &&
+              is_array(peek4) && peek4[0] == "setarg" &&
+              is_array(peek5) && peek5[0] == "invoke") {
+            floor_frame_slot = peek1[1]
+            floor_this_slot = peek2[1]
+            if (peek3[1] == floor_frame_slot && peek3[2] == 0 && peek3[3] == floor_this_slot &&
+                peek4[1] == floor_frame_slot && peek4[2] == 1 &&
+                peek5[1] == floor_frame_slot && peek5[2] == floor_this_slot) {
+              si = si + 5
+              continue
+            }
+          }
+        }
+      }
+
      if (scan_op == "invoke") {
        invoke_count = invoke_count + 1
      }
@@ -1427,7 +1464,7 @@ var qbe_emit = function(ir, qbe, export_name) {

      // Labels are plain strings; skip nop pseudo-labels from streamline
      if (is_text(instr)) {
-        if (starts_with(instr, "_nop_ur_") || starts_with(instr, "_nop_tc_")) continue
+        if (starts_with(instr, "_nop_")) continue
        lbl = sanitize(instr)
        if (!last_was_term) {
          emit(`  jmp @${lbl}`)
@@ -1446,6 +1483,47 @@ var qbe_emit = function(ir, qbe, export_name) {
      a3 = instr[3]
      last_was_term = false

+      // Peephole (disabled via `false &&`; keep in sync with the pre-scan copy): inline `floor(x)`.
+      // Matched sequence: access floor; frame; null this; setarg 0 this; setarg 1 x; invoke
+      if (false && op == "access" && is_object(a2) && a2.make == "intrinsic" && a2.name == "floor") {
+        if (instr_idx + 5 < length(instrs)) {
+          peek1 = instrs[instr_idx + 1]
+          peek2 = instrs[instr_idx + 2]
+          peek3 = instrs[instr_idx + 3]
+          peek4 = instrs[instr_idx + 4]
+          peek5 = instrs[instr_idx + 5]
+          if (is_array(peek1) && peek1[0] == "frame" && peek1[2] == a1 && peek1[3] == 1 &&
+              is_array(peek2) && peek2[0] == "null" &&
+              is_array(peek3) && peek3[0] == "setarg" &&
+              is_array(peek4) && peek4[0] == "setarg" &&
+              is_array(peek5) && peek5[0] == "invoke") {
+            floor_frame_slot = peek1[1]
+            floor_this_slot = peek2[1]
+            if (peek3[1] == floor_frame_slot && peek3[2] == 0 && peek3[3] == floor_this_slot &&
+                peek4[1] == floor_frame_slot && peek4[2] == 1 &&
+                peek5[1] == floor_frame_slot && peek5[2] == floor_this_slot) {
+              floor_arg_slot = peek4[3]
+              floor_dest_slot = peek5[2]
+              v = s_read(floor_arg_slot)
+              p = fresh()
+              emit(`  %${p}_is_num =w copy ${emit_is_num_w(v)}`)
+              emit(`  jnz %${p}_is_num, @${p}_ok, @${p}_bad`)
+              emit(`@${p}_bad`)
+              s_write(floor_dest_slot, text(qbe.js_null))
+              emit(`  jmp @${p}_done`)
+              emit(`@${p}_ok`)
+              lhs_d = emit_num_to_double(v)
+              emit(`  %${p}_fd =d call $floor(d ${lhs_d})`)
+              emit(`  %${p}_r =l call $qbe_new_float64(l %ctx, d %${p}_fd)`)
+              s_write(floor_dest_slot, `%${p}_r`)
+              emit(`@${p}_done`)
+              i = instr_idx + 6
+              continue
+            }
+          }
+        }
+      }
+
      // --- Constants ---

      if (op == "int") {
--- a/streamline.cm
+++ b/streamline.cm
@@ -250,8 +250,13 @@ var streamline = function(ir, log) {
    var param_types = null
    var i = 0
    var j = 0
+    var iter = 0
    var instr = null
    var bt = null
+    var src = 0
+    var dst = 0
+    var old_bt = null
+    var changed = false
    var rule = null

    if (instructions == null || nr_args == 0) {
@@ -275,6 +280,31 @@ var streamline = function(ir, log) {
      i = i + 1
    }

+    // Propagate typed constraints backward through move chains.
+    changed = true
+    iter = 0
+    while (changed && iter < num_instr + 4) {
+      changed = false
+      i = 0
+      while (i < num_instr) {
+        instr = instructions[i]
+        if (is_array(instr) && instr[0] == "move") {
+          dst = instr[1]
+          src = instr[2]
+          bt = backward_types[dst]
+          if (bt != null && bt != T_UNKNOWN) {
+            old_bt = backward_types[src]
+            merge_backward(backward_types, src, bt)
+            if (backward_types[src] != old_bt) {
+              changed = true
+            }
+          }
+        }
+        i = i + 1
+      }
+      iter = iter + 1
+    }
+
    param_types = array(func.nr_slots)
    j = 1
    while (j <= nr_args) {
@@ -325,16 +355,35 @@ var streamline = function(ir, log) {
    is_record: [1, T_BOOL], is_stone: [1, T_BOOL]
  }

-  var infer_slot_write_types = function(func) {
+  // Result type assumed for an invoke whose callee slot was loaded by an "access" of the named intrinsic.
+  var intrinsic_return_types = {  // keyed by intrinsic name; looked up in infer_slot_write_types
+    abs: T_NUM, floor: T_NUM, ceiling: T_NUM,
+    round: T_NUM, trunc: T_NUM, fraction: T_NUM,
+    integer: T_NUM, sign: T_NUM,
+    max: T_NUM, min: T_NUM
+  }  // NOTE(review): the disabled floor peephole in qbe_emit.cm writes null for a non-numeric argument — confirm these intrinsics never return null before relying on T_NUM
+
+  var infer_slot_write_types = function(func, param_types) {
    var instructions = func.instructions
    var nr_args = func.nr_args != null ? func.nr_args : 0
    var num_instr = 0
    var write_types = null
+    var frame_callee = null
+    var intrinsic_slots = null
+    var move_dests = null
+    var move_srcs = null
    var i = 0
    var k = 0
+    var iter = 0
    var instr = null
+    var op = null
+    var src = 0
    var slot = 0
+    var old_typ = null
+    var src_typ = null
    var typ = null
+    var callee_slot = null
+    var changed = false
    var rule = null
    var cw_keys = null

@@ -344,11 +393,62 @@ var streamline = function(ir, log) {

    num_instr = length(instructions)
    write_types = array(func.nr_slots)
+    frame_callee = array(func.nr_slots)
+    intrinsic_slots = array(func.nr_slots)
+    move_dests = []
+    move_srcs = []
    i = 0
    while (i < num_instr) {
      instr = instructions[i]
      if (is_array(instr)) {
-        rule = write_rules[instr[0]]
+        op = instr[0]
+        if (op == "access") {
+          slot = instr[1]
+          if (slot > 0 && slot > nr_args) {
+            merge_backward(write_types, slot, access_value_type(instr[2]))
+          }
+          if (is_object(instr[2]) && instr[2].make == "intrinsic") {
+            typ = intrinsic_return_types[instr[2].name]
+            if (typ != null && slot >= 0 && slot < length(intrinsic_slots)) {
+              intrinsic_slots[slot] = typ
+            }
+          }
+          i = i + 1
+          continue
+        }
+        if (op == "move") {
+          slot = instr[1]
+          if (slot > 0 && slot > nr_args) {
+            move_dests[] = slot
+            move_srcs[] = instr[2]
+          }
+          i = i + 1
+          continue
+        }
+        if (op == "frame" || op == "goframe") {
+          if (is_number(instr[1]) && instr[1] >= 0 && instr[1] < length(frame_callee)) {
+            frame_callee[instr[1]] = instr[2]
+          }
+          i = i + 1
+          continue
+        }
+        if (op == "invoke" || op == "tail_invoke") {
+          slot = instr[2]
+          typ = T_UNKNOWN
+          callee_slot = frame_callee[instr[1]]
+          if (is_number(callee_slot) && callee_slot >= 0 && callee_slot < length(intrinsic_slots)) {
+            if (intrinsic_slots[callee_slot] != null) {
+              typ = intrinsic_slots[callee_slot]
+            }
+          }
+          if (slot > 0 && slot > nr_args) {
+            merge_backward(write_types, slot, typ)
+          }
+          i = i + 1
+          continue
+        }
+
+        rule = write_rules[op]
        if (rule != null) {
          slot = instr[rule[0]]
          typ = rule[1]
@@ -363,6 +463,54 @@ var streamline = function(ir, log) {
      i = i + 1
    }

+    // Resolve move writes from known source invariants (fixed-point).
+    changed = true
+    iter = 0
+    while (changed && iter < length(write_types) + 4) {
+      changed = false
+      k = 0
+      while (k < length(move_dests)) {
+        slot = move_dests[k]
+        src = move_srcs[k]
+        src_typ = null
+        if (is_number(src) && src >= 0) {
+          if (src < length(write_types) && write_types[src] != null) {
+            src_typ = write_types[src]
+          } else if (param_types != null && src < length(param_types) && param_types[src] != null) {
+            src_typ = param_types[src]
+          }
+        }
+        if (src_typ != null) {
+          old_typ = write_types[slot]
+          merge_backward(write_types, slot, src_typ)
+          if (write_types[slot] != old_typ) {
+            changed = true
+          }
+        }
+        k = k + 1
+      }
+      iter = iter + 1
+    }
+
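+    // A move whose source type is still unresolved can write any type. NOTE(review): the T_UNKNOWN set here is not re-propagated to later moves that read this slot — confirm that is sound.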
+    k = 0
+    while (k < length(move_dests)) {
+      slot = move_dests[k]
+      src = move_srcs[k]
+      src_typ = null
+      if (is_number(src) && src >= 0) {
+        if (src < length(write_types) && write_types[src] != null) {
+          src_typ = write_types[src]
+        } else if (param_types != null && src < length(param_types) && param_types[src] != null) {
+          src_typ = param_types[src]
+        }
+      }
+      if (src_typ == null && slot > 0 && slot > nr_args) {
+        merge_backward(write_types, slot, T_UNKNOWN)
+      }
+      k = k + 1
+    }
+
    // Closure-written slots can have any type at runtime — mark unknown
    if (func.closure_written != null) {
      cw_keys = array(func.closure_written)
@@ -976,6 +1124,94 @@ var streamline = function(ir, log) {
    return null
  }

+  // =========================================================
+  // Pass: eliminate_unreachable_cfg — walk jump/fallthrough edges from index 0 (and
+  // func.disruption_pc when set); array instructions never reached become "_nop_ucfg_N" strings.
+  // =========================================================
+  var eliminate_unreachable_cfg = function(func) {  // mutates func.instructions in place; always returns null
+    var instructions = func.instructions
+    var num_instr = 0
+    var disruption_pc = -1
+    var label_map = null
+    var reachable = null
+    var stack = null
+    var sp = 0
+    var idx = 0
+    var tgt = null
+    var instr = null
+    var op = null
+    var nc = 0  // counter used to number the generated nop strings
+
+    if (instructions == null || length(instructions) == 0) {
+      return null
+    }
+
+    num_instr = length(instructions)
+    if (is_number(func.disruption_pc)) {
+      disruption_pc = func.disruption_pc
+    }
+    label_map = {}  // label text -> instruction index
+    idx = 0
+    while (idx < num_instr) {
+      instr = instructions[idx]
+      if (is_text(instr) && !starts_with(instr, "_nop_")) {  // "_nop_" strings are never recorded as jump targets
+        label_map[instr] = idx
+      }
+      idx = idx + 1
+    }
+
+    reachable = array(num_instr, false)  // presumably num_instr entries initialised to false — confirm array() semantics
+    stack = [0]  // worklist roots: function entry...
+    if (disruption_pc > 0 && disruption_pc < num_instr) {  // ...plus disruption_pc when in range (index 0 is already a root)
+      stack[] = disruption_pc
+    }
+
+    sp = 0  // read cursor: entries are appended to stack and consumed in order, never removed
+    while (sp < length(stack)) {
+      idx = stack[sp]
+      sp = sp + 1
+
+      if (idx < 0 || idx >= num_instr || reachable[idx]) {  // out of range or already visited
+        continue
+      }
+      reachable[idx] = true
+      instr = instructions[idx]
+
+      if (!is_array(instr)) {  // non-array entries (labels, nop strings) just fall through
+        stack[] = idx + 1
+        continue
+      }
+
+      op = instr[0]
+      if (op == "jump") {  // unconditional: the target is the only successor
+        tgt = label_map[instr[1]]
+        if (is_number(tgt)) stack[] = tgt  // a label missing from label_map adds no successor
+        continue
+      }
+      if (op == "jump_true" || op == "jump_false" || op == "jump_not_null") {  // NOTE(review): any other branching opcode would be treated as fallthrough-only — confirm the IR has none
+        tgt = label_map[instr[2]]
+        if (is_number(tgt)) stack[] = tgt
+        stack[] = idx + 1  // conditional: fallthrough is also a successor
+        continue
+      }
+      if (op == "return" || op == "disrupt") {  // no successors are pushed for these ops
+        continue
+      }
+      stack[] = idx + 1  // every other instruction falls through to the next index
+    }
+
+    idx = 0
+    while (idx < num_instr) {
+      if (!reachable[idx] && is_array(instructions[idx])) {  // only array instructions are replaced; string entries are left as-is
+        nc = nc + 1
+        instructions[idx] = "_nop_ucfg_" + text(nc)
+      }
+      idx = idx + 1
+    }
+
+    return null
+  }
+
  // =========================================================
  // Pass: eliminate_dead_jumps — jump to next label → nop
  // =========================================================
@@ -1590,51 +1826,75 @@ var streamline = function(ir, log) {
    var param_types = null
    var write_types = null
    var slot_types = null
+    var run_cycle = function(suffix) {  // runs the pass pipeline once; suffix is appended to every pass name given to run_pass/verify_fn
+      var name = null
+      name = "infer_param_types" + suffix
+      run_pass(func, name, function() {
+        param_types = infer_param_types(func)  // assigns the enclosing function's param_types, read by the next two passes
+        return param_types
+      })
+      if (verify_fn) verify_fn(func, "after " + name)  // verification is skipped when verify_fn is falsy
+
+      name = "infer_slot_write_types" + suffix
+      run_pass(func, name, function() {
+        write_types = infer_slot_write_types(func, param_types)
+        return write_types
+      })
+      if (verify_fn) verify_fn(func, "after " + name)
+
+      name = "eliminate_type_checks" + suffix
+      run_pass(func, name, function() {
+        slot_types = eliminate_type_checks(func, param_types, write_types, log)
+        return slot_types
+      })
+      if (verify_fn) verify_fn(func, "after " + name)
+
+      if (log != null && log.type_deltas != null && slot_types != null) {  // record inferred types only when the log collects type_deltas
+        log.type_deltas[] = {
+          fn: func.name,
+          cycle: suffix == "" ? 1 : 2,  // empty suffix is logged as cycle 1, any other suffix as cycle 2
+          param_types: param_types,
+          slot_types: slot_types
+        }
+      }
+
+      name = "simplify_algebra" + suffix
+      run_pass(func, name, function() {
+        return simplify_algebra(func, log)
+      })
+      if (verify_fn) verify_fn(func, "after " + name)
+
+      name = "simplify_booleans" + suffix
+      run_pass(func, name, function() {
+        return simplify_booleans(func, log)
+      })
+      if (verify_fn) verify_fn(func, "after " + name)
+
+      name = "eliminate_moves" + suffix
+      run_pass(func, name, function() {
+        return eliminate_moves(func, log)
+      })
+      if (verify_fn) verify_fn(func, "after " + name)
+
+      name = "eliminate_unreachable" + suffix  // NOTE(review): eliminate_unreachable_cfg is not called in this pipeline — confirm it is wired up elsewhere
+      run_pass(func, name, function() {
+        return eliminate_unreachable(func)
+      })
+      if (verify_fn) verify_fn(func, "after " + name)
+
+      name = "eliminate_dead_jumps" + suffix
+      run_pass(func, name, function() {
+        return eliminate_dead_jumps(func, log)
+      })
+      if (verify_fn) verify_fn(func, "after " + name)
+      return null  // results are exposed through the captured param_types/write_types/slot_types, not the return value
+    }
+
    if (func.instructions == null || length(func.instructions) == 0) {
      return null
    }
-    run_pass(func, "infer_param_types", function() {
-      param_types = infer_param_types(func)
-      return param_types
-    })
-    if (verify_fn) verify_fn(func, "after infer_param_types")
-    run_pass(func, "infer_slot_write_types", function() {
-      write_types = infer_slot_write_types(func)
-      return write_types
-    })
-    if (verify_fn) verify_fn(func, "after infer_slot_write_types")
-    run_pass(func, "eliminate_type_checks", function() {
-      slot_types = eliminate_type_checks(func, param_types, write_types, log)
-      return slot_types
-    })
-    if (verify_fn) verify_fn(func, "after eliminate_type_checks")
-    if (log != null && log.type_deltas != null && slot_types != null) {
-      log.type_deltas[] = {
-        fn: func.name,
-        param_types: param_types,
-        slot_types: slot_types
-      }
-    }
-    run_pass(func, "simplify_algebra", function() {
-      return simplify_algebra(func, log)
-    })
-    if (verify_fn) verify_fn(func, "after simplify_algebra")
-    run_pass(func, "simplify_booleans", function() {
-      return simplify_booleans(func, log)
-    })
-    if (verify_fn) verify_fn(func, "after simplify_booleans")
-    run_pass(func, "eliminate_moves", function() {
-      return eliminate_moves(func, log)
-    })
-    if (verify_fn) verify_fn(func, "after eliminate_moves")
-    run_pass(func, "eliminate_unreachable", function() {
-      return eliminate_unreachable(func)
-    })
-    if (verify_fn) verify_fn(func, "after eliminate_unreachable")
-    run_pass(func, "eliminate_dead_jumps", function() {
-      return eliminate_dead_jumps(func, log)
-    })
-    if (verify_fn) verify_fn(func, "after eliminate_dead_jumps")
+
+    run_cycle("")
    return null
  }