Merge branch 'optimize_mcode'

2026-02-21 03:38:34 -06:00
parent eadad194be fea76ecac5
commit d041c49972
23 changed files with 51074 additions and 121790 deletions
--- a/boot/bootstrap.cm.mcode
+++ b/boot/bootstrap.cm.mcode
--- a/boot/fold.cm.mcode
+++ b/boot/fold.cm.mcode
--- a/boot/mcode.cm.mcode
+++ b/boot/mcode.cm.mcode
--- a/boot/parse.cm.mcode
+++ b/boot/parse.cm.mcode
--- a/boot/streamline.cm.mcode
+++ b/boot/streamline.cm.mcode
--- a/boot/tokenize.cm.mcode
+++ b/boot/tokenize.cm.mcode
--- a/boot_miscompile_bad.cm
+++ b/boot_miscompile_bad.cm
@@ -0,0 +1,74 @@
+// boot_miscompile_bad.cm — Documents a boot compiler miscompilation bug.
+//
+// BUG SUMMARY:
+// The boot compiler's optimizer (likely compress_slots, eliminate_moves,
+// or infer_param_types) miscompiles a specific pattern when it appears
+// inside streamline.cm. The pattern: an array-loaded value used as a
+// dynamic index for another array store, inside a guarded block:
+//
+//   sv = instr[j]
+//   if (is_number(sv) && sv >= 0 && sv < nr_slots) {
+//     last_ref[sv] = i    // <-- miscompiled: sv reads wrong slot
+//   }
+//
+// The bug is CONTEXT-DEPENDENT on streamline.cm's exact function/closure
+// structure. A standalone module with the same pattern does NOT trigger it.
+// The boot optimizer's cross-function analysis (infer_param_types, type
+// propagation, etc.) makes different decisions in the full streamline.cm
+// context, leading to the miscompilation.
+//
+// SYMPTOMS:
+// - 'log' is not defined  (comparison error path fires on non-comparable values)
+// - array index must be a number  (store_dynamic with corrupted index)
+// - Error line has NO reference to 'log' — the reference comes from the
+//   error-reporting code path of the < operator
+// - Non-deterministic: different error messages on different runs
+// - NOT a GC bug: persists with --heap 4GB
+// - NOT slot overflow: function has only 85 raw slots
+//
+// TO REPRODUCE:
+// In streamline.cm, replace the build_slot_liveness function body with
+// this version (raw operand scanning instead of get_slot_refs):
+//
+//   var build_slot_liveness = function(instructions, nr_slots) {
+//     var last_ref = array(nr_slots, -1)
+//     var n = length(instructions)
+//     var i = 0
+//     var j = 0
+//     var limit = 0
+//     var sv = 0
+//     var instr = null
+//
+//     while (i < n) {
+//       instr = instructions[i]
+//       if (is_array(instr)) {
+//         j = 1
+//         limit = length(instr) - 2
+//         while (j < limit) {
+//           sv = instr[j]
+//           if (is_number(sv) && sv >= 0 && sv < nr_slots) {
+//             last_ref[sv] = i
+//           }
+//           j = j + 1
+//         }
+//       }
+//       i = i + 1
+//     }
+//     return last_ref
+//   }
+//
+// Then: rm -rf .cell/build && ./cell --dev vm_suite
+//
+// WORKAROUND:
+// Use get_slot_refs(instr) to iterate only over known slot-reference
+// positions. This produces different IR that the boot optimizer handles
+// correctly, and is also more semantically correct.
+//
+// FIXING:
+// To find the root cause, compare the boot-compiled bytecodes of
+// build_slot_liveness (in the full streamline.cm context) vs the
+// source-compiled bytecodes. Use disasm.ce with --optimized to see
+// what the source compiler produces. The boot-compiled bytecodes
+// would need a C-level MachCode dump to inspect.
+
+return null
--- a/cfg.ce
+++ b/cfg.ce
@@ -0,0 +1,456 @@
+// cfg.ce — control flow graph
+//
+// Usage:
+//   cell cfg --fn <N|name> <file>         Text CFG for function
+//   cell cfg --dot --fn <N|name> <file>   DOT output for graphviz
+//   cell cfg <file>                       Text CFG for all functions
+//   cell cfg --optimized <file>           Use optimized IR instead of raw mcode
+
+var shop = use("internal/shop")
+
+// Right-pad `s` with spaces until it is at least `w` characters long.
+// Text that is already `w` characters or longer comes back unchanged.
+var pad_right = function(s, w) {
+  var padded = s
+  var missing = w - length(s)
+  while (missing > 0) {
+    padded = padded + " "
+    missing = missing - 1
+  }
+  return padded
+}
+
+// Render one instruction operand for display: null becomes "null", text is
+// wrapped in double quotes, logicals become "true"/"false", and every other
+// value goes through text().
+var fmt_val = function(v) {
+  var shown = null
+  if (is_null(v)) {
+    shown = "null"
+  } else if (is_number(v)) {
+    shown = text(v)
+  } else if (is_text(v)) {
+    shown = `"${v}"`
+  } else if (is_object(v)) {
+    shown = text(v)
+  } else if (is_logical(v)) {
+    shown = v ? "true" : "false"
+  } else {
+    shown = text(v)
+  }
+  return shown
+}
+
+// True for every jump opcode: the unconditional "jump" and the four
+// conditional variants.
+var is_jump_op = function(op) {
+  if (op == "jump") return true
+  if (op == "jump_true" || op == "jump_false") return true
+  return op == "jump_null" || op == "jump_not_null"
+}
+
+// True for the conditional jump opcodes, i.e. the jumps for which build_cfg
+// also records a fallthrough edge to the next block.
+var is_conditional_jump = function(op) {
+  if (op == "jump_true" || op == "jump_false") return true
+  return op == "jump_null" || op == "jump_not_null"
+}
+
+// True for opcodes that build_cfg treats as leaving the function: the block
+// ending in one of them gets a single EXIT edge instead of a successor.
+var is_terminator = function(op) {
+  if (op == "return" || op == "disrupt") return true
+  return op == "tail_invoke" || op == "goinvoke"
+}
+
+var run = function() {
+  var filename = null
+  var fn_filter = null
+  var show_dot = false
+  var use_optimized = false
+  var i = 0
+  var compiled = null
+  var main_name = null
+  var fi = 0
+  var func = null
+  var fname = null
+
+  while (i < length(args)) {
+    if (args[i] == '--fn') {
+      i = i + 1
+      fn_filter = args[i]
+    } else if (args[i] == '--dot') {
+      show_dot = true
+    } else if (args[i] == '--optimized') {
+      use_optimized = true
+    } else if (args[i] == '--help' || args[i] == '-h') {
+      log.console("Usage: cell cfg [--fn <N|name>] [--dot] [--optimized] <file>")
+      log.console("")
+      log.console("  --fn <N|name>   Filter to function by index or name")
+      log.console("  --dot           Output DOT format for graphviz")
+      log.console("  --optimized     Use optimized IR")
+      return null
+    } else if (!starts_with(args[i], '-')) {
+      filename = args[i]
+    }
+    i = i + 1
+  }
+
+  if (!filename) {
+    log.console("Usage: cell cfg [--fn <N|name>] [--dot] [--optimized] <file>")
+    return null
+  }
+
+  if (use_optimized) {
+    compiled = shop.compile_file(filename)
+  } else {
+    compiled = shop.mcode_file(filename)
+  }
+
+  // Decide whether a function passes the --fn filter. Without a filter every
+  // function matches; otherwise the filter must equal the function's index
+  // (non-negative indexes only) or be found inside its name by search().
+  var fn_matches = function(index, name) {
+    var found_at = null
+    if (fn_filter == null) return true
+    if (index >= 0 && text(index) == fn_filter) return true
+    if (name == null) return false
+    found_at = search(name, fn_filter)
+    if (found_at != null && found_at >= 0) return true
+    return false
+  }
+
+  // Split a function's instruction stream into basic blocks and connect them.
+  //
+  // `func.instructions` mixes array entries (real instructions: opcode at
+  // index 0, source line at index length-2) with text entries (labels, or
+  // "_nop_..." placeholders that are ignored as labels). Only array entries
+  // advance the PC.
+  //
+  // Returns an array of block records
+  //   {id, label, start_pc, end_pc, instrs, edges, first_line, last_line}
+  // or an empty array when the function has no instructions. Each edge is
+  // {target, kind}; target is a block index, or -1 for an exit edge.
+  var build_cfg = function(func) {
+    var instrs = func.instructions
+    var blocks = []
+    var label_to_block = {}
+    var pc_to_block = {}
+    var label_to_pc = {}
+    var block_start_pcs = {}
+    var after_terminator = false
+    var current_block = null
+    var current_label = null
+    var pc = 0
+    var ii = 0
+    var bi = 0
+    var instr = null
+    var op = null
+    var n = 0
+    var line_num = null
+    var blk = null
+    var last_instr_data = null
+    var last_op = null
+    var target_label = null
+    var target_pc = null
+    var target_bi = null
+    var edge_type = null
+
+    if (instrs == null || length(instrs) == 0) return []
+
+    // Pass 1: identify block start PCs. PC 0 always starts a block, and so
+    // does the instruction that follows any jump or terminator.
+    block_start_pcs["0"] = true
+    pc = 0
+    ii = 0
+    while (ii < length(instrs)) {
+      instr = instrs[ii]
+      if (is_array(instr)) {
+        op = instr[0]
+        if (after_terminator) {
+          block_start_pcs[text(pc)] = true
+          after_terminator = false
+        }
+        if (is_jump_op(op) || is_terminator(op)) {
+          after_terminator = true
+        }
+        pc = pc + 1
+      }
+      ii = ii + 1
+    }
+
+    // Pass 2: map labels to PCs and mark as block starts. A label's PC is the
+    // PC of the next real instruction after it.
+    pc = 0
+    ii = 0
+    while (ii < length(instrs)) {
+      instr = instrs[ii]
+      if (is_text(instr) && !starts_with(instr, "_nop_")) {
+        label_to_pc[instr] = pc
+        block_start_pcs[text(pc)] = true
+      } else if (is_array(instr)) {
+        pc = pc + 1
+      }
+      ii = ii + 1
+    }
+
+    // Pass 3: build basic blocks
+    pc = 0
+    ii = 0
+    current_label = null
+    while (ii < length(instrs)) {
+      instr = instrs[ii]
+      if (is_text(instr)) {
+        // Remember the most recent label; it names the next block opened.
+        if (!starts_with(instr, "_nop_")) {
+          current_label = instr
+        }
+        ii = ii + 1
+        continue
+      }
+
+      if (is_array(instr)) {
+        if (block_start_pcs[text(pc)]) {
+          // Close the block in progress and open a new one at this PC.
+          if (current_block != null) {
+            push(blocks, current_block)
+          }
+          current_block = {
+            id: length(blocks),
+            label: current_label,
+            start_pc: pc,
+            end_pc: pc,
+            instrs: [],
+            edges: [],
+            first_line: null,
+            last_line: null
+          }
+          current_label = null
+        }
+
+        if (current_block != null) {
+          push(current_block.instrs, {pc: pc, instr: instr})
+          current_block.end_pc = pc
+          // Track the source-line span covered by the block.
+          n = length(instr)
+          line_num = instr[n - 2]
+          if (line_num != null) {
+            if (current_block.first_line == null) {
+              current_block.first_line = line_num
+            }
+            current_block.last_line = line_num
+          }
+        }
+        pc = pc + 1
+      }
+      ii = ii + 1
+    }
+    if (current_block != null) {
+      push(blocks, current_block)
+    }
+
+    // Build block index
+    bi = 0
+    while (bi < length(blocks)) {
+      pc_to_block[text(blocks[bi].start_pc)] = bi
+      if (blocks[bi].label != null) {
+        label_to_block[blocks[bi].label] = bi
+      }
+      bi = bi + 1
+    }
+
+    // Pass 4: compute edges from the last instruction of every block
+    bi = 0
+    while (bi < length(blocks)) {
+      blk = blocks[bi]
+      if (length(blk.instrs) > 0) {
+        last_instr_data = blk.instrs[length(blk.instrs) - 1]
+        last_op = last_instr_data.instr[0]
+        n = length(last_instr_data.instr)
+
+        if (is_jump_op(last_op)) {
+          // "jump" carries its label at index 1; the conditional jumps carry
+          // the tested slot at index 1 and the label at index 2.
+          if (last_op == "jump") {
+            target_label = last_instr_data.instr[1]
+          } else {
+            target_label = last_instr_data.instr[2]
+          }
+
+          // Resolve by block label first, then by the label's PC. When
+          // several labels sit in front of the same instruction only the last
+          // one is stored on the block, so jumps to the others would lose
+          // their edge without the PC lookup.
+          target_bi = label_to_block[target_label]
+          if (target_bi == null && is_text(target_label)) {
+            target_pc = label_to_pc[target_label]
+            if (target_pc != null) {
+              target_bi = pc_to_block[text(target_pc)]
+            }
+          }
+          if (target_bi != null) {
+            // A jump to this block or an earlier one is reported as a loop.
+            edge_type = "jump"
+            if (target_bi <= bi) {
+              edge_type = "loop back-edge"
+            }
+            push(blk.edges, {target: target_bi, kind: edge_type})
+          }
+
+          if (is_conditional_jump(last_op)) {
+            if (bi + 1 < length(blocks)) {
+              push(blk.edges, {target: bi + 1, kind: "fallthrough"})
+            }
+          }
+        } else if (is_terminator(last_op)) {
+          push(blk.edges, {target: -1, kind: "EXIT (" + last_op + ")"})
+        } else {
+          if (bi + 1 < length(blocks)) {
+            push(blk.edges, {target: bi + 1, kind: "fallthrough"})
+          }
+        }
+      }
+      bi = bi + 1
+    }
+
+    return blocks
+  }
+
+  // Print the blocks produced by build_cfg as indented text through
+  // log.compile: a "=== name ===" banner, then for each block a header line,
+  // one line per instruction and one "->" line per outgoing edge.
+  // `blocks` is the array returned by build_cfg; `name` is the display name.
+  // Always returns null.
+  var print_cfg_text = function(blocks, name) {
+    var bi = 0
+    var blk = null
+    var header = null
+    var ii = 0
+    var idata = null
+    var instr = null
+    var op = null
+    var n = 0
+    var parts = null
+    var j = 0
+    var operands = null
+    var ei = 0
+    var edge = null
+    var target_label = null
+
+    log.compile(`\n=== ${name} ===`)
+
+    if (length(blocks) == 0) {
+      log.compile("  (empty)")
+      return null
+    }
+
+    bi = 0
+    while (bi < length(blocks)) {
+      blk = blocks[bi]
+      // Header: block id, optional label, PC range, optional source line(s).
+      header = `  B${text(bi)}`
+      if (blk.label != null) {
+        header = header + ` "${blk.label}"`
+      }
+      header = header + ` [pc ${text(blk.start_pc)}-${text(blk.end_pc)}`
+      if (blk.first_line != null) {
+        if (blk.first_line == blk.last_line) {
+          header = header + `, line ${text(blk.first_line)}`
+        } else {
+          header = header + `, lines ${text(blk.first_line)}-${text(blk.last_line)}`
+        }
+      }
+      header = header + "]:"
+
+      log.compile(header)
+
+      // Instructions: pc, opcode, then the operands at indexes 1 .. n-3
+      // (the last two entries of an instruction are not printed as operands).
+      ii = 0
+      while (ii < length(blk.instrs)) {
+        idata = blk.instrs[ii]
+        instr = idata.instr
+        op = instr[0]
+        n = length(instr)
+        parts = []
+        j = 1
+        while (j < n - 2) {
+          push(parts, fmt_val(instr[j]))
+          j = j + 1
+        }
+        operands = text(parts, ", ")
+        log.compile(`    ${pad_right(text(idata.pc), 6)}${pad_right(op, 15)}${operands}`)
+        ii = ii + 1
+      }
+
+      // Edges: target -1 marks an exit edge, whose kind text is printed alone.
+      ei = 0
+      while (ei < length(blk.edges)) {
+        edge = blk.edges[ei]
+        if (edge.target == -1) {
+          log.compile(`    -> ${edge.kind}`)
+        } else {
+          target_label = blocks[edge.target].label
+          if (target_label != null) {
+            log.compile(`    -> B${text(edge.target)} "${target_label}" (${edge.kind})`)
+          } else {
+            log.compile(`    -> B${text(edge.target)} (${edge.kind})`)
+          }
+        }
+        ei = ei + 1
+      }
+
+      log.compile("")
+      bi = bi + 1
+    }
+    return null
+  }
+
+  // Print the blocks produced by build_cfg as one Graphviz digraph through
+  // log.compile: a record-shaped node per block and a DOT edge for every
+  // edge whose target is a real block (exit edges, target -1, are skipped).
+  // `name` becomes the graph name after quotes are escaped and spaces are
+  // replaced by underscores. Always returns null.
+  // NOTE(review): only double quotes are escaped inside node labels; other
+  // characters that are special in record labels ({ } | < >) are passed
+  // through as-is — confirm whether operands or labels can contain them.
+  var print_cfg_dot = function(blocks, name) {
+    var safe_name = replace(replace(name, '"', '\\"'), ' ', '_')
+    var bi = 0
+    var blk = null
+    var label_text = null
+    var ii = 0
+    var idata = null
+    var instr = null
+    var op = null
+    var n = 0
+    var parts = null
+    var j = 0
+    var operands = null
+    var ei = 0
+    var edge = null
+    var style = null
+
+    log.compile(`digraph "${safe_name}" {`)
+    log.compile("  rankdir=TB;")
+    log.compile("  node [shape=record, fontname=monospace, fontsize=10];")
+
+    // Nodes: first record field is the block title (id, label, PC range,
+    // first source line); the second field lists the instructions.
+    bi = 0
+    while (bi < length(blocks)) {
+      blk = blocks[bi]
+      label_text = "B" + text(bi)
+      if (blk.label != null) {
+        label_text = label_text + " (" + blk.label + ")"
+      }
+      label_text = label_text + "\\npc " + text(blk.start_pc) + "-" + text(blk.end_pc)
+      if (blk.first_line != null) {
+        label_text = label_text + "\\nline " + text(blk.first_line)
+      }
+      label_text = label_text + "|"
+
+      ii = 0
+      while (ii < length(blk.instrs)) {
+        idata = blk.instrs[ii]
+        instr = idata.instr
+        op = instr[0]
+        n = length(instr)
+        parts = []
+        j = 1
+        while (j < n - 2) {
+          push(parts, fmt_val(instr[j]))
+          j = j + 1
+        }
+        operands = text(parts, ", ")
+        // "\\l" ends each instruction line inside the label.
+        label_text = label_text + text(idata.pc) + " " + op + " " + replace(operands, '"', '\\"') + "\\l"
+        ii = ii + 1
+      }
+
+      log.compile("  B" + text(bi) + " [label=\"{" + label_text + "}\"];")
+      bi = bi + 1
+    }
+
+    // Edges: loop back-edges are bold red and labelled "loop", fallthrough
+    // edges are dashed, every other edge uses the default style.
+    bi = 0
+    while (bi < length(blocks)) {
+      blk = blocks[bi]
+      ei = 0
+      while (ei < length(blk.edges)) {
+        edge = blk.edges[ei]
+        if (edge.target >= 0) {
+          style = ""
+          if (edge.kind == "loop back-edge") {
+            style = " [style=bold, color=red, label=\"loop\"]"
+          } else if (edge.kind == "fallthrough") {
+            style = " [style=dashed]"
+          }
+          log.compile(`  B${text(bi)} -> B${text(edge.target)}${style};`)
+        }
+        ei = ei + 1
+      }
+      bi = bi + 1
+    }
+
+    log.compile("}")
+    return null
+  }
+
+  // Build the CFG for one function and print it in the format selected on
+  // the command line (DOT when --dot was given, text otherwise).
+  // `index` is accepted for symmetry with the caller but is not used.
+  var process_function = function(func, name, index) {
+    var blocks = build_cfg(func)
+    if (!show_dot) {
+      print_cfg_text(blocks, name)
+      return null
+    }
+    print_cfg_dot(blocks, name)
+    return null
+  }
+
+  // Process functions
+  main_name = compiled.name != null ? compiled.name : "<main>"
+
+  if (compiled.main != null) {
+    if (fn_matches(-1, main_name)) {
+      process_function(compiled.main, main_name, -1)
+    }
+  }
+
+  if (compiled.functions != null) {
+    fi = 0
+    while (fi < length(compiled.functions)) {
+      func = compiled.functions[fi]
+      fname = func.name != null ? func.name : "<anonymous>"
+      if (fn_matches(fi, fname)) {
+        process_function(func, `[${text(fi)}] ${fname}`, fi)
+      }
+      fi = fi + 1
+    }
+  }
+
+  return null
+}
+
+run()
+$stop()
--- a/diff_ir.ce
+++ b/diff_ir.ce
@@ -0,0 +1,310 @@
+// diff_ir.ce — mcode vs streamline diff
+//
+// Usage:
+//   cell diff_ir <file>                Diff all functions
+//   cell diff_ir --fn <N|name> <file>  Diff only one function
+//   cell diff_ir --summary <file>      Counts only
+
+var fd = use("fd")
+var shop = use("internal/shop")
+
+// Right-pad `s` with spaces until it is at least `w` characters long.
+// Text that is already `w` characters or longer comes back unchanged.
+var pad_right = function(s, w) {
+  var padded = s
+  var missing = w - length(s)
+  while (missing > 0) {
+    padded = padded + " "
+    missing = missing - 1
+  }
+  return padded
+}
+
+// Render one instruction operand for display: null becomes "null", text is
+// wrapped in double quotes, logicals become "true"/"false", and every other
+// value goes through text().
+var fmt_val = function(v) {
+  var shown = null
+  if (is_null(v)) {
+    shown = "null"
+  } else if (is_number(v)) {
+    shown = text(v)
+  } else if (is_text(v)) {
+    shown = `"${v}"`
+  } else if (is_object(v)) {
+    shown = text(v)
+  } else if (is_logical(v)) {
+    shown = v ? "true" : "false"
+  } else {
+    shown = text(v)
+  }
+  return shown
+}
+
+var run = function() {
+  var fn_filter = null
+  var show_summary = false
+  var filename = null
+  var i = 0
+  var mcode_ir = null
+  var opt_ir = null
+  var source_text = null
+  var source_lines = null
+  var main_name = null
+  var fi = 0
+  var func = null
+  var opt_func = null
+  var fname = null
+
+  while (i < length(args)) {
+    if (args[i] == '--fn') {
+      i = i + 1
+      fn_filter = args[i]
+    } else if (args[i] == '--summary') {
+      show_summary = true
+    } else if (args[i] == '--help' || args[i] == '-h') {
+      log.console("Usage: cell diff_ir [--fn <N|name>] [--summary] <file>")
+      log.console("")
+      log.console("  --fn <N|name>   Filter to function by index or name")
+      log.console("  --summary       Show counts only")
+      return null
+    } else if (!starts_with(args[i], '-')) {
+      filename = args[i]
+    }
+    i = i + 1
+  }
+
+  if (!filename) {
+    log.console("Usage: cell diff_ir [--fn <N|name>] [--summary] <file>")
+    return null
+  }
+
+  mcode_ir = shop.mcode_file(filename)
+  opt_ir = shop.compile_file(filename)
+
+  source_text = text(fd.slurp(filename))
+  source_lines = array(source_text, "\n")
+
+  // Fetch a line of the source file by its 1-based line number.
+  // Returns null when the number falls outside the file.
+  var get_source_line = function(line_num) {
+    var out_of_range = line_num < 1 || line_num > length(source_lines)
+    return out_of_range ? null : source_lines[line_num - 1]
+  }
+
+  // Decide whether a function passes the --fn filter. Without a filter every
+  // function matches; otherwise the filter must equal the function's index
+  // (non-negative indexes only) or be found inside its name by search().
+  var fn_matches = function(index, name) {
+    var found_at = null
+    if (fn_filter == null) return true
+    if (index >= 0 && text(index) == fn_filter) return true
+    if (name == null) return false
+    found_at = search(name, fn_filter)
+    if (found_at != null && found_at >= 0) return true
+    return false
+  }
+
+  // Format one instruction array as a fixed-width line: the opcode padded to
+  // 15 columns, the comma-separated operands (indexes 1 .. length-3), the
+  // whole padded to 45 columns, then ":<line>" when the entry at index
+  // length-2 is not null.
+  var fmt_instr = function(instr) {
+    var count = length(instr)
+    var line_slot = count - 2
+    var src_line = instr[line_slot]
+    var rendered = []
+    var k = 1
+    var operands = null
+    var suffix = ""
+    while (k < line_slot) {
+      push(rendered, fmt_val(instr[k]))
+      k = k + 1
+    }
+    operands = text(rendered, ", ")
+    if (src_line != null) {
+      suffix = `:${text(src_line)}`
+    }
+    return pad_right(`${pad_right(instr[0], 15)}${operands}`, 45) + suffix
+  }
+
+  // Classify how the optimizer changed one instruction slot.
+  //   "eliminated" - `after` is a "_nop_..." text placeholder
+  //   "rewritten"  - both are instruction arrays that differ in opcode,
+  //                  length, or any operand at indexes 1 .. length-3
+  //   "identical"  - anything else
+  var classify = function(before, after) {
+    var width = 0
+    var pos = 1
+    if (is_text(after) && starts_with(after, "_nop_")) return "eliminated"
+    if (!is_array(before) || !is_array(after)) return "identical"
+    if (before[0] != after[0]) return "rewritten"
+    width = length(before)
+    if (width != length(after)) return "rewritten"
+    while (pos < width - 2) {
+      if (before[pos] != after[pos]) return "rewritten"
+      pos = pos + 1
+    }
+    return "identical"
+  }
+
+  var total_eliminated = 0
+  var total_rewritten = 0
+  var total_funcs = 0
+
+  // Compare one function's mcode instructions with its optimized
+  // instructions and report the differences through log.compile.
+  //
+  // Both instruction arrays are walked in lockstep (same index in each); the
+  // walk stops at the end of the shorter array. The per-function counts are
+  // added to total_eliminated / total_rewritten and total_funcs is bumped.
+  // With --summary only a one-line count is printed; otherwise a "-"/"+"
+  // pair is printed for every changed instruction, grouped under the source
+  // line it came from. `index` is not used. Always returns null.
+  // NOTE(review): the positional comparison presumably relies on the
+  // optimizer replacing instructions in place rather than inserting or
+  // deleting entries — confirm for passes that add instructions.
+  var diff_function = function(mcode_func, opt_func, name, index) {
+    var nr_args = mcode_func.nr_args != null ? mcode_func.nr_args : 0
+    var nr_slots = mcode_func.nr_slots != null ? mcode_func.nr_slots : 0
+    var m_instrs = mcode_func.instructions
+    var o_instrs = opt_func.instructions
+    var eliminated = 0
+    var rewritten = 0
+    var mi = 0
+    var oi = 0
+    var pc = 0
+    var m_instr = null
+    var o_instr = null
+    var kind = null
+    var last_line = null
+    var instr_line = null
+    var n = 0
+    var src = null
+    var annotation = null
+
+    if (m_instrs == null) m_instrs = []
+    if (o_instrs == null) o_instrs = []
+
+    // First pass: count changes
+    mi = 0
+    oi = 0
+    while (mi < length(m_instrs) && oi < length(o_instrs)) {
+      m_instr = m_instrs[mi]
+      o_instr = o_instrs[oi]
+
+      // Text entries on the mcode side (labels or nops) are never counted.
+      if (is_text(m_instr)) {
+        mi = mi + 1
+        oi = oi + 1
+        continue
+      }
+
+      // An instruction that became a "_nop_" placeholder was eliminated.
+      if (is_text(o_instr) && starts_with(o_instr, "_nop_")) {
+        if (is_array(m_instr)) {
+          eliminated = eliminated + 1
+        }
+        mi = mi + 1
+        oi = oi + 1
+        continue
+      }
+
+      if (is_array(m_instr) && is_array(o_instr)) {
+        kind = classify(m_instr, o_instr)
+        if (kind == "rewritten") {
+          rewritten = rewritten + 1
+        }
+      }
+      mi = mi + 1
+      oi = oi + 1
+    }
+
+    total_eliminated = total_eliminated + eliminated
+    total_rewritten = total_rewritten + rewritten
+    total_funcs = total_funcs + 1
+
+    if (show_summary) {
+      if (eliminated == 0 && rewritten == 0) {
+        log.compile(`  ${pad_right(name + ":", 40)} 0 eliminated, 0 rewritten (unchanged)`)
+      } else {
+        log.compile(`  ${pad_right(name + ":", 40)} ${text(eliminated)} eliminated, ${text(rewritten)} rewritten`)
+      }
+      return null
+    }
+
+    // Outside summary mode, unchanged functions print nothing at all.
+    if (eliminated == 0 && rewritten == 0) return null
+
+    log.compile(`\n=== ${name} (args=${text(nr_args)}, slots=${text(nr_slots)}) ===`)
+    log.compile(`  ${text(eliminated)} eliminated, ${text(rewritten)} rewritten`)
+
+    // Second pass: show diffs. `pc` counts instruction slots only (text
+    // entries on the mcode side do not advance it); `last_line` remembers
+    // the source line of the most recent header so it is printed once.
+    mi = 0
+    oi = 0
+    pc = 0
+    last_line = null
+    while (mi < length(m_instrs) && oi < length(o_instrs)) {
+      m_instr = m_instrs[mi]
+      o_instr = o_instrs[oi]
+
+      // Labels on the mcode side: skip.
+      if (is_text(m_instr) && !starts_with(m_instr, "_nop_")) {
+        mi = mi + 1
+        oi = oi + 1
+        continue
+      }
+
+      // Nop placeholders already present on the mcode side: skip.
+      if (is_text(m_instr) && starts_with(m_instr, "_nop_")) {
+        mi = mi + 1
+        oi = oi + 1
+        continue
+      }
+
+      // Eliminated: print the original instruction and the placeholder.
+      if (is_text(o_instr) && starts_with(o_instr, "_nop_")) {
+        if (is_array(m_instr)) {
+          n = length(m_instr)
+          instr_line = m_instr[n - 2]
+          if (instr_line != last_line && instr_line != null) {
+            src = get_source_line(instr_line)
+            if (src != null) src = trim(src)
+            if (last_line != null) log.compile("")
+            if (src != null && length(src) > 0) {
+              log.compile(`  --- line ${text(instr_line)}: ${src} ---`)
+            }
+            last_line = instr_line
+          }
+          log.compile(`  - ${pad_right(text(pc), 6)}${fmt_instr(m_instr)}`)
+          log.compile(`  + ${pad_right(text(pc), 6)}${pad_right(o_instr, 45)}      (eliminated)`)
+        }
+        mi = mi + 1
+        oi = oi + 1
+        pc = pc + 1
+        continue
+      }
+
+      // Both sides are instructions: print the pair only when they differ.
+      if (is_array(m_instr) && is_array(o_instr)) {
+        kind = classify(m_instr, o_instr)
+        if (kind != "identical") {
+          n = length(m_instr)
+          instr_line = m_instr[n - 2]
+          if (instr_line != last_line && instr_line != null) {
+            src = get_source_line(instr_line)
+            if (src != null) src = trim(src)
+            if (last_line != null) log.compile("")
+            if (src != null && length(src) > 0) {
+              log.compile(`  --- line ${text(instr_line)}: ${src} ---`)
+            }
+            last_line = instr_line
+          }
+
+          // A rewrite into "concat" from a different opcode is labelled
+          // "(specialized)"; every other rewrite is "(rewritten)".
+          annotation = ""
+          if (kind == "rewritten") {
+            if (o_instr[0] == "concat" && m_instr[0] != "concat") {
+              annotation = "(specialized)"
+            } else {
+              annotation = "(rewritten)"
+            }
+          }
+
+          log.compile(`  - ${pad_right(text(pc), 6)}${fmt_instr(m_instr)}`)
+          log.compile(`  + ${pad_right(text(pc), 6)}${fmt_instr(o_instr)}  ${annotation}`)
+        }
+        pc = pc + 1
+      }
+
+      mi = mi + 1
+      oi = oi + 1
+    }
+
+    return null
+  }
+
+  // Process functions
+  main_name = mcode_ir.name != null ? mcode_ir.name : "<main>"
+
+  if (mcode_ir.main != null && opt_ir.main != null) {
+    if (fn_matches(-1, main_name)) {
+      diff_function(mcode_ir.main, opt_ir.main, main_name, -1)
+    }
+  }
+
+  if (mcode_ir.functions != null && opt_ir.functions != null) {
+    fi = 0
+    while (fi < length(mcode_ir.functions) && fi < length(opt_ir.functions)) {
+      func = mcode_ir.functions[fi]
+      opt_func = opt_ir.functions[fi]
+      fname = func.name != null ? func.name : "<anonymous>"
+      if (fn_matches(fi, fname)) {
+        diff_function(func, opt_func, `[${text(fi)}] ${fname}`, fi)
+      }
+      fi = fi + 1
+    }
+  }
+
+  if (show_summary) {
+    log.compile(`\n  total: ${text(total_eliminated)} eliminated, ${text(total_rewritten)} rewritten across ${text(total_funcs)} functions`)
+  }
+
+  return null
+}
+
+run()
+$stop()
--- a/docs/compiler-tools.md
+++ b/docs/compiler-tools.md
@@ -30,6 +30,10 @@ Each stage has a corresponding CLI tool that lets you see its output.
 | streamline  | `streamline.ce --ir`      | Human-readable canonical IR            |
 | disasm      | `disasm.ce`               | Source-interleaved disassembly          |
 | disasm      | `disasm.ce --optimized`   | Optimized source-interleaved disassembly |
+| diff        | `diff_ir.ce`              | Mcode vs streamline instruction diff   |
+| xref        | `xref.ce`                 | Cross-reference / call creation graph  |
+| cfg         | `cfg.ce`                  | Control flow graph (basic blocks)      |
+| slots       | `slots.ce`                | Slot data flow / use-def chains        |
 | all         | `ir_report.ce`            | Structured optimizer flight recorder   |

 All tools take a source file as input and run the pipeline up to the relevant stage.
@@ -141,6 +145,160 @@ Function creation instructions include a cross-reference annotation showing the
  3     function       5, 12                                  :235  ; -> [12] helper_fn
 ```

+## diff_ir.ce
+
+Compares mcode IR (before optimization) with streamline IR (after optimization), showing what the optimizer changed. Useful for understanding which instructions were eliminated, specialized, or rewritten.
+
+```bash
+cell diff_ir <file>                  # diff all functions
+cell diff_ir --fn <N|name> <file>    # diff only one function
+cell diff_ir --summary <file>        # counts only
+```
+
+| Flag | Description |
+|------|-------------|
+| (none) | Show all diffs with source interleaving |
+| `--fn <N\|name>` | Filter to specific function by index or name |
+| `--summary` | Show only eliminated/rewritten counts per function |
+
+### Output Format
+
+Changed instructions are shown in diff style with `-` (before) and `+` (after) lines:
+
+```
+=== [0] <anonymous> (args=1, slots=40) ===
+  17 eliminated, 51 rewritten
+
+  --- line 4: if (n <= 1) { ---
+  - 1     is_int         4, 1                          :4
+  + 1     is_int         3, 1                          :4  (rewritten)
+  - 3     is_int         5, 2                          :4
+  + 3     _nop_tc_1                                         (eliminated)
+```
+
+Summary mode gives a quick overview:
+
+```
+  [0] <anonymous>:                       17 eliminated, 51 rewritten
+  [1] <anonymous>:                       65 eliminated, 181 rewritten
+  total: 86 eliminated, 250 rewritten across 4 functions
+```
+
+## xref.ce
+
+Cross-reference / call graph tool. Shows which functions create other functions (via `function` instructions), building a creation tree.
+
+```bash
+cell xref <file>                     # full creation tree
+cell xref --callers <N> <file>       # who creates function [N]?
+cell xref --callees <N> <file>       # what does [N] create/call?
+cell xref --dot <file>               # DOT graph for graphviz
+cell xref --optimized <file>         # use optimized IR
+```
+
+| Flag | Description |
+|------|-------------|
+| (none) | Indented creation tree from main |
+| `--callers <N>` | Show which functions create function [N] |
+| `--callees <N>` | Show what function [N] creates (use -1 for main) |
+| `--dot` | Output DOT format for graphviz |
+| `--optimized` | Use optimized IR instead of raw mcode |
+
+### Output Format
+
+Default tree view:
+
+```
+demo_disasm.cm
+  [0] <anonymous>
+  [1] <anonymous>
+  [2] <anonymous>
+```
+
+Caller/callee query:
+
+```
+Callers of [0] <anonymous>:
+  demo_disasm.cm at line 3
+```
+
+DOT output can be piped to graphviz: `cell xref --dot file.cm | dot -Tpng -o xref.png`
+
+## cfg.ce
+
+Control flow graph tool. Identifies basic blocks from labels and jumps, computes edges, and detects loop back-edges.
+
+```bash
+cell cfg --fn <N|name> <file>        # text CFG for function
+cell cfg --dot --fn <N|name> <file>  # DOT output for graphviz
+cell cfg <file>                      # text CFG for all functions
+cell cfg --optimized <file>          # use optimized IR
+```
+
+| Flag | Description |
+|------|-------------|
+| `--fn <N\|name>` | Filter to specific function by index or name |
+| `--dot` | Output DOT format for graphviz |
+| `--optimized` | Use optimized IR instead of raw mcode |
+
+### Output Format
+
+```
+=== [0] <anonymous> ===
+  B0 [pc 0-2, line 4]:
+    0     access         2, 1
+    1     is_int         4, 1
+    2     jump_false     4, "rel_ni_2"
+    -> B3 "rel_ni_2" (jump)
+    -> B1 (fallthrough)
+
+  B1 [pc 3-4, line 4]:
+    3     is_int         5, 2
+    4     jump_false     5, "rel_ni_2"
+    -> B3 "rel_ni_2" (jump)
+    -> B2 (fallthrough)
+```
+
+Each block shows its ID, PC range, source lines, instructions, and outgoing edges. Loop back-edges (target PC <= source PC) are annotated.
+
+## slots.ce
+
+Slot data flow analysis. Builds use-def chains for every slot in a function, showing where each slot is defined and used. Optionally captures type information from streamline.
+
+```bash
+cell slots --fn <N|name> <file>              # slot summary for function
+cell slots --slot <N> --fn <N|name> <file>   # trace slot N
+cell slots <file>                            # slot summary for all functions
+```
+
+| Flag | Description |
+|------|-------------|
+| `--fn <N\|name>` | Filter to specific function by index or name |
+| `--slot <N>` | Show chronological DEF/USE trace for a specific slot |
+
+### Output Format
+
+Summary shows each slot with its def count, use count, inferred type, and first definition. Dead slots (defined but never used) are flagged:
+
+```
+=== [0] <anonymous> (args=1, slots=40) ===
+  slot    defs    uses    type        first-def
+  s0      0       0       -           (this)
+  s1      0       10      -           (arg 0)
+  s2      1       6       -           pc 0: access
+  s10     1       0       -           pc 29: invoke  <- dead
+```
+
+Slot trace (`--slot N`) shows every DEF and USE in program order:
+
+```
+=== slot 3 in [0] <anonymous> ===
+  DEF  pc 5:     le_int         3, 1, 2                       :4
+  DEF  pc 11:    le_float       3, 1, 2                       :4
+  DEF  pc 17:    le_text        3, 1, 2                       :4
+  USE  pc 31:    jump_false     3, "if_else_0"                :4
+```
+
 ## seed.ce

 Regenerates the boot seed files in `boot/`. These are pre-compiled mcode IR (JSON) files that bootstrap the compilation pipeline on cold start.
--- a/docs/spec/mach.md
+++ b/docs/spec/mach.md
@@ -93,3 +93,13 @@ Arithmetic ops (ADD, SUB, MUL, DIV, MOD, POW) are executed inline without callin
 DIV and MOD check for zero divisor (→ null). POW uses `pow()` with non-finite handling for finite inputs.

 Comparison ops (EQ through GE) and bitwise ops still use `reg_vm_binop()` for their slow paths, as they handle a wider range of type combinations (string comparisons, null equality, etc.).
+
+## String Concatenation
+
+CONCAT has a three-tier dispatch. The first two tiers apply to self-assign patterns (`concat R(A), R(A), R(C)`, where dest equals the left operand); the third handles every other concat:
+
+1. **In-place append**: If `R(A)` is a mutable heap text (S bit clear) with `length + rhs_length <= cap56`, characters are appended directly. Zero allocation, zero GC.
+2. **Growth allocation** (`JS_ConcatStringGrow`): Allocates a new text with 2x capacity and does **not** stone the result, leaving it mutable for subsequent appends.
+3. **Exact-fit stoned** (`JS_ConcatString`): Used when dest differs from the left operand (normal non-self-assign concat).
+
+The `stone_text` instruction (iABC, B=0, C=0) sets the S bit on a mutable heap text in `R(A)`. For non-pointer values or already-stoned text, it is a no-op. This instruction is emitted by the streamline optimizer at escape points; see [Streamline — insert_stone_text](streamline.md#7-insert_stone_text-mutable-text-escape-analysis) and [Stone Memory — Mutable Text](stone.md#mutable-text-concatenation).
--- a/docs/spec/mcode.md
+++ b/docs/spec/mcode.md
@@ -101,6 +101,11 @@ Operands are register slot numbers (integers), constant values (strings, numbers
 | Instruction | Operands | Description |
 |-------------|----------|-------------|
 | `concat` | `dest, a, b` | `dest = a ~ b` (text concatenation) |
+| `stone_text` | `slot` | Stone a mutable text value (see below) |
+
+The `stone_text` instruction is emitted by the streamline optimizer's escape analysis pass (`insert_stone_text`). It freezes a mutable text value before it escapes its defining slot — for example, before a `move`, `setarg`, `store_field`, `push`, or `put`. The instruction is only inserted when the slot is provably `T_TEXT`; non-text values never need stoning. See [Streamline Optimizer — insert_stone_text](streamline.md#7-insert_stone_text-mutable-text-escape-analysis) for details.
+
+At the VM level, `stone_text` is a single-operand instruction (iABC with B=0, C=0). If the slot holds a heap text without the S bit set, it sets the S bit. For all other values (integers, booleans, already-stoned text, etc.), it is a no-op.

 ### Comparison — Integer

--- a/docs/spec/stone.md
+++ b/docs/spec/stone.md
@@ -77,6 +77,30 @@ Messages between actors are stoned before delivery, ensuring actors never share

 Literal objects and arrays that can be determined at compile time may be allocated directly in stone memory.

+## Mutable Text Concatenation
+
+String concatenation in a loop (`s = s + "x"`) is optimized to O(n) amortized by leaving concat results **unstoned** with over-allocated capacity. On the next concatenation, if the destination text is mutable (S bit clear) and has enough room, the VM appends in-place with zero allocation.
+
+### How It Works
+
+When the VM executes `concat dest, dest, src` (same destination and left operand — a self-assign pattern):
+
+1. **Inline fast path**: If `dest` holds a heap text, is not stoned, and `length + src_length <= capacity` — append characters in place, update length, done. No allocation, no GC possible.
+
+2. **Growth path** (`JS_ConcatStringGrow`): Allocate a new text with `capacity = max(new_length * 2, 16)`, copy both operands, and return the result **without stoning** it. The 2x growth factor means a loop of N concatenations does O(log N) allocations totaling O(N) character copies.
+
+3. **Exact-fit path** (`JS_ConcatString`): When `dest != left` (not self-assign), the existing exact-fit stoned path is used. This is the normal case for expressions like `var c = a + b`.
+
+### Safety Invariant
+
+**An unstoned heap text is uniquely referenced by exactly one slot.** This is enforced by the `stone_text` mcode instruction, which the [streamline optimizer](streamline.md#7-insert_stone_text-mutable-text-escape-analysis) inserts before any instruction that would create a second reference to the value (move, store, push, setarg, put). Two VM-level guards cover cases where the compiler cannot prove the type: `get` (closure reads) and `return` (inter-frame returns).
+
+### Why Over-Allocation Is GC-Safe
+
+- The copying collector copies based on `cap56` (the object header's capacity field), not `length`. Over-allocated capacity survives GC.
+- `js_alloc_string` zero-fills the packed data region, so padding beyond `length` is always clean.
+- String comparisons, hashing, and interning all use `length`, not `cap56`. Extra capacity is invisible to string operations.
+
 ## Relationship to GC

 The Cheney copying collector only operates on the mutable heap. During collection, when the collector encounters a pointer to stone memory (S bit set), it skips it — stone objects are roots that never move. This means stone memory acts as a permanent root set with zero GC overhead.
--- a/docs/spec/streamline.md
+++ b/docs/spec/streamline.md
@@ -164,7 +164,44 @@ Removes `move a, a` instructions where the source and destination are the same s

 **Nop prefix:** `_nop_mv_`

-### 7. eliminate_unreachable (dead code after return)
+### 7. insert_stone_text (mutable text escape analysis)
+
+Inserts `stone_text` instructions before mutable text values escape their defining slot. This pass supports the mutable text concatenation optimization (see [Stone Memory — Mutable Text](stone.md#mutable-text-concatenation)), which leaves `concat` results unstoned with excess capacity so that subsequent `s = s + x` can append in-place.
+
+The invariant is: **an unstoned heap text is uniquely referenced by exactly one slot.** This pass ensures that whenever a text value is copied or shared (via move, store, push, function argument, closure write, etc.), it is stoned first.
+
+**Algorithm:**
+
+1. **Compute liveness.** Build `first_ref[slot]` and `last_ref[slot]` arrays by scanning all instructions. Extend live ranges for backward jumps (loops): if a backward jump targets label L at position `lpos`, every slot referenced between `lpos` and the jump has its `last_ref` extended to the jump position.
+
+2. **Forward walk with type tracking.** Walk instructions using `track_types` to maintain per-slot types. At each escape point, if the escaping slot is provably `T_TEXT`, insert `stone_text slot` before the instruction.
+
+3. **Move special case.** For `move dest, src`: only insert `stone_text src` if the source is `T_TEXT` **and** `last_ref[src] > i` (the source slot is still live after the move, meaning both slots alias the same text). If the source is dead after the move, the value transfers uniquely — no stoning needed.
+
+**Escape points and the slot that gets stoned:**
+
+| Instruction | Stoned slot | Why it escapes |
+|---|---|---|
+| `move` | source (if still live) | Two slots alias the same value |
+| `store_field` | value | Stored to object property |
+| `store_index` | value | Stored to array element |
+| `store_dynamic` | value | Dynamic property store |
+| `push` | value | Pushed to array |
+| `setarg` | value | Passed as function argument |
+| `put` | source | Written to outer closure frame |
+
+**Not handled by this pass** (handled by VM guards instead):
+
+| Instruction | Reason |
+|---|---|
+| `get` (closure read) | Value arrives from outer frame; type may be T_UNKNOWN at compile time |
+| `return` | Return value's type may be T_UNKNOWN; VM stones at inter-frame boundary |
+
+These two cases use runtime `stone_mutable_text` guards in the VM because the streamline pass cannot always prove the slot type across frame boundaries.
+
+**Nop prefix:** none (inserts instructions, does not create nops)
+
+### 8. eliminate_unreachable (dead code after return)

 Nops instructions after `return` until the next real label. Only `return` is treated as a terminal instruction; `disrupt` is not, because the disruption handler code immediately follows `disrupt` and must remain reachable.

@@ -172,13 +209,13 @@ The mcode compiler emits a label at disruption handler entry points (see `emit_l

 **Nop prefix:** `_nop_ur_`

-### 8. eliminate_dead_jumps (jump-to-next-label elimination)
+### 9. eliminate_dead_jumps (jump-to-next-label elimination)

 Removes `jump L` instructions where `L` is the immediately following label (skipping over any intervening nop strings). These are common after other passes eliminate conditional branches, leaving behind jumps that fall through naturally.

 **Nop prefix:** `_nop_dj_`

-### 9. diagnose_function (compile-time diagnostics)
+### 10. diagnose_function (compile-time diagnostics)

 Optional pass that runs when `_warn` is set on the mcode input. Performs a forward type-tracking scan and emits diagnostics for provably wrong operations. Diagnostics are collected in `ir._diagnostics` as `{severity, file, line, col, message}` records.

@@ -219,6 +256,7 @@ eliminate_type_checks    → uses param_types + write_types
 simplify_algebra
 simplify_booleans
 eliminate_moves
+insert_stone_text        → escape analysis for mutable text
 eliminate_unreachable
 eliminate_dead_jumps
 diagnose_function        → optional, when _warn is set
@@ -286,7 +324,9 @@ move 2, 7           // i = temp
 subtract 2, 2, 6    // i = i - 1 (direct)
 ```

-The `+` operator is excluded from target slot propagation when it would use the full text+num dispatch (i.e., when neither operand is a known number), because writing both `concat` and `add` to the variable's slot would pollute its write type. When the known-number shortcut applies, `+` uses `emit_numeric_binop` and would be safe for target propagation, but this is not currently implemented — the exclusion is by operator kind, not by dispatch path.
+The `+` operator uses target slot propagation when the target slot equals the left operand (`target == left_slot`), i.e. for self-assign patterns like `s = s + x`. In this case both `concat` and `add` write to the same slot that already holds the left operand, so write-type pollution is acceptable — the value is being updated in place. For other cases (target differs from left operand), `+` still allocates a temp to avoid polluting the target slot's write type with both T_TEXT and T_NUM.
+
+This enables the VM's in-place append fast path for string concatenation: when `concat dest, dest, src` has the same destination and left operand, the VM can append directly to a mutable text's excess capacity without allocating.

 ## Debugging Tools

@@ -375,7 +415,7 @@ This was implemented and tested but causes a bootstrap failure during self-hosti

 ### Target Slot Propagation for Add with Known Numbers

-When the known-number add shortcut applies (one operand is a literal number), the generated code uses `emit_numeric_binop` which has a single write path. Target slot propagation should be safe in this case, but is currently blocked by the blanket `kind != "+"` exclusion. Refining the exclusion to check whether the shortcut will apply (by testing `is_known_number` on either operand) would enable direct writes for patterns like `i = i + 1`.
+When the known-number add shortcut applies (one operand is a literal number), the generated code uses `emit_numeric_binop` which has a single write path. Target slot propagation is already enabled for the self-assign case (`i = i + 1`), but when the target differs from the left operand a temp is still used, even if one operand is a known number. Refining the exclusion to check `is_known_number` on either operand would enable direct writes for the remaining non-self-assign cases like `j = i + 1`.

 ### Forward Type Narrowing from Typed Operations

--- a/mcode.cm
+++ b/mcode.cm
@@ -88,6 +88,7 @@ var mcode = function(ast) {
  var s_cur_col = 0
  var s_filename = null
  var s_has_disruption = false
+  var s_slot_types = {}

  // Shared closure vars for binop helpers (avoids >4 param functions)
  var _bp_dest = 0
@@ -116,7 +117,8 @@ var mcode = function(ast) {
      intrinsic_cache: s_intrinsic_cache,
      cur_line: s_cur_line,
      cur_col: s_cur_col,
-      has_disruption: s_has_disruption
+      has_disruption: s_has_disruption,
+      slot_types: s_slot_types
    }
  }

@@ -138,6 +140,7 @@ var mcode = function(ast) {
    s_cur_line = saved.cur_line
    s_cur_col = saved.cur_col
    s_has_disruption = saved.has_disruption
+    s_slot_types = saved.slot_types
  }

  // Slot allocation
@@ -330,20 +333,48 @@ var mcode = function(ast) {
    return node.kind == "null"
  }

+  // Slot-type tracking helpers
+  var slot_is_num = function(slot) {
+    var t = s_slot_types[text(slot)]
+    return t == "num" || t == "int"
+  }
+
+  var slot_is_text = function(slot) {
+    return s_slot_types[text(slot)] == "text"
+  }
+
+  var mark_slot = function(slot, typ) {
+    s_slot_types[text(slot)] = typ
+  }
+
+  var propagate_slot = function(dest, src) {
+    s_slot_types[text(dest)] = s_slot_types[text(src)]
+  }
+
  // emit_add_decomposed: emit type-dispatched add (text → concat, num → add)
  // reads _bp_dest, _bp_left, _bp_right, _bp_ln, _bp_rn from closure
  var emit_add_decomposed = function() {
-    if (is_known_text(_bp_ln) && is_known_text(_bp_rn)) {
+    var left_is_num = is_known_number(_bp_ln) || slot_is_num(_bp_left)
+    var left_is_text = is_known_text(_bp_ln) || slot_is_text(_bp_left)
+    var right_is_num = is_known_number(_bp_rn) || slot_is_num(_bp_right)
+    var right_is_text = is_known_text(_bp_rn) || slot_is_text(_bp_right)
+
+    // Both known text → concat
+    if (left_is_text && right_is_text) {
      emit_3("concat", _bp_dest, _bp_left, _bp_right)
+      mark_slot(_bp_dest, "text")
      return null
    }
-    if (is_known_number(_bp_ln) && is_known_number(_bp_rn)) {
+    // Both known number → add
+    if (left_is_num && right_is_num) {
      emit_3("add", _bp_dest, _bp_left, _bp_right)
+      mark_slot(_bp_dest, "num")
      return null
    }
-    // If either operand is a known number, concat is impossible
-    if (is_known_number(_bp_ln) || is_known_number(_bp_rn)) {
+    // One known number, other unknown → emit_numeric_binop (guard on unknown side)
+    if (left_is_num || right_is_num) {
      emit_numeric_binop("add")
+      mark_slot(_bp_dest, "num")
      return null
    }
    // Unknown types: emit full dispatch
@@ -380,8 +411,10 @@ var mcode = function(ast) {
  // emit_numeric_binop: emit type-guarded numeric binary op
  // reads _bp_dest, _bp_left, _bp_right, _bp_ln, _bp_rn from closure
  var emit_numeric_binop = function(op_str) {
-    if (is_known_number(_bp_ln) && is_known_number(_bp_rn)) {
+    if ((is_known_number(_bp_ln) || slot_is_num(_bp_left))
+        && (is_known_number(_bp_rn) || slot_is_num(_bp_right))) {
      emit_3(op_str, _bp_dest, _bp_left, _bp_right)
+      mark_slot(_bp_dest, "num")
      return null
    }
    var t0 = alloc_slot()
@@ -399,238 +432,33 @@ var mcode = function(ast) {
    emit_log_error("cannot apply '" + _bp_op_sym + "': operands must be numbers")
    emit_0("disrupt")
    emit_label(done)
+    mark_slot(_bp_dest, "num")
    return null
  }

-  // emit_eq_decomposed: identical -> int -> float -> text -> null -> bool -> mismatch(false)
-  // reads _bp_dest, _bp_left, _bp_right from closure
+  // emit_eq_decomposed: VM eq handles all types (int fast path, text memcmp, identity, mixed→false)
  var emit_eq_decomposed = function() {
-    var dest = _bp_dest
-    var left = _bp_left
-    var right = _bp_right
-    var t0 = 0
-    var t1 = 0
-    var done = gen_label("eq_done")
-    var not_int = gen_label("eq_ni")
-    var not_num = gen_label("eq_nn")
-    var not_text = gen_label("eq_nt")
-    var not_null = gen_label("eq_nnl")
-    var not_bool = gen_label("eq_nb")
-
-    // Identical check
-    emit_3("is_identical", dest, left, right)
-    emit_jump_cond("jump_true", dest, done)
-
-    // Int path
-    t0 = alloc_slot()
-    emit_2("is_int", t0, left)
-    emit_jump_cond("jump_false", t0, not_int)
-    t1 = alloc_slot()
-    emit_2("is_int", t1, right)
-    emit_jump_cond("jump_false", t1, not_int)
-    emit_3("eq_int", dest, left, right)
-    emit_jump(done)
-
-    // Float path
-    emit_label(not_int)
-    emit_2("is_num", t0, left)
-    emit_jump_cond("jump_false", t0, not_num)
-    emit_2("is_num", t1, right)
-    emit_jump_cond("jump_false", t1, not_num)
-    emit_3("eq_float", dest, left, right)
-    emit_jump(done)
-
-    // Text path
-    emit_label(not_num)
-    emit_2("is_text", t0, left)
-    emit_jump_cond("jump_false", t0, not_text)
-    emit_2("is_text", t1, right)
-    emit_jump_cond("jump_false", t1, not_text)
-    emit_3("eq_text", dest, left, right)
-    emit_jump(done)
-
-    // Null path
-    emit_label(not_text)
-    emit_2("is_null", t0, left)
-    emit_jump_cond("jump_false", t0, not_null)
-    emit_2("is_null", t1, right)
-    emit_jump_cond("jump_false", t1, not_null)
-    emit_1("true", dest)
-    emit_jump(done)
-
-    // Bool path
-    emit_label(not_null)
-    emit_2("is_bool", t0, left)
-    emit_jump_cond("jump_false", t0, not_bool)
-    emit_2("is_bool", t1, right)
-    emit_jump_cond("jump_false", t1, not_bool)
-    emit_3("eq_bool", dest, left, right)
-    emit_jump(done)
-
-    // Mismatch -> false
-    emit_label(not_bool)
-    emit_1("false", dest)
-    emit_label(done)
+    emit_3("eq", _bp_dest, _bp_left, _bp_right)
    return null
  }

-  // emit_ne_decomposed: identical -> int -> float -> text -> null -> bool -> mismatch(true)
-  // reads _bp_dest, _bp_left, _bp_right from closure
+  // emit_ne_decomposed: VM ne handles all types (int fast path, text memcmp, identity, mixed→true)
  var emit_ne_decomposed = function() {
-    var dest = _bp_dest
-    var left = _bp_left
-    var right = _bp_right
-    var t0 = 0
-    var t1 = 0
-    var done = gen_label("ne_done")
-    var not_ident = gen_label("ne_nid")
-    var not_int = gen_label("ne_ni")
-    var not_num = gen_label("ne_nn")
-    var not_text = gen_label("ne_nt")
-    var not_null = gen_label("ne_nnl")
-    var not_bool = gen_label("ne_nb")
-
-    // Identical -> false
-    emit_3("is_identical", dest, left, right)
-    emit_jump_cond("jump_true", dest, not_ident)
-    // If jump_true doesn't fire, dest already holds false, continue to checks
-    emit_jump(not_int)
-
-    emit_label(not_ident)
-    emit_1("false", dest)
-    emit_jump(done)
-
-    // Int path
-    emit_label(not_int)
-    t0 = alloc_slot()
-    emit_2("is_int", t0, left)
-    emit_jump_cond("jump_false", t0, not_num)
-    t1 = alloc_slot()
-    emit_2("is_int", t1, right)
-    emit_jump_cond("jump_false", t1, not_num)
-    emit_3("ne_int", dest, left, right)
-    emit_jump(done)
-
-    // Float path
-    emit_label(not_num)
-    emit_2("is_num", t0, left)
-    emit_jump_cond("jump_false", t0, not_text)
-    emit_2("is_num", t1, right)
-    emit_jump_cond("jump_false", t1, not_text)
-    emit_3("ne_float", dest, left, right)
-    emit_jump(done)
-
-    // Text path
-    emit_label(not_text)
-    emit_2("is_text", t0, left)
-    emit_jump_cond("jump_false", t0, not_null)
-    emit_2("is_text", t1, right)
-    emit_jump_cond("jump_false", t1, not_null)
-    emit_3("ne_text", dest, left, right)
-    emit_jump(done)
-
-    // Null path
-    emit_label(not_null)
-    emit_2("is_null", t0, left)
-    emit_jump_cond("jump_false", t0, not_bool)
-    emit_2("is_null", t1, right)
-    emit_jump_cond("jump_false", t1, not_bool)
-    emit_1("false", dest)
-    emit_jump(done)
-
-    // Bool path
-    var mismatch = gen_label("ne_mis")
-    emit_label(not_bool)
-    emit_2("is_bool", t0, left)
-    emit_jump_cond("jump_false", t0, mismatch)
-    emit_2("is_bool", t1, right)
-    emit_jump_cond("jump_false", t1, mismatch)
-    emit_3("ne_bool", dest, left, right)
-    emit_jump(done)
-
-    // Mismatch -> true (ne of different types is true)
-    emit_label(mismatch)
-    emit_1("true", dest)
-    emit_label(done)
+    emit_3("ne", _bp_dest, _bp_left, _bp_right)
    return null
  }

-  // emit_relational: int -> float -> text -> disrupt
-  // reads _bp_dest, _bp_left, _bp_right, _bp_ln, _bp_rn from closure
-  var emit_relational = function(int_op, float_op, text_op) {
-    var dest = _bp_dest
-    var left = _bp_left
-    var right = _bp_right
-    var t0 = 0
-    var t1 = 0
-    var left_is_int = is_known_int(_bp_ln)
-    var left_is_num = is_known_number(_bp_ln)
-    var left_is_text = is_known_text(_bp_ln)
-    var right_is_int = is_known_int(_bp_rn)
-    var right_is_num = is_known_number(_bp_rn)
-    var right_is_text = is_known_text(_bp_rn)
-    var not_int = null
-    var not_num = null
-    var done = null
-    var err = null
-
-    // Both known int
-    if (left_is_int && right_is_int) {
-      emit_3(int_op, dest, left, right)
-      return null
-    }
-    // Both known number
-    if (left_is_num && right_is_num) {
-      emit_3(float_op, dest, left, right)
-      return null
-    }
-    // Both known text
-    if (left_is_text && right_is_text) {
-      emit_3(text_op, dest, left, right)
-      return null
-    }
-
-    not_int = gen_label("rel_ni")
-    not_num = gen_label("rel_nn")
-    done = gen_label("rel_done")
-    err = gen_label("rel_err")
-
-    t0 = alloc_slot()
-    emit_2("is_int", t0, left)
-    emit_jump_cond("jump_false", t0, not_int)
-    t1 = alloc_slot()
-    emit_2("is_int", t1, right)
-    emit_jump_cond("jump_false", t1, not_int)
-    emit_3(int_op, dest, left, right)
-    emit_jump(done)
-
-    emit_label(not_int)
-    emit_2("is_num", t0, left)
-    emit_jump_cond("jump_false", t0, not_num)
-    emit_2("is_num", t1, right)
-    emit_jump_cond("jump_false", t1, not_num)
-    emit_3(float_op, dest, left, right)
-    emit_jump(done)
-
-    emit_label(not_num)
-    emit_2("is_text", t0, left)
-    emit_jump_cond("jump_false", t0, err)
-    emit_2("is_text", t1, right)
-    emit_jump_cond("jump_false", t1, err)
-    emit_3(text_op, dest, left, right)
-    emit_jump(done)
-
-    emit_label(err)
-    emit_log_error("cannot compare with '" + _bp_op_sym + "': operands must be same type")
-    emit_0("disrupt")
-    emit_label(done)
+  // emit_relational: VM lt/le/gt/ge handle numbers and text, disrupt on mismatch
+  var emit_relational = function(op_str) {
+    emit_3(op_str, _bp_dest, _bp_left, _bp_right)
    return null
  }

  // emit_neg_decomposed: emit type-guarded negate
  var emit_neg_decomposed = function(dest, src, src_node) {
-    if (is_known_number(src_node)) {
+    if (is_known_number(src_node) || slot_is_num(src)) {
      emit_2("negate", dest, src)
+      mark_slot(dest, "num")
      return null
    }
    var t0 = alloc_slot()
@@ -645,19 +473,13 @@ var mcode = function(ast) {
    emit_log_error("cannot negate: operand must be a number")
    emit_0("disrupt")
    emit_label(done)
+    mark_slot(dest, "num")
    return null
  }

  // Central router: maps op string to decomposition helper
  // Sets _bp_* closure vars then calls helper with reduced args
-  var relational_ops = {
-    lt: ["lt_int", "lt_float", "lt_text"],
-    le: ["le_int", "le_float", "le_text"],
-    gt: ["gt_int", "gt_float", "gt_text"],
-    ge: ["ge_int", "ge_float", "ge_text"]
-  }
  var emit_binop = function(op_str, dest, left, right) {
-    var rel = null
    _bp_dest = dest
    _bp_left = left
    _bp_right = right
@@ -668,18 +490,15 @@ var mcode = function(ast) {
      emit_eq_decomposed()
    } else if (op_str == "ne") {
      emit_ne_decomposed()
+    } else if (op_str == "lt" || op_str == "le" || op_str == "gt" || op_str == "ge") {
+      emit_relational(op_str)
+    } else if (op_str == "subtract" || op_str == "multiply" ||
+               op_str == "divide" || op_str == "modulo" || op_str == "remainder" ||
+               op_str == "pow") {
+      emit_numeric_binop(op_str)
    } else {
-      rel = relational_ops[op_str]
-      if (rel != null) {
-        emit_relational(rel[0], rel[1], rel[2])
-      } else if (op_str == "subtract" || op_str == "multiply" ||
-                 op_str == "divide" || op_str == "modulo" || op_str == "remainder" ||
-                 op_str == "pow") {
-        emit_numeric_binop(op_str)
-      } else {
-        // Passthrough for bitwise, in, etc.
-        emit_3(op_str, dest, left, right)
-      }
+      // Passthrough for bitwise, in, etc.
+      emit_3(op_str, dest, left, right)
    }
    return null
  }
@@ -716,9 +535,6 @@ var mcode = function(ast) {
    var argc = length(args)
    var frame_slot = alloc_slot()
    emit_3("frame", frame_slot, func_slot, argc)
-    var null_slot = alloc_slot()
-    emit_1("null", null_slot)
-    emit_3("setarg", frame_slot, 0, null_slot)
    var arg_idx = 1
    var _i = 0
    while (_i < argc) {
@@ -1060,20 +876,20 @@ var mcode = function(ast) {
    emit_1("null", null_s)
    emit_label(loop_label)
    if (forward) {
-      emit_3("lt_int", check, i, len)
+      emit_3("lt", check, i, len)
    } else {
-      emit_3("ge_int", check, i, zero)
+      emit_3("ge", check, i, zero)
    }
    emit_jump_cond("jump_false", check, done_label)
    emit_3("load_index", item, arr_slot, i)
-    emit_3("eq_int", arity_is_zero, fn_arity, zero)
+    emit_3("eq", arity_is_zero, fn_arity, zero)
    emit_jump_cond("jump_false", arity_is_zero, call_one_label)
    emit_3("frame", f, fn_slot, 0)
    emit_3("setarg", f, 0, null_s)
    emit_2("invoke", f, acc)
    emit_jump(call_done_label)
    emit_label(call_one_label)
-    emit_3("eq_int", arity_is_one, fn_arity, one)
+    emit_3("eq", arity_is_one, fn_arity, one)
    emit_jump_cond("jump_false", arity_is_one, call_two_label)
    emit_3("frame", f, fn_slot, 1)
    emit_3("setarg", f, 0, null_s)
@@ -1121,17 +937,17 @@ var mcode = function(ast) {
    emit_1("null", null_s)
    emit_2("length", fn_arity, fn_slot)
    emit_label(loop_label)
-    emit_3("lt_int", check, i, len)
+    emit_3("lt", check, i, len)
    emit_jump_cond("jump_false", check, done_label)
    emit_3("load_index", item, arr_slot, i)
-    emit_3("eq_int", arity_is_zero, fn_arity, zero)
+    emit_3("eq", arity_is_zero, fn_arity, zero)
    emit_jump_cond("jump_false", arity_is_zero, call_one_label)
    emit_3("frame", f, fn_slot, 0)
    emit_3("setarg", f, 0, null_s)
    emit_2("invoke", f, discard)
    emit_jump(call_done_label)
    emit_label(call_one_label)
-    emit_3("eq_int", arity_is_one, fn_arity, one)
+    emit_3("eq", arity_is_one, fn_arity, one)
    emit_jump_cond("jump_false", arity_is_one, call_two_label)
    emit_3("frame", f, fn_slot, 1)
    emit_3("setarg", f, 0, null_s)
@@ -1178,10 +994,10 @@ var mcode = function(ast) {
    emit_1("null", null_s)
    emit_2("length", fn_arity, fn_slot)
    emit_label(loop_label)
-    emit_3("lt_int", check, i, len)
+    emit_3("lt", check, i, len)
    emit_jump_cond("jump_false", check, ret_true)
    emit_3("load_index", item, arr_slot, i)
-    emit_3("eq_int", arity_is_zero, fn_arity, zero)
+    emit_3("eq", arity_is_zero, fn_arity, zero)
    emit_jump_cond("jump_false", arity_is_zero, call_one_label)
    emit_3("frame", f, fn_slot, 0)
    emit_3("setarg", f, 0, null_s)
@@ -1231,10 +1047,10 @@ var mcode = function(ast) {
    emit_1("null", null_s)
    emit_2("length", fn_arity, fn_slot)
    emit_label(loop_label)
-    emit_3("lt_int", check, i, len)
+    emit_3("lt", check, i, len)
    emit_jump_cond("jump_false", check, ret_false)
    emit_3("load_index", item, arr_slot, i)
-    emit_3("eq_int", arity_is_zero, fn_arity, zero)
+    emit_3("eq", arity_is_zero, fn_arity, zero)
    emit_jump_cond("jump_false", arity_is_zero, call_one_label)
    emit_3("frame", f, fn_slot, 0)
    emit_3("setarg", f, 0, null_s)
@@ -1287,17 +1103,17 @@ var mcode = function(ast) {
    emit_1("null", null_s)
    emit_2("length", fn_arity, fn_slot)
    emit_label(loop_label)
-    emit_3("lt_int", check, i, len)
+    emit_3("lt", check, i, len)
    emit_jump_cond("jump_false", check, done_label)
    emit_3("load_index", item, arr_slot, i)
-    emit_3("eq_int", arity_is_zero, fn_arity, zero)
+    emit_3("eq", arity_is_zero, fn_arity, zero)
    emit_jump_cond("jump_false", arity_is_zero, call_one_label)
    emit_3("frame", f, fn_slot, 0)
    emit_3("setarg", f, 0, null_s)
    emit_2("invoke", f, val)
    emit_jump(call_done_label)
    emit_label(call_one_label)
-    emit_3("eq_int", arity_is_one, fn_arity, one)
+    emit_3("eq", arity_is_one, fn_arity, one)
    emit_jump_cond("jump_false", arity_is_one, call_two_label)
    emit_3("frame", f, fn_slot, 1)
    emit_3("setarg", f, 0, null_s)
@@ -1352,7 +1168,7 @@ var mcode = function(ast) {
    if (nargs == 2) {
      null_label = gen_label("reduce_null")
      d1 = gen_label("reduce_d1")
-      emit_3("lt_int", check, zero, len)
+      emit_3("lt", check, zero, len)
      emit_jump_cond("jump_false", check, null_label)
      emit_3("load_index", acc, arr_slot, zero)
      emit_2("move", i, one)
@@ -1371,7 +1187,7 @@ var mcode = function(ast) {
      emit_2("is_null", check, init_slot)
      emit_jump_cond("jump_false", check, has_init)
      // No initial, forward
-      emit_3("lt_int", check, zero, len)
+      emit_3("lt", check, zero, len)
      emit_jump_cond("jump_false", check, null_label)
      emit_3("load_index", acc, arr_slot, zero)
      emit_2("move", i, one)
@@ -1403,7 +1219,7 @@ var mcode = function(ast) {
      emit_2("is_null", check, init_slot)
      emit_jump_cond("jump_false", check, has_init)
      // No initial
-      emit_3("lt_int", check, zero, len)
+      emit_3("lt", check, zero, len)
      emit_jump_cond("jump_false", check, null_label)
      emit_jump_cond("jump_true", rev_slot, no_init_rev)
      // No initial, forward
@@ -1514,8 +1330,10 @@ var mcode = function(ast) {
    // Standard binary ops
    left_slot = gen_expr(left, -1)
    right_slot = gen_expr(right, -1)
-    // Use target slot for ops without multi-type dispatch (add has text+num paths)
-    dest = (target >= 0 && kind != "+") ? target : alloc_slot()
+    // Use target slot for ops without multi-type dispatch (add has text+num paths).
+    // Exception: allow + to write directly to target when target == left_slot
+    // (self-assign pattern like s = s + x) since concat/add reads before writing.
+    dest = (target >= 0 && (kind != "+" || target == left_slot)) ? target : alloc_slot()
    op = binop_map[kind]
    if (op == null) {
      op = "add"
@@ -1581,6 +1399,7 @@ var mcode = function(ast) {
        local = find_var(name)
        if (local >= 0) {
          emit_2("move", local, dest)
+          propagate_slot(local, dest)
        }
      } else if (level > 0) {
        _lv = level - 1
@@ -1680,9 +1499,11 @@ var mcode = function(ast) {
      if (level == 0 || level == -1) {
        slot = find_var(name)
        if (slot >= 0) {
+          mark_slot(slot, null)
          val_slot = gen_expr(right, slot)
          if (val_slot != slot) {
            emit_2("move", slot, val_slot)
+            propagate_slot(slot, val_slot)
          }
          return val_slot
        }
@@ -1807,6 +1628,7 @@ var mcode = function(ast) {
    if (kind == "number") {
      slot = target >= 0 ? target : alloc_slot()
      emit_const_num(slot, expr.number)
+      mark_slot(slot, is_integer(expr.number) ? "int" : "num")
      return slot
    }
    if (kind == "text") {
@@ -1816,6 +1638,7 @@ var mcode = function(ast) {
        val = ""
      }
      emit_const_str(slot, val)
+      mark_slot(slot, "text")
      return slot
    }
    // Template literal
@@ -1852,6 +1675,7 @@ var mcode = function(ast) {
      // Call format(fmt_str, array)
      result_slot = target >= 0 ? target : alloc_slot()
      emit_call(result_slot, fmt_func_slot, [fmt_str_slot, arr_slot])
+      mark_slot(result_slot, "text")
      return result_slot
    }
    if (kind == "regexp") {
@@ -1870,16 +1694,19 @@ var mcode = function(ast) {
    if (kind == "true") {
      slot = target >= 0 ? target : alloc_slot()
      emit_const_bool(slot, true)
+      mark_slot(slot, "bool")
      return slot
    }
    if (kind == "false") {
      slot = target >= 0 ? target : alloc_slot()
      emit_const_bool(slot, false)
+      mark_slot(slot, "bool")
      return slot
    }
    if (kind == "null") {
      slot = target >= 0 ? target : alloc_slot()
      emit_const_null(slot)
+      mark_slot(slot, null)
      return slot
    }
    if (kind == "this") {
@@ -2774,6 +2601,7 @@ var mcode = function(ast) {
    s_instructions = []
    s_vars = []
    s_intrinsic_cache = []
+    s_slot_types = {}
    s_loop_break = null
    s_loop_continue = null
    s_label_map = {}
@@ -2972,6 +2800,7 @@ var mcode = function(ast) {
    s_max_slot = 1
    s_label_counter = 0
    s_func_counter = 0
+    s_slot_types = {}
    s_loop_break = null
    s_loop_continue = null
    s_label_map = {}
--- a/parse.cm
+++ b/parse.cm
@@ -1627,8 +1627,10 @@ var parse = function(tokens, src, filename, tokenizer) {
      if (r.v != null) {
        left_node.level = r.level
        left_node.function_nr = r.def_function_nr
-        r.v.nr_uses = r.v.nr_uses + 1
-        if (r.level > 0) r.v.closure = 1
+        if (r.level > 0) {
+          r.v.nr_uses = r.v.nr_uses + 1
+          r.v.closure = 1
+        }
      } else {
        left_node.level = -1
      }
@@ -1720,8 +1722,10 @@ var parse = function(tokens, src, filename, tokenizer) {
          if (r.v != null) {
            operand.level = r.level
            operand.function_nr = r.def_function_nr
-            r.v.nr_uses = r.v.nr_uses + 1
-            if (r.level > 0) r.v.closure = 1
+            if (r.level > 0) {
+              r.v.nr_uses = r.v.nr_uses + 1
+              r.v.closure = 1
+            }
          } else {
            operand.level = -1
          }
--- a/slots.ce
+++ b/slots.ce
@@ -0,0 +1,303 @@
+// slots.ce — slot data flow / use-def chains
+//
+// Usage:
+//   cell slots --fn <N|name> <file>             Slot summary for function
+//   cell slots --slot <N> --fn <N|name> <file>  Trace slot N in function
+//   cell slots <file>                           Slot summary for all functions
+
+var shop = use("internal/shop")
+
+// Right-pad `s` with single spaces until length(s) is no longer below `w`.
+// Returns `s` unchanged when it is already at least `w` long.
+var pad_right = function(s, w) {
+  var padded = s
+  while (true) {
+    // Same comparison as a plain `while (length(padded) < w)` loop head.
+    if (!(length(padded) < w)) return padded
+    padded = padded + " "
+  }
+}
+
+// Render one instruction operand for display:
+//   null    -> the word null
+//   text    -> wrapped in double quotes
+//   logical -> the word true or false
+//   number, object, anything else -> text(v)
+// The predicates are tested in the order null, number, text, object, logical.
+var fmt_val = function(v) {
+  var rendered = null
+  if (is_null(v)) {
+    rendered = "null"
+  } else if (is_number(v)) {
+    rendered = text(v)
+  } else if (is_text(v)) {
+    rendered = `"${v}"`
+  } else if (is_object(v)) {
+    rendered = text(v)
+  } else if (is_logical(v)) {
+    rendered = v ? "true" : "false"
+  } else {
+    rendered = text(v)
+  }
+  return rendered
+}
+
+// DEF/USE position lookups. Both start as null and are only assigned inside
+// run(), when the log object handed to streamline comes back with
+// get_slot_defs / get_slot_uses set. Each is called with one instruction
+// array; its result is iterated as a list of operand indexes into that
+// instruction.
+var sl_get_defs = null
+var sl_get_uses = null
+
+// Entry point for `cell slots`.
+//
+// Reads the ambient `args` list for --fn, --slot, --help/-h and a file name,
+// compiles the file with shop.mcode_file, runs streamline once to obtain the
+// def/use position hooks and per-function slot types, then prints either a
+// per-slot summary (default) or a DEF/USE trace of a single slot (--slot).
+// Always returns null.
+var run = function() {
+  var filename = null
+  var fn_filter = null
+  var slot_filter = null
+  var i = 0
+  var compiled = null
+  var type_info = {}
+  var sl_log = null
+  var td = null
+  var main_name = null
+  var fi = 0
+  var func = null
+  var fname = null
+
+  // Argument scan. --fn and --slot consume the following argument; any other
+  // argument that does not start with '-' becomes the file name (last wins).
+  while (i < length(args)) {
+    if (args[i] == '--fn') {
+      i = i + 1
+      fn_filter = args[i]
+    } else if (args[i] == '--slot') {
+      i = i + 1
+      slot_filter = number(args[i])
+    } else if (args[i] == '--help' || args[i] == '-h') {
+      log.console("Usage: cell slots [--fn <N|name>] [--slot <N>] <file>")
+      log.console("")
+      log.console("  --fn <N|name>   Filter to function by index or name")
+      log.console("  --slot <N>      Trace a specific slot")
+      return null
+    } else if (!starts_with(args[i], '-')) {
+      filename = args[i]
+    }
+    i = i + 1
+  }
+
+  if (!filename) {
+    log.console("Usage: cell slots [--fn <N|name>] [--slot <N>] <file>")
+    return null
+  }
+
+  compiled = shop.mcode_file(filename)
+
+  // Run streamline over a second mcode_file result with a log object that
+  // requests def/use hooks. On return, copies the hooks into sl_get_defs /
+  // sl_get_uses and records each type_deltas entry's slot_types under its fn
+  // key in type_info. Any disruption in here is swallowed by the handler.
+  // NOTE(review): presumably a separate mcode_file call is used so streamline
+  // does not rewrite `compiled` — confirm mcode_file returns a fresh object.
+  var get_type_info = function() {
+    var mcode_copy = shop.mcode_file(filename)
+    var streamline = use("streamline")
+    var ti = 0
+    sl_log = {
+      passes: [],
+      events: null,
+      type_deltas: [],
+      request_def_use: true
+    }
+    streamline(mcode_copy, sl_log)
+    if (sl_log.get_slot_defs != null) {
+      sl_get_defs = sl_log.get_slot_defs
+      sl_get_uses = sl_log.get_slot_uses
+    }
+    if (sl_log.type_deltas != null) {
+      ti = 0
+      while (ti < length(sl_log.type_deltas)) {
+        td = sl_log.type_deltas[ti]
+        if (td.fn != null) {
+          type_info[td.fn] = td.slot_types
+        }
+        ti = ti + 1
+      }
+    }
+    return null
+  } disruption {
+    // Type info is optional
+  }
+  get_type_info()
+
+  // Type info is optional, but the def/use hooks are not: analyze_function
+  // calls both unconditionally. They are still null if streamline disrupted
+  // above or its log did not expose them, so stop here with a message instead
+  // of failing later on a null call.
+  if (sl_get_defs == null || sl_get_uses == null) {
+    log.console("slots: streamline did not provide get_slot_defs/get_slot_uses; cannot analyze " + filename)
+    return null
+  }
+
+  // True when a function passes the --fn filter: no filter given, the filter
+  // equals the function's index as text, or search(name, fn_filter) returns a
+  // non-null, non-negative result. The main function is passed index -1, so
+  // it can only match by name.
+  var fn_matches = function(index, name) {
+    var match = null
+    if (fn_filter == null) return true
+    if (index >= 0 && fn_filter == text(index)) return true
+    if (name != null) {
+      match = search(name, fn_filter)
+      if (match != null && match >= 0) return true
+    }
+    return false
+  }
+
+  // Build def/use counts for one function and print them.
+  //   func  - compiled function record (nr_args, nr_slots, instructions, name)
+  //   name  - label used in the printed headers
+  //   index - position in compiled.functions (-1 for main); not read here
+  // With --slot set, prints the DEF/USE trace of that slot; otherwise prints
+  // the per-slot summary table. Returns null.
+  var analyze_function = function(func, name, index) {
+    var nr_args = func.nr_args != null ? func.nr_args : 0
+    var nr_slots = func.nr_slots != null ? func.nr_slots : 0
+    var instrs = func.instructions
+    var defs = {}
+    var uses = {}
+    var first_def = {}
+    var first_def_op = {}
+    var events = []
+    var pc = 0
+    var ii = 0
+    var instr = null
+    var op = null
+    var n = 0
+    var def_positions = null
+    var use_positions = null
+    var di = 0
+    var ui = 0
+    var slot_num = null
+    var operand_val = null
+    var parts = null
+    var j = 0
+    var operands = null
+    var slot_types = null
+    var type_key = null
+    var ei = 0
+    var evt = null
+    var found = false
+    var line_str = null
+    var si = 0
+    var slot_key = null
+    var d_count = 0
+    var u_count = 0
+    var t = null
+    var first = null
+    var dead_marker = null
+
+    if (instrs == null) instrs = []
+
+    // Walk instructions, build def/use chains.
+    // Entries that are not arrays are skipped without advancing pc, so pc
+    // numbers only the array-form instructions.
+    ii = 0
+    while (ii < length(instrs)) {
+      instr = instrs[ii]
+      if (is_text(instr)) {
+        ii = ii + 1
+        continue
+      }
+      if (!is_array(instr)) {
+        ii = ii + 1
+        continue
+      }
+
+      op = instr[0]
+      n = length(instr)
+      def_positions = sl_get_defs(instr)
+      use_positions = sl_get_uses(instr)
+
+      // Operands at def positions: count the write and remember the first one.
+      // Only numeric operands are treated as slot numbers.
+      di = 0
+      while (di < length(def_positions)) {
+        operand_val = instr[def_positions[di]]
+        if (is_number(operand_val)) {
+          slot_num = text(operand_val)
+          if (!defs[slot_num]) defs[slot_num] = 0
+          defs[slot_num] = defs[slot_num] + 1
+          if (first_def[slot_num] == null) {
+            first_def[slot_num] = pc
+            first_def_op[slot_num] = op
+          }
+          push(events, {kind: "DEF", slot: operand_val, pc: pc, instr: instr})
+        }
+        di = di + 1
+      }
+
+      // Operands at use positions: count the read.
+      ui = 0
+      while (ui < length(use_positions)) {
+        operand_val = instr[use_positions[ui]]
+        if (is_number(operand_val)) {
+          slot_num = text(operand_val)
+          if (!uses[slot_num]) uses[slot_num] = 0
+          uses[slot_num] = uses[slot_num] + 1
+          push(events, {kind: "USE", slot: operand_val, pc: pc, instr: instr})
+        }
+        ui = ui + 1
+      }
+
+      pc = pc + 1
+      ii = ii + 1
+    }
+
+    // Get type info for this function (keyed by func.name, else the label)
+    type_key = func.name != null ? func.name : name
+    if (type_info[type_key]) {
+      slot_types = type_info[type_key]
+    }
+
+    // --slot mode: show trace
+    if (slot_filter != null) {
+      log.compile(`\n=== slot ${text(slot_filter)} in ${name} ===`)
+      ei = 0
+      found = false
+      while (ei < length(events)) {
+        evt = events[ei]
+        if (evt.slot == slot_filter) {
+          found = true
+          n = length(evt.instr)
+          parts = []
+          // Elements 1 .. n-3 are printed as operands; element n-2 is printed
+          // as a ":line" suffix and the last element is not printed.
+          j = 1
+          while (j < n - 2) {
+            push(parts, fmt_val(evt.instr[j]))
+            j = j + 1
+          }
+          operands = text(parts, ", ")
+          line_str = evt.instr[n - 2] != null ? `:${text(evt.instr[n - 2])}` : ""
+          log.compile(`  ${pad_right(evt.kind, 5)}pc ${pad_right(text(evt.pc) + ":", 6)} ${pad_right(evt.instr[0], 15)}${pad_right(operands, 30)}${line_str}`)
+        }
+        ei = ei + 1
+      }
+      if (!found) {
+        log.compile("  (no activity)")
+      }
+      return null
+    }
+
+    // Summary mode
+    log.compile(`\n=== ${name} (args=${text(nr_args)}, slots=${text(nr_slots)}) ===`)
+    log.compile(`  ${pad_right("slot", 8)}${pad_right("defs", 8)}${pad_right("uses", 8)}${pad_right("type", 12)}first-def`)
+
+    si = 0
+    while (si < nr_slots) {
+      slot_key = text(si)
+      d_count = defs[slot_key] != null ? defs[slot_key] : 0
+      u_count = uses[slot_key] != null ? uses[slot_key] : 0
+
+      // Skip slots with no recorded defs or uses, except slot 0 and the
+      // argument slots (1 .. nr_args), which are always listed.
+      if (d_count == 0 && u_count == 0 && si >= nr_args + 1) {
+        si = si + 1
+        continue
+      }
+
+      t = "-"
+      if (slot_types != null && slot_types[slot_key] != null) {
+        t = slot_types[slot_key]
+      }
+
+      first = ""
+      if (si == 0) {
+        first = "(this)"
+      } else if (si > 0 && si <= nr_args) {
+        first = `(arg ${text(si - 1)})`
+      } else if (first_def[slot_key] != null) {
+        first = `pc ${text(first_def[slot_key])}: ${first_def_op[slot_key]}`
+      }
+
+      // A non-argument slot that is written but never read is flagged dead.
+      dead_marker = ""
+      if (d_count > 0 && u_count == 0 && si > nr_args) {
+        dead_marker = "  <- dead"
+      }
+
+      log.compile(`  ${pad_right("s" + slot_key, 8)}${pad_right(text(d_count), 8)}${pad_right(text(u_count), 8)}${pad_right(t, 12)}${first}${dead_marker}`)
+      si = si + 1
+    }
+    return null
+  }
+
+  // Process functions: main first (matched by name only), then each entry of
+  // compiled.functions labelled "[index] name".
+  main_name = compiled.name != null ? compiled.name : "<main>"
+
+  if (compiled.main != null) {
+    if (fn_matches(-1, main_name)) {
+      analyze_function(compiled.main, main_name, -1)
+    }
+  }
+
+  if (compiled.functions != null) {
+    fi = 0
+    while (fi < length(compiled.functions)) {
+      func = compiled.functions[fi]
+      fname = func.name != null ? func.name : "<anonymous>"
+      if (fn_matches(fi, fname)) {
+        analyze_function(func, `[${text(fi)}] ${fname}`, fi)
+      }
+      fi = fi + 1
+    }
+  }
+
+  return null
+}
+
+// Run the tool when the script is loaded, then call $stop() once it returns
+// (NOTE(review): $stop presumably ends this program — confirm).
+run()
+$stop()
--- a/source/mach.c
+++ b/source/mach.c
@@ -212,34 +212,7 @@ typedef enum MachOpcode {

  /* Text */
  MACH_CONCAT,        /* R(A) = R(B) ++ R(C) — string concatenation */
-
-  /* Typed integer comparisons (ABC) */
-  MACH_EQ_INT,        /* R(A) = (R(B) == R(C)) — int */
-  MACH_NE_INT,        /* R(A) = (R(B) != R(C)) — int */
-  MACH_LT_INT,        /* R(A) = (R(B) <  R(C)) — int */
-  MACH_LE_INT,        /* R(A) = (R(B) <= R(C)) — int */
-  MACH_GT_INT,        /* R(A) = (R(B) >  R(C)) — int */
-  MACH_GE_INT,        /* R(A) = (R(B) >= R(C)) — int */
-
-  /* Typed float comparisons (ABC) */
-  MACH_EQ_FLOAT,      /* R(A) = (R(B) == R(C)) — float */
-  MACH_NE_FLOAT,      /* R(A) = (R(B) != R(C)) — float */
-  MACH_LT_FLOAT,      /* R(A) = (R(B) <  R(C)) — float */
-  MACH_LE_FLOAT,      /* R(A) = (R(B) <= R(C)) — float */
-  MACH_GT_FLOAT,      /* R(A) = (R(B) >  R(C)) — float */
-  MACH_GE_FLOAT,      /* R(A) = (R(B) >= R(C)) — float */
-
-  /* Typed text comparisons (ABC) */
-  MACH_EQ_TEXT,        /* R(A) = (R(B) == R(C)) — text */
-  MACH_NE_TEXT,        /* R(A) = (R(B) != R(C)) — text */
-  MACH_LT_TEXT,        /* R(A) = (R(B) <  R(C)) — text */
-  MACH_LE_TEXT,        /* R(A) = (R(B) <= R(C)) — text */
-  MACH_GT_TEXT,        /* R(A) = (R(B) >  R(C)) — text */
-  MACH_GE_TEXT,        /* R(A) = (R(B) >= R(C)) — text */
-
-  /* Typed bool comparisons (ABC) */
-  MACH_EQ_BOOL,       /* R(A) = (R(B) == R(C)) — bool */
-  MACH_NE_BOOL,       /* R(A) = (R(B) != R(C)) — bool */
+  MACH_STONE_TEXT,    /* stone(R(A)) — freeze mutable text before escape */

  /* Special comparisons */
  MACH_IS_IDENTICAL,  /* R(A) = (R(B) === R(C)) — identity check (ABC) */
@@ -372,26 +345,7 @@ static const char *mach_opcode_names[MACH_OP_COUNT] = {
  [MACH_NOP] = "nop",
  /* Mcode-derived */
  [MACH_CONCAT] = "concat",
-  [MACH_EQ_INT] = "eq_int",
-  [MACH_NE_INT] = "ne_int",
-  [MACH_LT_INT] = "lt_int",
-  [MACH_LE_INT] = "le_int",
-  [MACH_GT_INT] = "gt_int",
-  [MACH_GE_INT] = "ge_int",
-  [MACH_EQ_FLOAT] = "eq_float",
-  [MACH_NE_FLOAT] = "ne_float",
-  [MACH_LT_FLOAT] = "lt_float",
-  [MACH_LE_FLOAT] = "le_float",
-  [MACH_GT_FLOAT] = "gt_float",
-  [MACH_GE_FLOAT] = "ge_float",
-  [MACH_EQ_TEXT] = "eq_text",
-  [MACH_NE_TEXT] = "ne_text",
-  [MACH_LT_TEXT] = "lt_text",
-  [MACH_LE_TEXT] = "le_text",
-  [MACH_GT_TEXT] = "gt_text",
-  [MACH_GE_TEXT] = "ge_text",
-  [MACH_EQ_BOOL] = "eq_bool",
-  [MACH_NE_BOOL] = "ne_bool",
+  [MACH_STONE_TEXT] = "stone_text",
  [MACH_IS_IDENTICAL] = "is_identical",
  [MACH_IS_INT] = "is_int",
  [MACH_IS_NUM] = "is_num",
@@ -1080,10 +1034,6 @@ static JSValue reg_vm_binop(JSContext *ctx, int op, JSValue a, JSValue b) {
    }
  }

-  /* String concat for ADD */
-  if (op == MACH_ADD && mist_is_text(a) && mist_is_text(b))
-    return JS_ConcatString(ctx, a, b);
-
  /* Comparison ops allow mixed types — return false for mismatches */
  if (op >= MACH_EQ && op <= MACH_GE) {
    /* Fast path: identical values (chase pointers for forwarded objects) */
@@ -1142,7 +1092,10 @@ static JSValue reg_vm_binop(JSContext *ctx, int op, JSValue a, JSValue b) {
      default: break;
      }
    }
-    /* Different types: EQ→false, NEQ→true, others→false */
+    /* Different types for ordering comparisons: disrupt */
+    if (op >= MACH_LT && op <= MACH_GE)
+      return JS_RaiseDisrupt(ctx, "cannot compare: operands must be same type");
+    /* EQ/NEQ with different types: false/true */
    if (op == MACH_NEQ) return JS_NewBool(ctx, 1);
    return JS_NewBool(ctx, 0);
  }
@@ -1422,17 +1375,7 @@ vm_dispatch:
      DT(MACH_HASPROP), DT(MACH_REGEXP),
      DT(MACH_EQ_TOL), DT(MACH_NEQ_TOL),
      DT(MACH_NOP),
-      DT(MACH_CONCAT),
-      DT(MACH_EQ_INT), DT(MACH_NE_INT),
-      DT(MACH_LT_INT), DT(MACH_LE_INT),
-      DT(MACH_GT_INT), DT(MACH_GE_INT),
-      DT(MACH_EQ_FLOAT), DT(MACH_NE_FLOAT),
-      DT(MACH_LT_FLOAT), DT(MACH_LE_FLOAT),
-      DT(MACH_GT_FLOAT), DT(MACH_GE_FLOAT),
-      DT(MACH_EQ_TEXT), DT(MACH_NE_TEXT),
-      DT(MACH_LT_TEXT), DT(MACH_LE_TEXT),
-      DT(MACH_GT_TEXT), DT(MACH_GE_TEXT),
-      DT(MACH_EQ_BOOL), DT(MACH_NE_BOOL),
+      DT(MACH_CONCAT), DT(MACH_STONE_TEXT),
      DT(MACH_IS_IDENTICAL),
      DT(MACH_IS_INT), DT(MACH_IS_NUM),
      DT(MACH_IS_TEXT), DT(MACH_IS_BOOL),
@@ -2062,6 +2005,7 @@ vm_dispatch:
        }
        target = next;
      }
+      stone_mutable_text(target->slots[c]);
      frame->slots[a] = target->slots[c];
      VM_BREAK();
    }
@@ -2171,6 +2115,7 @@ vm_dispatch:
    }

    VM_CASE(MACH_RETURN):
+      stone_mutable_text(frame->slots[a]);
      result = frame->slots[a];
      if (!JS_IsPtr(frame->caller)) goto done;
      {
@@ -2338,81 +2283,46 @@ vm_dispatch:

    /* === New mcode-derived opcodes === */

-    /* Text concatenation */
+    /* Text concatenation — with in-place append fast path for s = s + x */
    VM_CASE(MACH_CONCAT): {
-      JSValue res = JS_ConcatString(ctx, frame->slots[b], frame->slots[c]);
-      frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
-      if (JS_IsException(res)) goto disrupt;
-      frame->slots[a] = res;
-      VM_BREAK();
-    }
-
-    /* Typed integer comparisons */
-    VM_CASE(MACH_EQ_INT):
-      frame->slots[a] = JS_NewBool(ctx, JS_VALUE_GET_INT(frame->slots[b]) == JS_VALUE_GET_INT(frame->slots[c]));
-      VM_BREAK();
-    VM_CASE(MACH_NE_INT):
-      frame->slots[a] = JS_NewBool(ctx, JS_VALUE_GET_INT(frame->slots[b]) != JS_VALUE_GET_INT(frame->slots[c]));
-      VM_BREAK();
-    VM_CASE(MACH_LT_INT):
-      frame->slots[a] = JS_NewBool(ctx, JS_VALUE_GET_INT(frame->slots[b]) < JS_VALUE_GET_INT(frame->slots[c]));
-      VM_BREAK();
-    VM_CASE(MACH_LE_INT):
-      frame->slots[a] = JS_NewBool(ctx, JS_VALUE_GET_INT(frame->slots[b]) <= JS_VALUE_GET_INT(frame->slots[c]));
-      VM_BREAK();
-    VM_CASE(MACH_GT_INT):
-      frame->slots[a] = JS_NewBool(ctx, JS_VALUE_GET_INT(frame->slots[b]) > JS_VALUE_GET_INT(frame->slots[c]));
-      VM_BREAK();
-    VM_CASE(MACH_GE_INT):
-      frame->slots[a] = JS_NewBool(ctx, JS_VALUE_GET_INT(frame->slots[b]) >= JS_VALUE_GET_INT(frame->slots[c]));
-      VM_BREAK();
-
-    /* Typed float comparisons */
-    VM_CASE(MACH_EQ_FLOAT): VM_CASE(MACH_NE_FLOAT):
-    VM_CASE(MACH_LT_FLOAT): VM_CASE(MACH_LE_FLOAT):
-    VM_CASE(MACH_GT_FLOAT): VM_CASE(MACH_GE_FLOAT): {
-      double da, db;
-      JS_ToFloat64(ctx, &da, frame->slots[b]);
-      JS_ToFloat64(ctx, &db, frame->slots[c]);
-      int r;
-      switch (op) {
-      case MACH_EQ_FLOAT: r = (da == db); break;
-      case MACH_NE_FLOAT: r = (da != db); break;
-      case MACH_LT_FLOAT: r = (da <  db); break;
-      case MACH_LE_FLOAT: r = (da <= db); break;
-      case MACH_GT_FLOAT: r = (da >  db); break;
-      case MACH_GE_FLOAT: r = (da >= db); break;
-      default: r = 0; break;
+      if (a == b) {
+        /* Self-assign pattern: slot[a] = slot[a] + slot[c] */
+        JSValue left = frame->slots[a];
+        JSValue right = frame->slots[c];
+        /* Inline fast path: mutable heap text with enough capacity */
+        if (JS_IsPtr(left)) {
+          JSText *s = (JSText *)chase(left);
+          int slen = (int)s->length;
+          int rlen = js_string_value_len(right);
+          int cap = (int)objhdr_cap56(s->hdr);
+          if (objhdr_type(s->hdr) == OBJ_TEXT
+              && !(s->hdr & OBJHDR_S_MASK)
+              && slen + rlen <= cap) {
+            /* Append in-place — zero allocation, no GC possible */
+            for (int i = 0; i < rlen; i++)
+              string_put(s, slen + i, js_string_value_get(right, i));
+            s->length = slen + rlen;
+            VM_BREAK();
+          }
+        }
+        /* Slow path: allocate with growth factor, leave unstoned */
+        JSValue res = JS_ConcatStringGrow(ctx, frame->slots[b], frame->slots[c]);
+        frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
+        if (JS_IsException(res)) goto disrupt;
+        frame->slots[a] = res;
+      } else {
+        /* Different target: use existing exact-fit stoned path */
+        JSValue res = JS_ConcatString(ctx, frame->slots[b], frame->slots[c]);
+        frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
+        if (JS_IsException(res)) goto disrupt;
+        frame->slots[a] = res;
      }
-      frame->slots[a] = JS_NewBool(ctx, r);
      VM_BREAK();
    }

-    /* Typed text comparisons */
-    VM_CASE(MACH_EQ_TEXT): VM_CASE(MACH_NE_TEXT):
-    VM_CASE(MACH_LT_TEXT): VM_CASE(MACH_LE_TEXT):
-    VM_CASE(MACH_GT_TEXT): VM_CASE(MACH_GE_TEXT): {
-      int cmp = js_string_compare_value(ctx, frame->slots[b], frame->slots[c], FALSE);
-      int r;
-      switch (op) {
-      case MACH_EQ_TEXT: r = (cmp == 0); break;
-      case MACH_NE_TEXT: r = (cmp != 0); break;
-      case MACH_LT_TEXT: r = (cmp <  0); break;
-      case MACH_LE_TEXT: r = (cmp <= 0); break;
-      case MACH_GT_TEXT: r = (cmp >  0); break;
-      case MACH_GE_TEXT: r = (cmp >= 0); break;
-      default: r = 0; break;
-      }
-      frame->slots[a] = JS_NewBool(ctx, r);
-      VM_BREAK();
-    }
-
-    /* Typed bool comparisons */
-    VM_CASE(MACH_EQ_BOOL):
-      frame->slots[a] = JS_NewBool(ctx, JS_VALUE_GET_BOOL(frame->slots[b]) == JS_VALUE_GET_BOOL(frame->slots[c]));
-      VM_BREAK();
-    VM_CASE(MACH_NE_BOOL):
-      frame->slots[a] = JS_NewBool(ctx, JS_VALUE_GET_BOOL(frame->slots[b]) != JS_VALUE_GET_BOOL(frame->slots[c]));
+    /* Stone mutable text — compiler-emitted at escape points */
+    VM_CASE(MACH_STONE_TEXT):
+      stone_mutable_text(frame->slots[a]);
      VM_BREAK();

    /* Identity check */
@@ -2622,7 +2532,15 @@ vm_dispatch:
        frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
        goto disrupt;
      }
-      int nr = c + 2; /* argc + this + func overhead */
+      JSFunction *fn = JS_VALUE_GET_FUNCTION(func_val);
+      int nr;
+      if (fn->kind == JS_FUNC_KIND_REGISTER) {
+        JSCodeRegister *fn_code = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code;
+        nr = fn_code->nr_slots;
+        if (nr < c + 2) nr = c + 2; /* safety: never smaller than argc+2 */
+      } else {
+        nr = c + 2;
+      }
      JSFrameRegister *call_frame = alloc_frame_register(ctx, nr);
      if (!call_frame) {
        frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
@@ -2631,6 +2549,7 @@ vm_dispatch:
      frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
      func_val = frame->slots[b]; /* re-read after GC */
      call_frame->function = func_val;
+      call_frame->address = JS_NewInt32(ctx, c); /* store actual argc */
      frame->slots[a] = JS_MKPTR(call_frame);
      VM_BREAK();
    }
@@ -2643,36 +2562,19 @@ vm_dispatch:
    VM_CASE(MACH_INVOKE): {
      /* A=frame_slot, B=result_slot */
      JSFrameRegister *fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame->slots[a]);
-      int nr = (int)objhdr_cap56(fr->header);
-      int c_argc = (nr >= 2) ? nr - 2 : 0;
+      int c_argc = JS_VALUE_GET_INT(fr->address); /* actual argc stored by FRAME */
      JSValue fn_val = fr->function;
      JSFunction *fn = JS_VALUE_GET_FUNCTION(fn_val);
      if (!mach_check_call_arity(ctx, fn, c_argc))
        goto disrupt;

      if (fn->kind == JS_FUNC_KIND_REGISTER) {
-        /* Register function: switch frames inline (fast path) */
-        JSCodeRegister *fn_code = JS_VALUE_GET_CODE(FN_READ_CODE(fn))->u.reg.code;
-        JSFrameRegister *new_frame = alloc_frame_register(ctx, fn_code->nr_slots);
-        if (!new_frame) {
-          frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
-          goto disrupt;
-        }
-        frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
-        fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame->slots[a]);
-        fn_val = fr->function;
-        fn = JS_VALUE_GET_FUNCTION(fn_val);
-        fn_code = JS_VALUE_GET_CODE(FN_READ_CODE(fn))->u.reg.code;
-        new_frame->function = fn_val;
-        /* Copy this + args from call frame to new frame */
-        int copy_count = (c_argc < fn_code->arity) ? c_argc : fn_code->arity;
-        new_frame->slots[0] = fr->slots[0]; /* this */
-        for (int i = 0; i < copy_count; i++)
-          new_frame->slots[1 + i] = fr->slots[1 + i];
+        /* Register function: FRAME already allocated nr_slots — just switch */
+        JSCodeRegister *fn_code = JS_VALUE_GET_CODE(fn->u.cell.code)->u.reg.code;
        /* Save return info */
        frame->address = JS_NewInt32(ctx, (pc << 16) | b);
-        new_frame->caller = JS_MKPTR(frame);
-        frame = new_frame;
+        fr->caller = JS_MKPTR(frame);
+        frame = fr;
        frame_ref.val = JS_MKPTR(frame);
        code = fn_code;
        env = fn->u.cell.env_record;
@@ -2716,8 +2618,7 @@ vm_dispatch:
    VM_CASE(MACH_GOINVOKE): {
      /* Tail call: replace current frame with callee */
      JSFrameRegister *fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame->slots[a]);
-      int nr = (int)objhdr_cap56(fr->header);
-      int c_argc = (nr >= 2) ? nr - 2 : 0;
+      int c_argc = JS_VALUE_GET_INT(fr->address); /* actual argc stored by FRAME */
      JSValue fn_val = fr->function;
      JSFunction *fn = JS_VALUE_GET_FUNCTION(fn_val);
      if (!mach_check_call_arity(ctx, fn, c_argc))
@@ -2742,25 +2643,10 @@ vm_dispatch:
          env = fn->u.cell.env_record;
          pc = code->entry_point;
        } else {
-          /* SLOW PATH: callee needs more slots, must allocate */
-          JSFrameRegister *new_frame = alloc_frame_register(ctx, fn_code->nr_slots);
-          if (!new_frame) {
-            frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
-            goto disrupt;
-          }
-          frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
-          fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame->slots[a]);
-          fn_val = fr->function;
-          fn = JS_VALUE_GET_FUNCTION(fn_val);
-          fn_code = JS_VALUE_GET_CODE(FN_READ_CODE(fn))->u.reg.code;
-          new_frame->function = fn_val;
-          int copy_count = (c_argc < fn_code->arity) ? c_argc : fn_code->arity;
-          new_frame->slots[0] = fr->slots[0]; /* this */
-          for (int i = 0; i < copy_count; i++)
-            new_frame->slots[1 + i] = fr->slots[1 + i];
-          new_frame->caller = frame->caller;
+          /* SLOW PATH: GOFRAME already allocated nr_slots — use fr directly */
+          fr->caller = frame->caller;
          frame->caller = JS_NULL;
-          frame = new_frame;
+          frame = fr;
          frame_ref.val = JS_MKPTR(frame);
          code = fn_code;
          env = fn->u.cell.env_record;
@@ -3014,10 +2900,10 @@ static MachCode *mcode_lower_func(cJSON *fobj, const char *filename) {
  if (s.nr_slots > 255) {
    cJSON *nm_chk = cJSON_GetObjectItemCaseSensitive(fobj, "name");
    const char *fn_name = nm_chk ? cJSON_GetStringValue(nm_chk) : "<anonymous>";
-    fprintf(stderr, "ERROR: function '%s' has %d slots (max 255). "
+    fprintf(stderr, "FATAL: function '%s' has %d slots (max 255). "
            "Ensure the streamline optimizer ran before mach compilation.\n",
            fn_name, s.nr_slots);
-    return NULL;
+    abort();
  }
  int dis_raw = (int)cJSON_GetNumberValue(
      cJSON_GetObjectItemCaseSensitive(fobj, "disruption_pc"));
@@ -3084,6 +2970,7 @@ static MachCode *mcode_lower_func(cJSON *fobj, const char *filename) {
    else if (strcmp(op, "move") == 0)    { AB2(MACH_MOVE); }
    /* Text */
    else if (strcmp(op, "concat") == 0)  { ABC3(MACH_CONCAT); }
+    else if (strcmp(op, "stone_text") == 0) { EM(MACH_ABC(MACH_STONE_TEXT, A1, 0, 0)); }
    /* Generic arithmetic */
    else if (strcmp(op, "add") == 0)     { ABC3(MACH_ADD); }
    else if (strcmp(op, "subtract") == 0) { ABC3(MACH_SUB); }
@@ -3103,30 +2990,13 @@ static MachCode *mcode_lower_func(cJSON *fobj, const char *filename) {
    else if (strcmp(op, "ceiling") == 0) { ABC3(MACH_CEILING); }
    else if (strcmp(op, "round") == 0)   { ABC3(MACH_ROUND); }
    else if (strcmp(op, "trunc") == 0)   { ABC3(MACH_TRUNC); }
-    /* Typed integer comparisons */
-    else if (strcmp(op, "eq_int") == 0)  { ABC3(MACH_EQ_INT); }
-    else if (strcmp(op, "ne_int") == 0)  { ABC3(MACH_NE_INT); }
-    else if (strcmp(op, "lt_int") == 0)  { ABC3(MACH_LT_INT); }
-    else if (strcmp(op, "le_int") == 0)  { ABC3(MACH_LE_INT); }
-    else if (strcmp(op, "gt_int") == 0)  { ABC3(MACH_GT_INT); }
-    else if (strcmp(op, "ge_int") == 0)  { ABC3(MACH_GE_INT); }
-    /* Typed float comparisons */
-    else if (strcmp(op, "eq_float") == 0) { ABC3(MACH_EQ_FLOAT); }
-    else if (strcmp(op, "ne_float") == 0) { ABC3(MACH_NE_FLOAT); }
-    else if (strcmp(op, "lt_float") == 0) { ABC3(MACH_LT_FLOAT); }
-    else if (strcmp(op, "le_float") == 0) { ABC3(MACH_LE_FLOAT); }
-    else if (strcmp(op, "gt_float") == 0) { ABC3(MACH_GT_FLOAT); }
-    else if (strcmp(op, "ge_float") == 0) { ABC3(MACH_GE_FLOAT); }
-    /* Typed text comparisons */
-    else if (strcmp(op, "eq_text") == 0) { ABC3(MACH_EQ_TEXT); }
-    else if (strcmp(op, "ne_text") == 0) { ABC3(MACH_NE_TEXT); }
-    else if (strcmp(op, "lt_text") == 0) { ABC3(MACH_LT_TEXT); }
-    else if (strcmp(op, "le_text") == 0) { ABC3(MACH_LE_TEXT); }
-    else if (strcmp(op, "gt_text") == 0) { ABC3(MACH_GT_TEXT); }
-    else if (strcmp(op, "ge_text") == 0) { ABC3(MACH_GE_TEXT); }
-    /* Typed bool comparisons */
-    else if (strcmp(op, "eq_bool") == 0) { ABC3(MACH_EQ_BOOL); }
-    else if (strcmp(op, "ne_bool") == 0) { ABC3(MACH_NE_BOOL); }
+    /* Generic comparisons */
+    else if (strcmp(op, "eq") == 0)      { ABC3(MACH_EQ); }
+    else if (strcmp(op, "ne") == 0)      { ABC3(MACH_NEQ); }
+    else if (strcmp(op, "lt") == 0)      { ABC3(MACH_LT); }
+    else if (strcmp(op, "le") == 0)      { ABC3(MACH_LE); }
+    else if (strcmp(op, "gt") == 0)      { ABC3(MACH_GT); }
+    else if (strcmp(op, "ge") == 0)      { ABC3(MACH_GE); }
    /* Special comparisons */
    else if (strcmp(op, "is_identical") == 0) { ABC3(MACH_IS_IDENTICAL); }
    else if (strcmp(op, "eq_tol") == 0) {
--- a/source/quickjs-internal.h
+++ b/source/quickjs-internal.h
@@ -479,6 +479,17 @@ static inline void mach_resolve_forward(JSValue *slot) {
  }
 }

+/* Freeze ("stone") a heap text that is still mutable, in place.
+   Called at escape points in the VM to uphold the invariant that an
+   unstoned text is referenced by exactly one slot.  Values that are not
+   pointers, not OBJ_TEXT, or already stoned are left untouched.
+   NOTE(review): the header is read at the pointer as given, without
+   following forwarding — confirm callers never pass a forwarded value. */
+static inline void stone_mutable_text(JSValue v) {
+  objhdr_t *hdr;
+
+  if (!JS_IsPtr(v))
+    return;
+  hdr = (objhdr_t *)JS_VALUE_GET_PTR(v);
+  if (objhdr_type(*hdr) != OBJ_TEXT || (*hdr & OBJHDR_S_MASK))
+    return;
+  *hdr = objhdr_set_s(*hdr, true);
+}
+
 /* Inline type checks — use these in the VM dispatch loop to avoid
   function call overhead. The public API (JS_IsArray etc. in quickjs.h)
   remains non-inline for external callers; those wrappers live in runtime.c. */
@@ -1213,6 +1224,7 @@ int JS_SetPropertyKey (JSContext *ctx, JSValue this_obj, JSValue key, JSValue va
 void *js_realloc_rt (void *ptr, size_t size);
 char *js_strdup_rt (const char *str);
 JSValue JS_ConcatString (JSContext *ctx, JSValue op1, JSValue op2);
+JSValue JS_ConcatStringGrow (JSContext *ctx, JSValue op1, JSValue op2);
 JSText *pretext_init (JSContext *ctx, int capacity);
 JSText *pretext_putc (JSContext *ctx, JSText *s, uint32_t c);
 JSText *pretext_concat_value (JSContext *ctx, JSText *s, JSValue v);
--- a/source/runtime.c
+++ b/source/runtime.c
@@ -2910,6 +2910,84 @@ JSValue JS_ConcatString (JSContext *ctx, JSValue op1, JSValue op2) {
  return ret_val;
 }

+/* Concat with over-allocated capacity and NO stoning.
+   Used by MACH_CONCAT self-assign (s = s + x) slow path so that
+   subsequent appends can reuse the excess capacity in-place.
+
+   Non-text operands are converted with JS_ToString first (the other
+   operand is kept in a GC ref across the conversion).  Results short
+   enough for an immediate ASCII value are returned as immediates.
+   Otherwise a text with capacity max(16, 2 * length) is allocated,
+   filled, and returned unstoned.
+
+   Returns JS_EXCEPTION if a conversion or the allocation fails.  When
+   doubling the combined length would overflow int, the call is forwarded
+   to JS_ConcatString instead of growing. */
+JSValue JS_ConcatStringGrow (JSContext *ctx, JSValue op1, JSValue op2) {
+  if (unlikely (!JS_IsText (op1))) {
+    JSGCRef op2_guard;
+    JS_PushGCRef (ctx, &op2_guard);
+    op2_guard.val = op2;
+    op1 = JS_ToString (ctx, op1);
+    op2 = op2_guard.val;
+    JS_PopGCRef (ctx, &op2_guard);
+    if (JS_IsException (op1)) return JS_EXCEPTION;
+  }
+  if (unlikely (!JS_IsText (op2))) {
+    JSGCRef op1_guard;
+    JS_PushGCRef (ctx, &op1_guard);
+    op1_guard.val = op1;
+    op2 = JS_ToString (ctx, op2);
+    op1 = op1_guard.val;
+    JS_PopGCRef (ctx, &op1_guard);
+    if (JS_IsException (op2)) return JS_EXCEPTION;
+  }
+
+  int len1 = js_string_value_len (op1);
+  int len2 = js_string_value_len (op2);
+
+  /* The capacity below is 2 * (len1 + len2).  If that wrapped, the negative
+     result would be clamped to 16 and the copy loops would write far past
+     the allocation.  Hand such inputs to the exact-fit path, which never
+     doubles; the first test keeps len1 + len2 itself from overflowing. */
+  if (len2 > INT32_MAX - len1 || len1 + len2 > INT32_MAX / 2)
+    return JS_ConcatString (ctx, op1, op2);
+
+  int new_len = len1 + len2;
+
+  /* Try immediate ASCII for short results */
+  if (new_len <= MIST_ASCII_MAX_LEN) {
+    char buf[8];
+    BOOL all_ascii = TRUE;
+    for (int i = 0; i < len1 && all_ascii; i++) {
+      uint32_t c = js_string_value_get (op1, i);
+      if (c >= 0x80) all_ascii = FALSE;
+      else buf[i] = (char)c;
+    }
+    for (int i = 0; i < len2 && all_ascii; i++) {
+      uint32_t c = js_string_value_get (op2, i);
+      if (c >= 0x80) all_ascii = FALSE;
+      else buf[len1 + i] = (char)c;
+    }
+    if (all_ascii) {
+      JSValue imm = MIST_TryNewImmediateASCII (buf, new_len);
+      if (!JS_IsNull (imm)) return imm;
+    }
+  }
+
+  /* Allocate with 2x growth factor, minimum 16 */
+  int capacity = new_len * 2;
+  if (capacity < 16) capacity = 16;
+
+  /* Keep both operands in GC refs across the allocation and re-read them
+     afterwards. */
+  JSGCRef op1_ref, op2_ref;
+  JS_PushGCRef (ctx, &op1_ref);
+  op1_ref.val = op1;
+  JS_PushGCRef (ctx, &op2_ref);
+  op2_ref.val = op2;
+
+  JSText *p = js_alloc_string (ctx, capacity);
+  if (!p) {
+    JS_PopGCRef (ctx, &op2_ref);
+    JS_PopGCRef (ctx, &op1_ref);
+    return JS_EXCEPTION;
+  }
+
+  op1 = op1_ref.val;
+  op2 = op2_ref.val;
+  JS_PopGCRef (ctx, &op2_ref);
+  JS_PopGCRef (ctx, &op1_ref);
+
+  for (int i = 0; i < len1; i++)
+    string_put (p, i, js_string_value_get (op1, i));
+  for (int i = 0; i < len2; i++)
+    string_put (p, len1 + i, js_string_value_get (op2, i));
+  p->length = new_len;
+  /* Do NOT stone — leave mutable so in-place append can reuse capacity */
+  return JS_MKPTR (p);
+}
+
 /* WARNING: proto must be an object or JS_NULL */
 JSValue JS_NewObjectProtoClass (JSContext *ctx, JSValue proto_val, JSClassID class_id) {
  JSGCRef proto_ref;
--- a/streamline.cm
+++ b/streamline.cm
@@ -41,14 +41,8 @@ var streamline = function(ir, log) {
    max: true, min: true, pow: true
  }
  var bool_result_ops = {
-    eq_int: true, ne_int: true, lt_int: true, gt_int: true,
-    le_int: true, ge_int: true,
-    eq_float: true, ne_float: true, lt_float: true, gt_float: true,
-    le_float: true, ge_float: true,
-    eq_text: true, ne_text: true, lt_text: true, gt_text: true,
-    le_text: true, ge_text: true,
-    eq_bool: true, ne_bool: true,
-    eq_tol: true, ne_tol: true,
+    eq: true, ne: true, lt: true, gt: true, le: true, ge: true,
+    eq_tol: true, ne_tol: true, in: true,
    not: true, and: true, or: true,
    is_int: true, is_text: true, is_num: true,
    is_bool: true, is_null: true, is_identical: true,
@@ -63,22 +57,18 @@ var streamline = function(ir, log) {

  // simplify_algebra dispatch tables
  var self_true_ops = {
-    eq_int: true, eq_float: true, eq_text: true, eq_bool: true,
-    is_identical: true,
-    le_int: true, le_float: true, le_text: true,
-    ge_int: true, ge_float: true, ge_text: true
+    eq: true, is_identical: true, le: true, ge: true
  }
  var self_false_ops = {
-    ne_int: true, ne_float: true, ne_text: true, ne_bool: true,
-    lt_int: true, lt_float: true, lt_text: true,
-    gt_int: true, gt_float: true, gt_text: true
+    ne: true, lt: true, gt: true
  }
  var no_clear_ops = {
    int: true, access: true, true: true, false: true, move: true, null: true,
    jump: true, jump_true: true, jump_false: true, jump_not_null: true,
    return: true, disrupt: true,
    store_field: true, store_index: true, store_dynamic: true,
-    push: true, setarg: true, invoke: true, tail_invoke: true
+    push: true, setarg: true, invoke: true, tail_invoke: true,
+    stone_text: true
  }

  // --- Logging support ---
@@ -141,6 +131,13 @@ var streamline = function(ir, log) {
  }

  // track_types reuses write_rules table; move handled specially
+  // Ops whose T_NUM result may be narrowed to T_INT when both operands are T_INT.
+  // Excludes divide (int/int can produce float) and pow (int**neg produces float).
+  var int_narrowable_ops = {  // consulted by track_types and narrow_arith_type
+    add: true, subtract: true, multiply: true,  // NOTE(review): assumes int overflow cannot turn these into floats — confirm
+    remainder: true, modulo: true, max: true, min: true
+  }
+
  var track_types = function(slot_types, instr) {
    var op = instr[0]
    var rule = null
@@ -157,6 +154,13 @@ var streamline = function(ir, log) {
      if (typ == null) {
        typ = access_value_type(instr[2])
      }
+      // Narrow T_NUM to T_INT when both operands are T_INT
+      if (typ == T_NUM && instr[3] != null && int_narrowable_ops[op] == true) {
+        if (slot_is(slot_types, instr[2], T_INT)
+            && slot_is(slot_types, instr[3], T_INT)) {
+          typ = T_INT
+        }
+      }
      slot_types[instr[rule[0]]] = typ
    }
    return null
@@ -350,13 +354,6 @@ var streamline = function(ir, log) {
    load_index: [1, T_UNKNOWN], load_dynamic: [1, T_UNKNOWN],
    pop: [1, T_UNKNOWN], get: [1, T_UNKNOWN],
    invoke: [2, T_UNKNOWN], tail_invoke: [2, T_UNKNOWN],
-    eq_int: [1, T_BOOL], ne_int: [1, T_BOOL], lt_int: [1, T_BOOL],
-    gt_int: [1, T_BOOL], le_int: [1, T_BOOL], ge_int: [1, T_BOOL],
-    eq_float: [1, T_BOOL], ne_float: [1, T_BOOL], lt_float: [1, T_BOOL],
-    gt_float: [1, T_BOOL], le_float: [1, T_BOOL], ge_float: [1, T_BOOL],
-    eq_text: [1, T_BOOL], ne_text: [1, T_BOOL], lt_text: [1, T_BOOL],
-    gt_text: [1, T_BOOL], le_text: [1, T_BOOL], ge_text: [1, T_BOOL],
-    eq_bool: [1, T_BOOL], ne_bool: [1, T_BOOL],
    eq_tol: [1, T_BOOL], ne_tol: [1, T_BOOL],
    not: [1, T_BOOL], and: [1, T_BOOL], or: [1, T_BOOL],
    is_int: [1, T_BOOL], is_text: [1, T_BOOL], is_num: [1, T_BOOL],
@@ -373,6 +370,34 @@ var streamline = function(ir, log) {
    max: T_NUM, min: T_NUM, remainder: T_NUM, modulo: T_NUM
  }

+  var narrow_arith_type = function(write_types, param_types, instr, typ) {  // returns T_INT in place of T_NUM when both operands are known T_INT, else typ
+    var s2 = null  // operand slots, read from instr[2] / instr[3]
+    var s3 = null
+    var t2 = null  // types found for s2 / s3; stay null when nothing is known
+    var t3 = null
+    if (typ != T_NUM || instr[3] == null || int_narrowable_ops[instr[0]] != true) {  // only two-operand ops listed in int_narrowable_ops qualify
+      return typ
+    }
+    s2 = instr[2]
+    s3 = instr[3]
+    if (is_number(s2)) {
+      t2 = write_types[s2]  // NOTE(review): write_types is still being filled by the caller's scan — confirm a later non-int write to s2 cannot invalidate this
+      if (t2 == null && param_types != null && s2 < length(param_types)) {  // no write type recorded: fall back to the parameter type
+        t2 = param_types[s2]
+      }
+    }
+    if (is_number(s3)) {  // same lookup for the second operand
+      t3 = write_types[s3]
+      if (t3 == null && param_types != null && s3 < length(param_types)) {
+        t3 = param_types[s3]
+      }
+    }
+    if (t2 == T_INT && t3 == T_INT) {
+      return T_INT
+    }
+    return typ  // at least one operand is not known to be T_INT: keep the incoming type
+  }
+
  var infer_slot_write_types = function(func, param_types) {
    var instructions = func.instructions
    var nr_args = func.nr_args != null ? func.nr_args : 0
@@ -457,6 +482,19 @@ var streamline = function(ir, log) {
          i = i + 1
          continue
        }
+        if (op == "get" && func._closure_slot_types != null) {
+          slot = instr[1]
+          typ = T_UNKNOWN
+          src_typ = func._closure_slot_types[text(instr[2]) + "_" + text(instr[3])]
+          if (src_typ != null) {
+            typ = src_typ
+          }
+          if (slot > 0 && slot > nr_args) {
+            merge_backward(write_types, slot, typ)
+          }
+          i = i + 1
+          continue
+        }

        rule = write_rules[op]
        if (rule != null) {
@@ -465,6 +503,7 @@ var streamline = function(ir, log) {
          if (typ == null) {
            typ = access_value_type(instr[2])
          }
+          typ = narrow_arith_type(write_types, param_types, instr, typ)
          if (slot > 0 && slot > nr_args) {
            merge_backward(write_types, slot, typ)
          }
@@ -1058,7 +1097,9 @@ var streamline = function(ir, log) {
  }

  // =========================================================
-  // Pass: eliminate_moves — move a, a → nop
+  // Pass: eliminate_moves — copy propagation + self-move nop
+  // Tracks move chains within basic blocks, substitutes read
+  // operands to use the original source, and nops self-moves.
  // =========================================================
  var eliminate_moves = function(func, log) {
    var instructions = func.instructions
@@ -1067,6 +1108,19 @@ var streamline = function(ir, log) {
    var i = 0
    var instr = null
    var events = null
+    var copies = null
+    var key = null
+    var actual = null
+    var dest = 0
+    var src = 0
+    var wr = null
+    var write_pos = null
+    var op = null
+    var j = 0
+    var k = 0
+    var keys = null
+    var special = null
+    var limit = 0

    if (instructions == null || length(instructions) == 0) {
      return null
@@ -1076,24 +1130,321 @@ var streamline = function(ir, log) {
      events = log.events
    }

+    copies = {}
    num_instr = length(instructions)
    i = 0
    while (i < num_instr) {
      instr = instructions[i]
-      if (is_array(instr) && instr[0] == "move" && instr[1] == instr[2]) {
-        nc = nc + 1
-        instructions[i] = "_nop_mv_" + text(nc)
-        if (events != null) {
-          events[] = {
-            event: "rewrite", pass: "eliminate_moves",
-            rule: "self_move", at: i,
-            before: instr, after: instructions[i]
+
+      // Labels: clear copies at join points
+      if (is_text(instr)) {
+        if (!starts_with(instr, "_nop_")) {
+          copies = {}
+        }
+        i = i + 1
+        continue
+      }
+
+      if (!is_array(instr)) {
+        i = i + 1
+        continue
+      }
+
+      op = instr[0]
+
+      // Control flow without reads: clear copies
+      if (op == "jump" || op == "disrupt") {
+        copies = {}
+        i = i + 1
+        continue
+      }
+
+      // Control flow with a read at position 1: substitute then clear
+      if (op == "return" || op == "jump_true" || op == "jump_false" || op == "jump_not_null") {
+        actual = copies[text(instr[1])]
+        if (actual != null) {
+          instr[1] = actual
+        }
+        copies = {}
+        i = i + 1
+        continue
+      }
+
+      // Move: copy propagation
+      if (op == "move") {
+        dest = instr[1]
+        src = instr[2]
+
+        // Follow transitive chain for src
+        actual = copies[text(src)]
+        if (actual == null) {
+          actual = src
+        }
+
+        // Rewrite the move's src operand
+        instr[2] = actual
+
+        // Kill stale entries for dest
+        key = text(dest)
+        copies[key] = null
+        keys = array(copies)
+        k = 0
+        while (k < length(keys)) {
+          if (copies[keys[k]] == dest) {
+            copies[keys[k]] = null
          }
+          k = k + 1
+        }
+
+        // Record the new copy
+        copies[text(dest)] = actual
+
+        // Self-move after substitution → nop
+        if (dest == actual) {
+          nc = nc + 1
+          instructions[i] = "_nop_mv_" + text(nc)
+          if (events != null) {
+            events[] = {
+              event: "rewrite", pass: "eliminate_moves",
+              rule: "self_move", at: i,
+              before: ["move", dest, src], after: instructions[i]
+            }
+          }
+        }
+
+        i = i + 1
+        continue
+      }
+
+      // General instruction: substitute reads, then kill write
+      wr = write_rules[op]
+      write_pos = null
+      if (wr != null) {
+        write_pos = wr[0]
+      }
+
+      // Substitute read operands
+      special = slot_idx_special[op]
+      if (special != null) {
+        j = 0
+        while (j < length(special)) {
+          k = special[j]
+          if (k != write_pos && is_number(instr[k])) {
+            actual = copies[text(instr[k])]
+            if (actual != null) {
+              instr[k] = actual
+            }
+          }
+          j = j + 1
+        }
+      } else {
+        limit = length(instr) - 2
+        j = 1
+        while (j < limit) {
+          if (j != write_pos && is_number(instr[j])) {
+            actual = copies[text(instr[j])]
+            if (actual != null) {
+              instr[j] = actual
+            }
+          }
+          j = j + 1
+        }
+      }
+
+      // Kill write destination
+      if (write_pos != null && is_number(instr[write_pos])) {
+        dest = instr[write_pos]
+        key = text(dest)
+        copies[key] = null
+        keys = array(copies)
+        k = 0
+        while (k < length(keys)) {
+          if (copies[keys[k]] == dest) {
+            copies[keys[k]] = null
+          }
+          k = k + 1
+        }
+      }
+
+      i = i + 1
+    }
+
+    return null
+  }
+
+  // =========================================================
+  // Pass: insert_stone_text — freeze mutable text at escape points
+  // Only inserts stone_text when the slot is provably T_TEXT.
+  // Escape points: setfield, setindex, store_field, store_index,
+  // store_dynamic, push, setarg, put (value leaving its slot).
+  // move: stone source only if source is still live after the move.
+  // =========================================================
+
+  // Map: escape opcode → operand position (index into the instruction array) of the slot whose value escapes
+  var escape_slot_index = {  // insert_stone_text reads the slot as instr[escape_slot_index[op]]
+    setfield: 3, setindex: 3,
+    store_field: 3, store_index: 3, store_dynamic: 3,
+    push: 2, setarg: 3, put: 1  // push and put keep the escaping slot at a different position than the store ops
+  }
+
+  // Build the last_ref liveness array for a function's instructions.
+  // Returns array where last_ref[slot] = last instruction index referencing that slot (-1 if never referenced), extended across loops.
+  // Uses get_slot_refs to only visit actual slot reference positions.
+  var build_slot_liveness = function(instructions, nr_slots) {
+    var last_ref = array(nr_slots, -1)  // -1 stays in place for slots no instruction references
+    var n = length(instructions)
+    var refs = null  // operand positions reported by get_slot_refs for the current instruction
+    var i = 0
+    var j = 0
+    var s = 0  // slot number; reused as the slot counter in the loop-extension phase
+    var instr = null
+    var label_map = null  // label text -> instruction index
+    var changed = false
+    var op = null
+    var target = null  // jump target label of the current instruction, if it is a jump
+    var tpos = 0  // instruction index of that label
+
+    // Scan instructions for slot references
+    while (i < n) {
+      instr = instructions[i]
+      if (is_array(instr)) {
+        refs = get_slot_refs(instr)
+        j = 0
+        while (j < length(refs)) {
+          s = instr[refs[j]]
+          if (is_number(s) && s >= 0 && s < nr_slots) {  // skip non-numeric operands and out-of-range numbers
+            last_ref[s] = i  // forward scan, so a later reference overwrites an earlier one
+          }
+          j = j + 1
        }
      }
      i = i + 1
    }

+    // Extend for backward jumps (loops)
+    label_map = {}
+    i = 0
+    while (i < n) {
+      instr = instructions[i]
+      if (is_text(instr) && !starts_with(instr, "_nop_")) {  // text entries are labels; "_nop_" placeholder strings are skipped
+        label_map[instr] = i
+      }
+      i = i + 1
+    }
+    changed = true
+    while (changed) {  // repeat until stable: one extension can bring a slot into range of another backward jump
+      changed = false
+      i = 0
+      while (i < n) {
+        instr = instructions[i]
+        if (is_array(instr)) {
+          target = null
+          op = instr[0]
+          if (op == "jump") {
+            target = instr[1]
+          } else if (op == "jump_true" || op == "jump_false" || op == "jump_not_null") {
+            target = instr[2]  // conditional jumps read a slot at position 1 and name the label at position 2
+          }
+          if (target != null && is_text(target)) {
+            tpos = label_map[target]
+            if (tpos != null && tpos < i) {  // backward jump: instructions tpos..i form a loop body
+              s = 0
+              while (s < nr_slots) {
+                if (last_ref[s] >= 0 && last_ref[s] >= tpos && last_ref[s] < i) {  // last reference falls inside the loop body
+                  last_ref[s] = i  // keep the slot live up to the back edge
+                  changed = true
+                }
+                s = s + 1
+              }
+            }
+          }
+        }
+        i = i + 1
+      }
+    }
+
+    return last_ref
+  }
+
+  var insert_stone_text = function(func, log) {  // inserts ["stone_text", slot] before escape instructions and aliasing moves of text slots; always returns null
+    var instructions = func.instructions
+    var nr_slots = func.nr_slots
+    var dpc = func.disruption_pc  // re-based at the end when instructions are inserted before it
+    var events = null
+    var slot_types = null  // per-slot type, updated by track_types as the walk advances
+    var result = null  // rebuilt instruction list including the inserted stone_text entries
+    var i = 0
+    var n = 0
+    var instr = null
+    var op = null
+    var esc = null  // operand position of the escaping slot for op, or null if op is not an escape
+    var slot = 0
+    var nc = 0  // number of stone_text instructions inserted
+    var shift = 0  // number of insertions made at indices below dpc
+    var last_ref = null
+
+    if (instructions == null || length(instructions) == 0) {
+      return null
+    }
+
+    if (log != null && log.events != null) {
+      events = log.events
+    }
+
+    // Build liveness info (in separate function to stay under slot limit)
+    last_ref = build_slot_liveness(instructions, nr_slots)
+
+    // Walk instructions, tracking types, inserting stone_text
+    n = length(instructions)
+    slot_types = array(nr_slots, T_UNKNOWN)  // NOTE(review): never reset at labels during the walk — confirm linear tracking is intended
+    result = []
+    i = 0
+    while (i < n) {
+      instr = instructions[i]
+      if (is_array(instr)) {
+        op = instr[0]
+        esc = escape_slot_index[op]
+        if (esc != null) {
+          slot = instr[esc]
+          if (is_number(slot) && slot_is(slot_types, slot, T_TEXT)) {  // type is checked before track_types sees this instruction
+            result[] = ["stone_text", slot]  // emitted ahead of the escaping instruction
+            nc = nc + 1
+            if (is_number(dpc) && i < dpc) shift = shift + 1  // an insertion at i == dpc leaves dpc pointing at the new stone_text
+            if (events != null) {
+              events[] = {
+                event: "insert", pass: "insert_stone_text",
+                rule: "escape_stone", at: i, slot: slot, op: op
+              }
+            }
+          }
+        } else if (op == "move") {
+          // Stone source before move only if source is tracked as text
+          // AND source slot is still referenced after this instruction
+          slot = instr[2]
+          if (is_number(slot) && slot_is(slot_types, slot, T_TEXT) && last_ref[slot] > i) {
+            result[] = ["stone_text", slot]
+            nc = nc + 1
+            if (is_number(dpc) && i < dpc) shift = shift + 1
+            if (events != null) {
+              events[] = {
+                event: "insert", pass: "insert_stone_text",
+                rule: "move_alias_stone", at: i, slot: slot  // at: is the index in the original instruction list
+              }
+            }
+          }
+        }
+        track_types(slot_types, instr)
+      }
+      result[] = instr  // every original entry, labels included, is copied through unchanged
+      i = i + 1
+    }
+
+    if (nc > 0) {  // func is only touched when something was inserted
+      func.instructions = result
+      if (is_number(dpc) && shift > 0) {
+        func.disruption_pc = dpc + shift
+      }
+    }
    return null
  }

@@ -1212,7 +1563,7 @@ var streamline = function(ir, log) {

    idx = 0
    while (idx < num_instr) {
-      if (!reachable[idx] && is_array(instructions[idx])) {
+      if (!reachable[idx] && is_array(instructions[idx]) && (disruption_pc < 0 || idx >= disruption_pc)) {
        nc = nc + 1
        instructions[idx] = "_nop_ucfg_" + text(nc)
      }
@@ -1254,6 +1605,10 @@ var streamline = function(ir, log) {
        while (j < num_instr) {
          peek = instructions[j]
          if (is_text(peek)) {
+            if (starts_with(peek, "_nop_")) {
+              j = j + 1
+              continue
+            }
            if (peek == target_label) {
              nc = nc + 1
              instructions[i] = "_nop_dj_" + text(nc)
@@ -1299,7 +1654,8 @@ var streamline = function(ir, log) {
    frame: [1, 2], goframe: [1, 2],
    jump: [], disrupt: [],
    jump_true: [1], jump_false: [1], jump_not_null: [1],
-    return: [1]
+    return: [1],
+    stone_text: [1]
  }

  var get_slot_refs = function(instr) {
@@ -1318,6 +1674,54 @@ var streamline = function(ir, log) {
    return result
  }

+  // DEF/USE classification: which instruction positions are definitions (slot written) vs uses (slot read)
+  var slot_def_special = {  // opcode -> operand positions written; [] means the op defines no slot
+    get: [1], put: [], access: [1], int: [1], function: [1], regexp: [1],
+    true: [1], false: [1], null: [1], record: [1], array: [1],
+    invoke: [2], tail_invoke: [2], goinvoke: [],  // invoke ops write position 2, not 1
+    move: [1], load_field: [1], load_index: [1], load_dynamic: [1],
+    pop: [1], frame: [1], goframe: [1],
+    setarg: [], store_field: [], store_index: [], store_dynamic: [],
+    push: [], set_var: [], stone_text: [],
+    jump: [], jump_true: [], jump_false: [], jump_not_null: [],
+    return: [], disrupt: []  // opcodes missing from this table fall back to [1] in get_slot_defs
+  }
+
+  var slot_use_special = {  // opcode -> operand positions read; counterpart of slot_def_special
+    get: [], put: [1], access: [], int: [], function: [], regexp: [],
+    true: [], false: [], null: [], record: [], array: [],
+    invoke: [1], tail_invoke: [1], goinvoke: [1],
+    move: [2], load_field: [2], load_index: [2, 3], load_dynamic: [2, 3],
+    pop: [2], frame: [2], goframe: [2],
+    setarg: [1, 3], store_field: [1, 3], store_index: [1, 2, 3],  // position 2 is listed only for the index/dynamic stores
+    store_dynamic: [1, 2, 3],
+    push: [1, 2], set_var: [1], stone_text: [1],
+    jump: [], jump_true: [1], jump_false: [1], jump_not_null: [1],
+    return: [1], disrupt: []  // opcodes missing from this table are scanned generically by get_slot_uses
+  }
+
+  var get_slot_defs = function(instr) {  // returns the operand positions (not slot numbers) that instr writes
+    var special = slot_def_special[instr[0]]
+    if (special != null) return special  // the table's own array is returned, not a copy
+    return [1]  // opcodes without a table entry are treated as writing position 1
+  }
+
+  var get_slot_uses = function(instr) {  // returns the operand positions (not slot numbers) that instr reads
+    var special = slot_use_special[instr[0]]
+    var result = null
+    var j = 0
+    var limit = 0
+    if (special != null) return special  // the table's own array is returned, not a copy
+    result = []
+    limit = length(instr) - 2  // last two entries are never scanned — presumably trailing source-position data; confirm
+    j = 2  // position 1 is skipped here; get_slot_defs treats it as the definition
+    while (j < limit) {
+      if (is_number(instr[j])) result[] = j  // any numeric operand counts as a slot use
+      j = j + 1
+    }
+    return result
+  }
+
  var compress_one_fn = function(func, captured_slots) {
    var instructions = func.instructions
    var nr_slots = func.nr_slots
@@ -1853,6 +2257,71 @@ var streamline = function(ir, log) {
      }
      fi = fi + 1
    }
+    ir._parent_of = parent_of
+    ir._parent_fc = fc
+    return null
+  }
+
+  // =========================================================
+  // Resolve closure slot types from parent write_types.
+  // For each `get` in func, walk the parent chain and look up
+  // the ancestor's _write_types entry for that closure slot; stores the map on func._closure_slot_types.
+  // =========================================================
+  var resolve_closure_types = function(func, fi, ir) {  // fi is func's own index, used as the start of the parent walk
+    var parent_of = ir._parent_of  // parent index per function index
+    var fc = ir._parent_fc  // the index value that stands for ir.main in parent_of
+    var instructions = func.instructions
+    var num_instr = 0
+    var closure_types = null  // "<slot>_<depth>" -> type found on the ancestor
+    var i = 0
+    var instr = null
+    var slot = 0
+    var depth = 0
+    var anc = 0  // ancestor index reached so far while walking up
+    var j = 0
+    var target = null  // the ancestor function record (ir.main or ir.functions[anc])
+    var typ = null
+    var key = null
+
+    if (instructions == null || parent_of == null) {  // nothing to resolve; func._closure_slot_types is left untouched
+      return null
+    }
+
+    num_instr = length(instructions)
+    closure_types = {}
+    i = 0
+    while (i < num_instr) {
+      instr = instructions[i]
+      if (is_array(instr) && instr[0] == "get") {
+        slot = instr[2]  // slot number in the ancestor's frame
+        depth = instr[3]  // number of parent links to follow
+        key = text(slot) + "_" + text(depth)  // same key format infer_slot_write_types uses for its lookup
+        if (closure_types[key] == null) {  // not resolved yet (also retried when an earlier lookup found no type)
+          anc = fi
+          j = 0
+          while (j < depth && anc >= 0) {  // NOTE(review): assumes parent_of holds a negative number for "no parent" — confirm
+            anc = parent_of[anc]
+            j = j + 1
+          }
+          if (anc >= 0) {
+            if (anc == fc) {
+              target = ir.main
+            } else {
+              target = ir.functions[anc]
+            }
+            if (target != null && target._write_types != null) {  // _write_types exists only once optimize_function has run on the ancestor
+              typ = target._write_types[slot]
+              if (typ != null) {
+                closure_types[key] = typ
+              }
+            }
+          }
+        }
+      }
+      i = i + 1
+    }
+
+    func._closure_slot_types = closure_types
    return null
  }

@@ -2095,12 +2564,14 @@ var streamline = function(ir, log) {
    var slot_types = null
    var run_cycle = function(suffix) {
      var name = null
-      name = "infer_param_types" + suffix
-      run_pass(func, name, function() {
-        param_types = infer_param_types(func)
-        return param_types
-      })
-      if (verify_fn) verify_fn(func, "after " + name)
+      if (param_types == null) {
+        name = "infer_param_types" + suffix
+        run_pass(func, name, function() {
+          param_types = infer_param_types(func)
+          return param_types
+        })
+        if (verify_fn) verify_fn(func, "after " + name)
+      }

      name = "infer_slot_write_types" + suffix
      run_pass(func, name, function() {
@@ -2154,6 +2625,12 @@ var streamline = function(ir, log) {
        return eliminate_dead_jumps(func, log)
      })
      if (verify_fn) verify_fn(func, "after " + name)
+
+      name = "eliminate_unreachable_cfg" + suffix
+      run_pass(func, name, function() {
+        return eliminate_unreachable_cfg(func)
+      })
+      if (verify_fn) verify_fn(func, "after " + name)
      return null
    }

@@ -2162,6 +2639,8 @@ var streamline = function(ir, log) {
    }

    run_cycle("")
+    run_cycle("_2")
+    func._write_types = write_types
    if (ir._warn) {
      diagnose_function(func, {param_types: param_types, write_types: write_types}, ir)
    }
@@ -2181,14 +2660,17 @@ var streamline = function(ir, log) {
  // Process main function
  if (ir.main != null) {
    optimize_function(ir.main, log)
+    insert_stone_text(ir.main, log)
  }

-  // Process all sub-functions
+  // Process all sub-functions (resolve closure types from parent first)
  var fi = 0
  if (ir.functions != null) {
    fi = 0
    while (fi < length(ir.functions)) {
+      resolve_closure_types(ir.functions[fi], fi, ir)
      optimize_function(ir.functions[fi], log)
+      insert_stone_text(ir.functions[fi], log)
      fi = fi + 1
    }
  }
@@ -2196,6 +2678,14 @@ var streamline = function(ir, log) {
  // Compress slots across all functions (must run after per-function passes)
  compress_slots(ir)

+  // Expose DEF/USE functions via log if requested
+  if (log != null) {
+    if (log.request_def_use) {
+      log.get_slot_defs = get_slot_defs
+      log.get_slot_uses = get_slot_uses
+    }
+  }
+
  return ir
 }

--- a/vm_suite.ce
+++ b/vm_suite.ce
@@ -103,6 +103,29 @@ run("string concatenation empty", function() {
  if ("" + "world" != "world") fail("empty + string failed")
 })

+run("string concat does not mutate alias", function() {  // appending to a must not change the value b still refers to
+  var a = "hello world"
+  var b = a  // second reference to the same text, taken before the append
+  a = a + " appended"
+  if (a != "hello world appended") fail("a wrong, got " + a)
+  if (b != "hello world") fail("b should still be hello world, got " + b)
+})
+
+run("string concat in loop preserves aliases", function() {  // same check with repeated appends and values stored in an array
+  var a = "starting value"
+  var copies = [a]  // copies[0] holds the value before any append
+  var i = 0
+  while (i < 5) {
+    a = a + " more"
+    copies[] = a  // snapshot after each append; later appends must not alter it
+    i = i + 1
+  }
+  if (copies[0] != "starting value") fail("copies[0] wrong, got " + copies[0])  // only the first, second and last snapshots are checked
+  if (copies[1] != "starting value more") fail("copies[1] wrong, got " + copies[1])
+  if (copies[5] != "starting value more more more more more") fail("copies[5] wrong, got " + copies[5])
+  if (a != "starting value more more more more more") fail("a wrong, got " + a)
+})
+
 // ============================================================================
 // TYPE MIXING SHOULD DISRUPT
 // ============================================================================
@@ -2738,6 +2761,22 @@ run("modulo floats", function() {
  if (result < 1.4 || result > 1.6) fail("modulo floats failed")
 })

+run("remainder float basic", function() {  // 5.5, 2.5 and 0.5 are exactly representable, so exact comparison is safe
+  if (remainder(5.5, 2.5) != 0.5) fail("remainder 5.5 % 2.5 failed")
+})
+
+run("modulo float basic", function() {  // positive operands: expected result matches remainder
+  if (modulo(5.5, 2.5) != 0.5) fail("modulo 5.5 % 2.5 failed")
+})
+
+run("remainder float negative", function() {  // expected result carries the sign of the dividend
+  if (remainder(-5.5, 2.5) != -0.5) fail("remainder -5.5 % 2.5 failed")
+})
+
+run("modulo float negative", function() {  // expected result carries the sign of the divisor: -5.5 + 3 * 2.5 = 2.0
+  if (modulo(-5.5, 2.5) != 2.0) fail("modulo -5.5 % 2.5 failed")
+})
+
 // ============================================================================
 // MIN AND MAX FUNCTIONS
 // ============================================================================
--- a/xref.ce
+++ b/xref.ce
@@ -0,0 +1,249 @@
+// xref.ce — cross-reference of function creation sites (which function creates which; calls are not scanned)
+//
+// Usage:
+//   cell xref <file>                   Full creation tree, starting from main
+//   cell xref --callers <N> <file>     Which functions create function [N]?
+//   cell xref --callees <N> <file>     Which functions does [N] create?
+//   cell xref --optimized <file>       Use optimized IR
+//   cell xref --dot <file>             DOT graph for graphviz
+
+var shop = use("internal/shop")
+
+var run = function() {  // parses args, compiles the file, records `function` creation sites, then prints one of four views
+  var filename = null
+  var use_optimized = false
+  var show_callers = null  // function index given to --callers, or null
+  var show_callees = null  // function index given to --callees, or null
+  var show_dot = false
+  var i = 0
+  var compiled = null
+  var creates = {}  // text(parent index) -> list of {child, line}
+  var created_by = {}  // text(child index) -> list of {parent, line}
+  var func_names = {}  // text(function index) -> display name; "-1" is main
+  var fi = 0
+  var func = null
+  var fname = null
+  var main_name = null
+  var creators = null
+  var c = null
+  var line_info = null
+  var children = null
+  var ch = null
+  var ch_line = null
+  var parent_keys = null
+  var ki = 0
+  var parent_idx = 0
+  var ch_list = null
+  var ci = 0
+  var printed = {}  // text(function index) -> true once print_tree has expanded it
+
+  while (i < length(args)) {  // args is not defined in this file — presumably the program's command-line arguments
+    if (args[i] == '--callers') {
+      i = i + 1  // the option's value is the next argument
+      show_callers = number(args[i])
+    } else if (args[i] == '--callees') {
+      i = i + 1
+      show_callees = number(args[i])
+    } else if (args[i] == '--dot') {
+      show_dot = true
+    } else if (args[i] == '--optimized') {
+      use_optimized = true
+    } else if (args[i] == '--help' || args[i] == '-h') {
+      log.console("Usage: cell xref [--callers <N>] [--callees <N>] [--dot] [--optimized] <file>")
+      log.console("")
+      log.console("  --callers <N>   Who creates function [N]?")
+      log.console("  --callees <N>   What does [N] create/call?")
+      log.console("  --dot           Output DOT format for graphviz")
+      log.console("  --optimized     Use optimized IR")
+      return null
+    } else if (!starts_with(args[i], '-')) {  // unrecognised arguments starting with '-' are silently ignored
+      filename = args[i]  // if several file names are given, the last one wins
+    }
+    i = i + 1
+  }
+
+  if (!filename) {
+    log.console("Usage: cell xref [--callers <N>] [--callees <N>] [--dot] [--optimized] <file>")
+    return null
+  }
+
+  if (use_optimized) {
+    compiled = shop.compile_file(filename)  // --optimized selects compile_file; the default is mcode_file
+  } else {
+    compiled = shop.mcode_file(filename)
+  }
+
+  main_name = compiled.name != null ? compiled.name : "<main>"
+  func_names["-1"] = main_name  // main is addressed as index -1 throughout
+
+  var scan_func = function(func, parent_idx) {  // records every `function` instruction in func under parent_idx
+    var instrs = func.instructions
+    var j = 0
+    var instr = null
+    var n = 0
+    var child_idx = null
+    var instr_line = null
+    if (instrs == null) return null
+    while (j < length(instrs)) {
+      instr = instrs[j]
+      if (is_array(instr) && instr[0] == "function") {  // only creation sites are recorded; no other opcode is inspected
+        n = length(instr)
+        child_idx = instr[2]  // index of the created function
+        instr_line = instr[n - 2]  // second-to-last entry, reported as the line number
+        if (!creates[text(parent_idx)]) {
+          creates[text(parent_idx)] = []
+        }
+        push(creates[text(parent_idx)], {child: child_idx, line: instr_line})
+        if (!created_by[text(child_idx)]) {
+          created_by[text(child_idx)] = []
+        }
+        push(created_by[text(child_idx)], {parent: parent_idx, line: instr_line})
+      }
+      j = j + 1
+    }
+    return null
+  }
+
+  if (compiled.main != null) {
+    scan_func(compiled.main, -1)
+  }
+
+  if (compiled.functions != null) {
+    fi = 0
+    while (fi < length(compiled.functions)) {
+      func = compiled.functions[fi]
+      fname = func.name != null ? func.name : "<anonymous>"
+      func_names[text(fi)] = fname
+      scan_func(func, fi)
+      fi = fi + 1
+    }
+  }
+
+  var func_label = function(idx) {  // display label: main's name, or "[idx] name"
+    var name = func_names[text(idx)]
+    if (idx == -1) return main_name
+    if (name != null) return `[${text(idx)}] ${name}`
+    return `[${text(idx)}]`  // index with no recorded name
+  }
+
+  var safe_label = function(idx) {  // label text for DOT output, with double quotes escaped
+    var name = func_names[text(idx)]
+    if (name != null) return replace(name, '"', '\\"')
+    if (idx == -1) return main_name  // only reached if "-1" has no func_names entry
+    return `func_${text(idx)}`
+  }
+
+  var node_id = function(idx) {  // DOT node identifier: "main" or "f<idx>"
+    if (idx == -1) return "main"
+    return `f${text(idx)}`
+  }
+
+  // --callers mode: list every recorded creation site of function [N]
+  if (show_callers != null) {
+    creators = created_by[text(show_callers)]
+    log.compile(`\nCallers of ${func_label(show_callers)}:`)
+    if (creators == null || length(creators) == 0) {
+      log.compile("  (none - may be main or unreferenced)")
+    } else {
+      i = 0
+      while (i < length(creators)) {
+        c = creators[i]
+        line_info = c.line != null ? ` at line ${text(c.line)}` : ""
+        log.compile(`  ${func_label(c.parent)}${line_info}`)
+        i = i + 1
+      }
+    }
+    return null  // takes precedence over --callees, --dot and the tree view
+  }
+
+  // --callees mode: list every function that [N] creates
+  if (show_callees != null) {
+    children = creates[text(show_callees)]
+    log.compile(`\nCallees of ${func_label(show_callees)}:`)
+    if (children == null || length(children) == 0) {
+      log.compile("  (none)")
+    } else {
+      i = 0
+      while (i < length(children)) {
+        ch = children[i]
+        ch_line = ch.line != null ? ` at line ${text(ch.line)}` : ""
+        log.compile(`  ${func_label(ch.child)}${ch_line}`)
+        i = i + 1
+      }
+    }
+    return null
+  }
+
+  // --dot mode: one node per function plus main, one edge per creation site
+  if (show_dot) {
+    log.compile("digraph xref {")
+    log.compile("  rankdir=TB;")
+    log.compile("  node [shape=box, style=filled, fillcolor=lightyellow];")
+
+    log.compile(`  ${node_id(-1)} [label="${safe_label(-1)}"];`)
+
+    if (compiled.functions != null) {
+      fi = 0
+      while (fi < length(compiled.functions)) {
+        log.compile(`  ${node_id(fi)} [label="${safe_label(fi)}"];`)  // label is the name only, without the index
+        fi = fi + 1
+      }
+    }
+
+    parent_keys = array(creates)  // array() on a record is used here to obtain its keys
+    ki = 0
+    while (ki < length(parent_keys)) {
+      parent_idx = number(parent_keys[ki])  // keys were stored as text(index); convert back for node_id
+      ch_list = creates[parent_keys[ki]]
+      ci = 0
+      while (ci < length(ch_list)) {
+        log.compile(`  ${node_id(parent_idx)} -> ${node_id(ch_list[ci].child)};`)
+        ci = ci + 1
+      }
+      ki = ki + 1
+    }
+
+    log.compile("}")
+    return null
+  }
+
+  // Default: indented creation tree rooted at main
+  var print_tree = function(idx, depth) {  // prints idx at the given depth, then recurses into the functions it creates
+    var indent = ""
+    var d = 0
+    var children = null
+    var ci = 0
+    var child = null
+    while (d < depth) {  // two spaces of indent per depth level
+      indent = indent + "  "
+      d = d + 1
+    }
+
+    log.compile(`${indent}${func_label(idx)}`)
+
+    if (printed[text(idx)]) {  // expand each function once; repeats get a marker, which also stops cycles
+      log.compile(`${indent}  (already shown)`)
+      return null
+    }
+    printed[text(idx)] = true
+
+    children = creates[text(idx)]
+    if (children != null) {
+      ci = 0
+      while (ci < length(children)) {
+        child = children[ci]
+        print_tree(child.child, depth + 1)
+        ci = ci + 1
+      }
+    }
+    return null
+  }
+
+  log.compile("")
+  print_tree(-1, 0)  // functions never reached from main are not printed
+
+  return null
+}
+
+run()
+$stop()