Merge branch 'better_disasm' into optimize_mcode

This commit is contained in:
2026-02-21 01:28:59 -06:00
5 changed files with 1530 additions and 0 deletions

456
cfg.ce Normal file
View File

@@ -0,0 +1,456 @@
// cfg.ce — control flow graph
//
// Usage:
// cell cfg --fn <N|name> <file> Text CFG for function
// cell cfg --dot --fn <N|name> <file> DOT output for graphviz
// cell cfg <file> Text CFG for all functions
// cell cfg --optimized <file> Use optimized IR
var shop = use("internal/shop")
// Right-pad `s` with single spaces until it is at least `w` long.
// Input already `w` or longer is returned unchanged (never truncated).
var pad_right = function(s, w) {
  var padded = s
  while (w > length(padded)) {
    padded = padded + " "
  }
  return padded
}
// Render one instruction operand for display:
//   null    -> the word null
//   text    -> the text wrapped in double quotes
//   logical -> true / false
//   other   -> whatever text() produces (numbers, objects, ...)
var fmt_val = function(v) {
  if (is_null(v)) {
    return "null"
  }
  if (is_text(v)) {
    return `"${v}"`
  }
  if (is_logical(v)) {
    if (v) return "true"
    return "false"
  }
  return text(v)
}
// True for every jump opcode: the unconditional "jump" and the four
// conditional variants.
var is_jump_op = function(op) {
  if (op == "jump") return true
  if (op == "jump_true" || op == "jump_false") return true
  return op == "jump_null" || op == "jump_not_null"
}
// True for the jump opcodes that test a value first; the plain "jump"
// is deliberately excluded.
var is_conditional_jump = function(op) {
  if (op == "jump_true" || op == "jump_false") return true
  return op == "jump_null" || op == "jump_not_null"
}
// True for opcodes after which control never continues to the next
// instruction of the same function: return, disrupt and the two
// tail-call forms.
var is_terminator = function(op) {
  if (op == "return" || op == "disrupt") return true
  return op == "tail_invoke" || op == "goinvoke"
}
var run = function() {
var filename = null
var fn_filter = null
var show_dot = false
var use_optimized = false
var i = 0
var compiled = null
var main_name = null
var fi = 0
var func = null
var fname = null
while (i < length(args)) {
if (args[i] == '--fn') {
i = i + 1
fn_filter = args[i]
} else if (args[i] == '--dot') {
show_dot = true
} else if (args[i] == '--optimized') {
use_optimized = true
} else if (args[i] == '--help' || args[i] == '-h') {
log.console("Usage: cell cfg [--fn <N|name>] [--dot] [--optimized] <file>")
log.console("")
log.console(" --fn <N|name> Filter to function by index or name")
log.console(" --dot Output DOT format for graphviz")
log.console(" --optimized Use optimized IR")
return null
} else if (!starts_with(args[i], '-')) {
filename = args[i]
}
i = i + 1
}
if (!filename) {
log.console("Usage: cell cfg [--fn <N|name>] [--dot] [--optimized] <file>")
return null
}
if (use_optimized) {
compiled = shop.compile_file(filename)
} else {
compiled = shop.mcode_file(filename)
}
// Decide whether a function is selected by the --fn filter.
//   index: function index (the caller passes -1 for main)
//   name:  function name, may be null
// With no filter everything matches. Otherwise the filter matches when it
// equals the textual index (non-negative indexes only) or when search()
// finds it inside the name.
var fn_matches = function(index, name) {
  var found_at = null
  if (fn_filter == null) {
    return true
  }
  if (index >= 0 && text(index) == fn_filter) {
    return true
  }
  if (name == null) {
    return false
  }
  found_at = search(name, fn_filter)
  if (found_at != null && found_at >= 0) {
    return true
  }
  return false
}
// Build the basic-block control flow graph for one function.
//
// func.instructions is a flat list in which array entries are instructions
// (slot 0 is the opcode, slot length-2 is read as the source line) and text
// entries are labels. Text entries starting with "_nop_" are placeholders:
// they are not labels and do not advance the program counter.
//
// Returns an array of block records
//   {id, label, start_pc, end_pc, instrs, edges, first_line, last_line}
// where instrs holds {pc, instr} and edges holds {target, kind}. A target of
// -1 marks a function exit. Returns [] when there are no instructions.
//
// Jump targets are resolved label -> pc -> block. Several labels can sit on
// the same pc, and a block only remembers the last of them in `label`, so
// resolving through the block's own label would drop edges to the others.
var build_cfg = function(func) {
  var instrs = func.instructions
  var blocks = []
  var pc_to_block = {}
  var label_to_pc = {}
  var block_start_pcs = {}
  var after_terminator = false
  var current_block = null
  var current_label = null
  var pc = 0
  var ii = 0
  var bi = 0
  var instr = null
  var op = null
  var n = 0
  var line_num = null
  var blk = null
  var last_instr_data = null
  var last_op = null
  var target_label = null
  var target_pc = null
  var target_bi = null
  var edge_type = null
  if (instrs == null || length(instrs) == 0) return []
  // Pass 1: the first instruction, and every instruction that follows a
  // jump or terminator, starts a block.
  block_start_pcs["0"] = true
  pc = 0
  ii = 0
  while (ii < length(instrs)) {
    instr = instrs[ii]
    if (is_array(instr)) {
      op = instr[0]
      if (after_terminator) {
        block_start_pcs[text(pc)] = true
        after_terminator = false
      }
      if (is_jump_op(op) || is_terminator(op)) {
        after_terminator = true
      }
      pc = pc + 1
    }
    ii = ii + 1
  }
  // Pass 2: map every label to the pc of the next instruction and mark
  // that pc as a block start.
  pc = 0
  ii = 0
  while (ii < length(instrs)) {
    instr = instrs[ii]
    if (is_text(instr) && !starts_with(instr, "_nop_")) {
      label_to_pc[instr] = pc
      block_start_pcs[text(pc)] = true
    } else if (is_array(instr)) {
      pc = pc + 1
    }
    ii = ii + 1
  }
  // Pass 3: cut the instruction stream into blocks at the marked pcs.
  pc = 0
  ii = 0
  current_label = null
  while (ii < length(instrs)) {
    instr = instrs[ii]
    if (is_text(instr)) {
      if (!starts_with(instr, "_nop_")) {
        // With stacked labels the last one wins as the display label.
        current_label = instr
      }
      ii = ii + 1
      continue
    }
    if (is_array(instr)) {
      if (block_start_pcs[text(pc)]) {
        if (current_block != null) {
          push(blocks, current_block)
        }
        current_block = {
          id: length(blocks),
          label: current_label,
          start_pc: pc,
          end_pc: pc,
          instrs: [],
          edges: [],
          first_line: null,
          last_line: null
        }
        current_label = null
      }
      if (current_block != null) {
        push(current_block.instrs, {pc: pc, instr: instr})
        current_block.end_pc = pc
        n = length(instr)
        line_num = instr[n - 2]
        if (line_num != null) {
          if (current_block.first_line == null) {
            current_block.first_line = line_num
          }
          current_block.last_line = line_num
        }
      }
      pc = pc + 1
    }
    ii = ii + 1
  }
  if (current_block != null) {
    push(blocks, current_block)
  }
  // Index blocks by their starting pc.
  bi = 0
  while (bi < length(blocks)) {
    pc_to_block[text(blocks[bi].start_pc)] = bi
    bi = bi + 1
  }
  // Pass 4: derive outgoing edges from each block's last instruction.
  bi = 0
  while (bi < length(blocks)) {
    blk = blocks[bi]
    if (length(blk.instrs) > 0) {
      last_instr_data = blk.instrs[length(blk.instrs) - 1]
      last_op = last_instr_data.instr[0]
      if (is_jump_op(last_op)) {
        // "jump" carries its label in slot 1, conditional jumps in slot 2.
        if (last_op == "jump") {
          target_label = last_instr_data.instr[1]
        } else {
          target_label = last_instr_data.instr[2]
        }
        target_pc = null
        target_bi = null
        if (is_text(target_label)) {
          target_pc = label_to_pc[target_label]
        }
        if (target_pc != null) {
          // Null when the label sits after the last instruction.
          target_bi = pc_to_block[text(target_pc)]
        }
        if (target_bi != null) {
          edge_type = "jump"
          if (target_bi <= bi) {
            edge_type = "loop back-edge"
          }
          push(blk.edges, {target: target_bi, kind: edge_type})
        }
        if (is_conditional_jump(last_op)) {
          if (bi + 1 < length(blocks)) {
            push(blk.edges, {target: bi + 1, kind: "fallthrough"})
          }
        }
      } else if (is_terminator(last_op)) {
        push(blk.edges, {target: -1, kind: "EXIT (" + last_op + ")"})
      } else {
        if (bi + 1 < length(blocks)) {
          push(blk.edges, {target: bi + 1, kind: "fallthrough"})
        }
      }
    }
    bi = bi + 1
  }
  return blocks
}
// Print a function's CFG as plain text through log.compile.
//   blocks: block records as returned by build_cfg
//   name:   title used in the "=== name ===" banner
// For each block this prints a header (id, optional label, pc range, source
// line or line range), one row per instruction, then one "->" row per
// outgoing edge, followed by an empty line. An empty block list prints the
// banner and "(empty)". Always returns null.
var print_cfg_text = function(blocks, name) {
  var b = 0
  var block = null
  var title = null
  var k = 0
  var entry = null
  var width = 0
  var shown = null
  var c = 0
  var e = 0
  var out = null
  var dest_label = null
  var arrow = null
  log.compile(`\n=== ${name} ===`)
  if (length(blocks) == 0) {
    log.compile(" (empty)")
    return null
  }
  while (b < length(blocks)) {
    block = blocks[b]
    // Header row.
    title = ` B${text(b)}`
    if (block.label != null) {
      title = title + ` "${block.label}"`
    }
    title = title + ` [pc ${text(block.start_pc)}-${text(block.end_pc)}`
    if (block.first_line != null && block.first_line == block.last_line) {
      title = title + `, line ${text(block.first_line)}`
    } else if (block.first_line != null) {
      title = title + `, lines ${text(block.first_line)}-${text(block.last_line)}`
    }
    title = title + "]:"
    log.compile(title)
    // Instruction rows; the last two slots of an instruction are not operands.
    k = 0
    while (k < length(block.instrs)) {
      entry = block.instrs[k]
      width = length(entry.instr)
      shown = []
      c = 1
      while (c < width - 2) {
        push(shown, fmt_val(entry.instr[c]))
        c = c + 1
      }
      log.compile(` ${pad_right(text(entry.pc), 6)}${pad_right(entry.instr[0], 15)}${text(shown, ", ")}`)
      k = k + 1
    }
    // Edge rows; target -1 is a function exit and has no destination block.
    e = 0
    while (e < length(block.edges)) {
      out = block.edges[e]
      if (out.target == -1) {
        log.compile(` -> ${out.kind}`)
      } else {
        dest_label = blocks[out.target].label
        arrow = ` -> B${text(out.target)}`
        if (dest_label != null) {
          arrow = arrow + ` "${dest_label}"`
        }
        log.compile(arrow + ` (${out.kind})`)
      }
      e = e + 1
    }
    log.compile("")
    b = b + 1
  }
  return null
}
// Print a function's CFG as a Graphviz digraph through log.compile.
//   blocks: block records as returned by build_cfg
//   name:   function title; quotes are escaped and spaces become "_" to
//           form the graph name
// Every block becomes a record-shaped node "B<n>" whose label carries the
// block header and one left-justified row per instruction. Edges with a
// non-negative target become "Bx -> By"; loop back-edges are bold red and
// fallthrough edges dashed. Exit edges (target -1) are not drawn.
// Always returns null.
var print_cfg_dot = function(blocks, name) {
  var safe_name = replace(replace(name, '"', '\\"'), ' ', '_')
  var bi = 0
  var blk = null
  var label_text = null
  var ii = 0
  var idata = null
  var instr = null
  var op = null
  var n = 0
  var parts = null
  var j = 0
  var operands = null
  var ei = 0
  var edge = null
  var style = null
  // Escape free text for use inside a quoted record label. Besides the
  // double quote, record labels give special meaning to { } | < >, so text
  // operands or labels containing them would otherwise corrupt the node.
  // Backslashes go first so the escapes added afterwards are not doubled.
  var dot_escape = function(s) {
    var r = replace(s, '\\', '\\\\')
    r = replace(r, '"', '\\"')
    r = replace(r, '{', '\\{')
    r = replace(r, '}', '\\}')
    r = replace(r, '|', '\\|')
    r = replace(r, '<', '\\<')
    r = replace(r, '>', '\\>')
    return r
  }
  log.compile(`digraph "${safe_name}" {`)
  log.compile(" rankdir=TB;")
  log.compile(" node [shape=record, fontname=monospace, fontsize=10];")
  // Nodes
  bi = 0
  while (bi < length(blocks)) {
    blk = blocks[bi]
    label_text = "B" + text(bi)
    if (blk.label != null) {
      label_text = label_text + " (" + dot_escape(blk.label) + ")"
    }
    label_text = label_text + "\\npc " + text(blk.start_pc) + "-" + text(blk.end_pc)
    if (blk.first_line != null) {
      label_text = label_text + "\\nline " + text(blk.first_line)
    }
    // "|" splits the record into a header field and an instruction field.
    label_text = label_text + "|"
    ii = 0
    while (ii < length(blk.instrs)) {
      idata = blk.instrs[ii]
      instr = idata.instr
      op = instr[0]
      n = length(instr)
      parts = []
      j = 1
      while (j < n - 2) {
        push(parts, fmt_val(instr[j]))
        j = j + 1
      }
      operands = text(parts, ", ")
      // "\l" ends the row left-justified.
      label_text = label_text + text(idata.pc) + " " + op + " " + dot_escape(operands) + "\\l"
      ii = ii + 1
    }
    log.compile(" B" + text(bi) + " [label=\"{" + label_text + "}\"];")
    bi = bi + 1
  }
  // Edges
  bi = 0
  while (bi < length(blocks)) {
    blk = blocks[bi]
    ei = 0
    while (ei < length(blk.edges)) {
      edge = blk.edges[ei]
      if (edge.target >= 0) {
        style = ""
        if (edge.kind == "loop back-edge") {
          style = " [style=bold, color=red, label=\"loop\"]"
        } else if (edge.kind == "fallthrough") {
          style = " [style=dashed]"
        }
        log.compile(` B${text(bi)} -> B${text(edge.target)}${style};`)
      }
      ei = ei + 1
    }
    bi = bi + 1
  }
  log.compile("}")
  return null
}
// Build the CFG for one function and print it in the selected format:
// DOT when --dot was given, plain text otherwise. `index` is accepted for
// the caller's convenience but not used here. Always returns null.
var process_function = function(func, name, index) {
  var cfg_blocks = build_cfg(func)
  if (!show_dot) {
    print_cfg_text(cfg_blocks, name)
  } else {
    print_cfg_dot(cfg_blocks, name)
  }
  return null
}
// Process functions
main_name = compiled.name != null ? compiled.name : "<main>"
if (compiled.main != null) {
if (fn_matches(-1, main_name)) {
process_function(compiled.main, main_name, -1)
}
}
if (compiled.functions != null) {
fi = 0
while (fi < length(compiled.functions)) {
func = compiled.functions[fi]
fname = func.name != null ? func.name : "<anonymous>"
if (fn_matches(fi, fname)) {
process_function(func, `[${text(fi)}] ${fname}`, fi)
}
fi = fi + 1
}
}
return null
}
run()
$stop()

310
diff_ir.ce Normal file
View File

@@ -0,0 +1,310 @@
// diff_ir.ce — mcode vs streamline diff
//
// Usage:
// cell diff_ir <file> Diff all functions
// cell diff_ir --fn <N|name> <file> Diff only one function
// cell diff_ir --summary <file> Counts only
var fd = use("fd")
var shop = use("internal/shop")
// Right-pad `s` with single spaces until it is at least `w` long.
// Input already `w` or longer is returned unchanged (never truncated).
var pad_right = function(s, w) {
  var padded = s
  while (w > length(padded)) {
    padded = padded + " "
  }
  return padded
}
// Render one instruction operand for display:
//   null    -> the word null
//   text    -> the text wrapped in double quotes
//   logical -> true / false
//   other   -> whatever text() produces (numbers, objects, ...)
var fmt_val = function(v) {
  if (is_null(v)) {
    return "null"
  }
  if (is_text(v)) {
    return `"${v}"`
  }
  if (is_logical(v)) {
    if (v) return "true"
    return "false"
  }
  return text(v)
}
var run = function() {
var fn_filter = null
var show_summary = false
var filename = null
var i = 0
var mcode_ir = null
var opt_ir = null
var source_text = null
var source_lines = null
var main_name = null
var fi = 0
var func = null
var opt_func = null
var fname = null
while (i < length(args)) {
if (args[i] == '--fn') {
i = i + 1
fn_filter = args[i]
} else if (args[i] == '--summary') {
show_summary = true
} else if (args[i] == '--help' || args[i] == '-h') {
log.console("Usage: cell diff_ir [--fn <N|name>] [--summary] <file>")
log.console("")
log.console(" --fn <N|name> Filter to function by index or name")
log.console(" --summary Show counts only")
return null
} else if (!starts_with(args[i], '-')) {
filename = args[i]
}
i = i + 1
}
if (!filename) {
log.console("Usage: cell diff_ir [--fn <N|name>] [--summary] <file>")
return null
}
mcode_ir = shop.mcode_file(filename)
opt_ir = shop.compile_file(filename)
source_text = text(fd.slurp(filename))
source_lines = array(source_text, "\n")
// Fetch the text of a 1-based source line from source_lines.
// Returns null when line_num falls outside 1..length(source_lines).
var get_source_line = function(line_num) {
  var total = length(source_lines)
  if (line_num > total || line_num < 1) {
    return null
  }
  return source_lines[line_num - 1]
}
// Decide whether a function is selected by the --fn filter.
//   index: function index (the caller passes -1 for main)
//   name:  function name, may be null
// With no filter everything matches. Otherwise the filter matches when it
// equals the textual index (non-negative indexes only) or when search()
// finds it inside the name.
var fn_matches = function(index, name) {
  var found_at = null
  if (fn_filter == null) {
    return true
  }
  if (index >= 0 && text(index) == fn_filter) {
    return true
  }
  if (name == null) {
    return false
  }
  found_at = search(name, fn_filter)
  if (found_at != null && found_at >= 0) {
    return true
  }
  return false
}
// Format one instruction as a fixed-width row: the opcode padded to 15
// columns, the comma-separated operands, the whole padded to 45 columns,
// then ":<line>" when slot length-2 holds a line number. The last two slots
// of the instruction are never printed as operands.
var fmt_instr = function(instr) {
  var count = length(instr)
  var shown = []
  var k = 1
  var line_no = instr[count - 2]
  var suffix = ""
  var body = null
  while (k < count - 2) {
    push(shown, fmt_val(instr[k]))
    k = k + 1
  }
  if (line_no != null) {
    suffix = `:${text(line_no)}`
  }
  body = `${pad_right(instr[0], 15)}${text(shown, ", ")}`
  return pad_right(body, 45) + suffix
}
// Compare an instruction before and after optimization.
//   "eliminated": `after` is a "_nop_" placeholder text
//   "rewritten":  both are instructions but the opcode, the length or any
//                 operand differs (the last two slots are not compared)
//   "identical":  everything else, including non-instruction pairs
var classify = function(before, after) {
  var width = 0
  var k = 1
  if (is_text(after) && starts_with(after, "_nop_")) {
    return "eliminated"
  }
  if (!is_array(before) || !is_array(after)) {
    return "identical"
  }
  if (before[0] != after[0]) {
    return "rewritten"
  }
  width = length(before)
  if (width != length(after)) {
    return "rewritten"
  }
  while (k < width - 2) {
    if (before[k] != after[k]) {
      return "rewritten"
    }
    k = k + 1
  }
  return "identical"
}
var total_eliminated = 0
var total_rewritten = 0
var total_funcs = 0
// Diff one function's mcode instructions against its optimized form.
//   mcode_func / opt_func: function records with .instructions and, on the
//                          mcode side, optional nr_args / nr_slots
//   name:  title used in the output
//   index: accepted from the caller but not used here
// The two instruction lists are walked in lockstep (same position in both),
// stopping at the end of the shorter one. Eliminated and rewritten counts
// are added to the running totals. In --summary mode a single count row is
// printed; otherwise, when anything changed, each changed instruction is
// printed as a "-" / "+" pair, grouped under the source line it came from.
// Always returns null.
var diff_function = function(mcode_func, opt_func, name, index) {
  var nr_args = mcode_func.nr_args != null ? mcode_func.nr_args : 0
  var nr_slots = mcode_func.nr_slots != null ? mcode_func.nr_slots : 0
  var m_instrs = mcode_func.instructions
  var o_instrs = opt_func.instructions
  var eliminated = 0
  var rewritten = 0
  var at = 0
  var pc = 0
  var m_instr = null
  var o_instr = null
  var kind = null
  var last_line = null
  var annotation = null
  // Print the "--- line N: source ---" separator when `instr` comes from a
  // different source line than the previously reported change.
  var announce_line = function(instr) {
    var line_no = instr[length(instr) - 2]
    var src = null
    if (line_no == last_line || line_no == null) {
      return null
    }
    src = get_source_line(line_no)
    if (src != null) src = trim(src)
    if (last_line != null) log.compile("")
    if (src != null && length(src) > 0) {
      log.compile(` --- line ${text(line_no)}: ${src} ---`)
    }
    last_line = line_no
    return null
  }
  if (m_instrs == null) m_instrs = []
  if (o_instrs == null) o_instrs = []
  // First pass: count changes. Text entries on the mcode side (labels and
  // placeholders) are never counted.
  at = 0
  while (at < length(m_instrs) && at < length(o_instrs)) {
    m_instr = m_instrs[at]
    o_instr = o_instrs[at]
    if (!is_text(m_instr)) {
      if (is_text(o_instr) && starts_with(o_instr, "_nop_")) {
        if (is_array(m_instr)) {
          eliminated = eliminated + 1
        }
      } else if (is_array(m_instr) && is_array(o_instr)) {
        if (classify(m_instr, o_instr) == "rewritten") {
          rewritten = rewritten + 1
        }
      }
    }
    at = at + 1
  }
  total_eliminated = total_eliminated + eliminated
  total_rewritten = total_rewritten + rewritten
  total_funcs = total_funcs + 1
  if (show_summary) {
    if (eliminated == 0 && rewritten == 0) {
      log.compile(` ${pad_right(name + ":", 40)} 0 eliminated, 0 rewritten (unchanged)`)
    } else {
      log.compile(` ${pad_right(name + ":", 40)} ${text(eliminated)} eliminated, ${text(rewritten)} rewritten`)
    }
    return null
  }
  // Unchanged functions produce no detailed output.
  if (eliminated == 0 && rewritten == 0) return null
  log.compile(`\n=== ${name} (args=${text(nr_args)}, slots=${text(nr_slots)}) ===`)
  log.compile(` ${text(eliminated)} eliminated, ${text(rewritten)} rewritten`)
  // Second pass: show diffs. pc advances only for positions that hold an
  // instruction on the mcode side paired with a placeholder or instruction.
  at = 0
  pc = 0
  last_line = null
  while (at < length(m_instrs) && at < length(o_instrs)) {
    m_instr = m_instrs[at]
    o_instr = o_instrs[at]
    if (is_text(m_instr)) {
      at = at + 1
      continue
    }
    if (is_text(o_instr) && starts_with(o_instr, "_nop_")) {
      if (is_array(m_instr)) {
        announce_line(m_instr)
        log.compile(` - ${pad_right(text(pc), 6)}${fmt_instr(m_instr)}`)
        log.compile(` + ${pad_right(text(pc), 6)}${pad_right(o_instr, 45)} (eliminated)`)
      }
      pc = pc + 1
    } else if (is_array(m_instr) && is_array(o_instr)) {
      kind = classify(m_instr, o_instr)
      if (kind != "identical") {
        announce_line(m_instr)
        annotation = ""
        if (kind == "rewritten") {
          // Opcodes carrying a type suffix are reported as specializations.
          if (search(o_instr[0], "_int") != null || search(o_instr[0], "_float") != null || search(o_instr[0], "_text") != null) {
            annotation = "(specialized)"
          } else {
            annotation = "(rewritten)"
          }
        }
        log.compile(` - ${pad_right(text(pc), 6)}${fmt_instr(m_instr)}`)
        log.compile(` + ${pad_right(text(pc), 6)}${fmt_instr(o_instr)} ${annotation}`)
      }
      pc = pc + 1
    }
    at = at + 1
  }
  return null
}
// Process functions
main_name = mcode_ir.name != null ? mcode_ir.name : "<main>"
if (mcode_ir.main != null && opt_ir.main != null) {
if (fn_matches(-1, main_name)) {
diff_function(mcode_ir.main, opt_ir.main, main_name, -1)
}
}
if (mcode_ir.functions != null && opt_ir.functions != null) {
fi = 0
while (fi < length(mcode_ir.functions) && fi < length(opt_ir.functions)) {
func = mcode_ir.functions[fi]
opt_func = opt_ir.functions[fi]
fname = func.name != null ? func.name : "<anonymous>"
if (fn_matches(fi, fname)) {
diff_function(func, opt_func, `[${text(fi)}] ${fname}`, fi)
}
fi = fi + 1
}
}
if (show_summary) {
log.compile(`\n total: ${text(total_eliminated)} eliminated, ${text(total_rewritten)} rewritten across ${text(total_funcs)} functions`)
}
return null
}
run()
$stop()

View File

@@ -30,6 +30,10 @@ Each stage has a corresponding CLI tool that lets you see its output.
| streamline | `streamline.ce --ir` | Human-readable canonical IR |
| disasm | `disasm.ce` | Source-interleaved disassembly |
| disasm | `disasm.ce --optimized` | Optimized source-interleaved disassembly |
| diff | `diff_ir.ce` | Mcode vs streamline instruction diff |
| xref | `xref.ce` | Cross-reference / call creation graph |
| cfg | `cfg.ce` | Control flow graph (basic blocks) |
| slots | `slots.ce` | Slot data flow / use-def chains |
| all | `ir_report.ce` | Structured optimizer flight recorder |
All tools take a source file as input and run the pipeline up to the relevant stage.
@@ -141,6 +145,160 @@ Function creation instructions include a cross-reference annotation showing the
3 function 5, 12 :235 ; -> [12] helper_fn
```
## diff_ir.ce
Compares mcode IR (before optimization) with streamline IR (after optimization), showing what the optimizer changed. Useful for understanding which instructions were eliminated, specialized, or rewritten.
```bash
cell diff_ir <file> # diff all functions
cell diff_ir --fn <N|name> <file> # diff only one function
cell diff_ir --summary <file> # counts only
```
| Flag | Description |
|------|-------------|
| (none) | Show all diffs with source interleaving |
| `--fn <N\|name>` | Filter to specific function by index or name |
| `--summary` | Show only eliminated/rewritten counts per function |
### Output Format
Changed instructions are shown in diff style with `-` (before) and `+` (after) lines:
```
=== [0] <anonymous> (args=1, slots=40) ===
17 eliminated, 51 rewritten
--- line 4: if (n <= 1) { ---
- 1 is_int 4, 1 :4
+ 1 is_int 3, 1 :4 (specialized)
- 3 is_int 5, 2 :4
+ 3 _nop_tc_1 (eliminated)
```
Summary mode gives a quick overview:
```
[0] <anonymous>: 17 eliminated, 51 rewritten
[1] <anonymous>: 65 eliminated, 181 rewritten
total: 86 eliminated, 250 rewritten across 4 functions
```
## xref.ce
Cross-reference / call graph tool. Shows which functions create other functions (via `function` instructions), building a creation tree.
```bash
cell xref <file> # full creation tree
cell xref --callers <N> <file> # who creates function [N]?
cell xref --callees <N> <file> # what does [N] create/call?
cell xref --dot <file> # DOT graph for graphviz
cell xref --optimized <file> # use optimized IR
```
| Flag | Description |
|------|-------------|
| (none) | Indented creation tree from main |
| `--callers <N>` | Show which functions create function [N] |
| `--callees <N>` | Show what function [N] creates (use -1 for main) |
| `--dot` | Output DOT format for graphviz |
| `--optimized` | Use optimized IR instead of raw mcode |
### Output Format
Default tree view:
```
demo_disasm.cm
[0] <anonymous>
[1] <anonymous>
[2] <anonymous>
```
Caller/callee query:
```
Callers of [0] <anonymous>:
demo_disasm.cm at line 3
```
DOT output can be piped to graphviz: `cell xref --dot file.cm | dot -Tpng -o xref.png`
## cfg.ce
Control flow graph tool. Identifies basic blocks from labels and jumps, computes edges, and detects loop back-edges.
```bash
cell cfg --fn <N|name> <file> # text CFG for function
cell cfg --dot --fn <N|name> <file> # DOT output for graphviz
cell cfg <file> # text CFG for all functions
cell cfg --optimized <file> # use optimized IR
```
| Flag | Description |
|------|-------------|
| `--fn <N\|name>` | Filter to specific function by index or name |
| `--dot` | Output DOT format for graphviz |
| `--optimized` | Use optimized IR instead of raw mcode |
### Output Format
```
=== [0] <anonymous> ===
B0 [pc 0-2, line 4]:
0 access 2, 1
1 is_int 4, 1
2 jump_false 4, "rel_ni_2"
-> B3 "rel_ni_2" (jump)
-> B1 (fallthrough)
B1 [pc 3-4, line 4]:
3 is_int 5, 2
4 jump_false 5, "rel_ni_2"
-> B3 "rel_ni_2" (jump)
-> B2 (fallthrough)
```
Each block shows its ID, PC range, source lines, instructions, and outgoing edges. Loop back-edges (target PC <= source PC) are annotated.
## slots.ce
Slot data flow analysis. Builds use-def chains for every slot in a function, showing where each slot is defined and used. Optionally captures type information from streamline.
```bash
cell slots --fn <N|name> <file> # slot summary for function
cell slots --slot <N> --fn <N|name> <file> # trace slot N
cell slots <file> # slot summary for all functions
```
| Flag | Description |
|------|-------------|
| `--fn <N\|name>` | Filter to specific function by index or name |
| `--slot <N>` | Show chronological DEF/USE trace for a specific slot |
### Output Format
Summary shows each slot with its def count, use count, inferred type, and first definition. Dead slots (defined but never used) are flagged:
```
=== [0] <anonymous> (args=1, slots=40) ===
slot defs uses type first-def
s0 0 0 - (this)
s1 0 10 - (arg 0)
s2 1 6 - pc 0: access
s10 1 0 - pc 29: invoke <- dead
```
Slot trace (`--slot N`) shows every DEF and USE in program order:
```
=== slot 3 in [0] <anonymous> ===
DEF pc 5: le_int 3, 1, 2 :4
DEF pc 11: le_float 3, 1, 2 :4
DEF pc 17: le_text 3, 1, 2 :4
USE pc 31: jump_false 3, "if_else_0" :4
```
## seed.ce
Regenerates the boot seed files in `boot/`. These are pre-compiled mcode IR (JSON) files that bootstrap the compilation pipeline on cold start.

357
slots.ce Normal file
View File

@@ -0,0 +1,357 @@
// slots.ce — slot data flow / use-def chains
//
// Usage:
// cell slots --fn <N|name> <file> Slot summary for function
// cell slots --slot <N> --fn <N|name> <file> Trace slot N in function
// cell slots <file> Slot summary for all functions
var shop = use("internal/shop")
// Right-pad `s` with single spaces until it is at least `w` long.
// Input already `w` or longer is returned unchanged (never truncated).
var pad_right = function(s, w) {
  var padded = s
  while (w > length(padded)) {
    padded = padded + " "
  }
  return padded
}
// Render one instruction operand for display:
//   null    -> the word null
//   text    -> the text wrapped in double quotes
//   logical -> true / false
//   other   -> whatever text() produces (numbers, objects, ...)
var fmt_val = function(v) {
  if (is_null(v)) {
    return "null"
  }
  if (is_text(v)) {
    return `"${v}"`
  }
  if (is_logical(v)) {
    if (v) return "true"
    return "false"
  }
  return text(v)
}
// Classify instruction operands as DEF or USE
// Returns {defs: [operand_positions], uses: [operand_positions]}
// Positions are 1-based indices into the instruction array
// (index 0 of an instruction is the opcode itself).
// Only the opcode is inspected; the caller decides which of the listed
// positions actually hold slot numbers. Opcodes not listed here yield
// empty defs and uses, so they contribute nothing to the analysis.
var classify_operands = function(op) {
// Binary ops: DEF=[1], USE=[2,3]
// Generic and type-suffixed arithmetic, comparisons, concat, bitwise,
// shifts, and/or, and is_identical.
if (op == "add" || op == "subtract" || op == "multiply" || op == "divide" ||
op == "modulo" || op == "pow" || op == "remainder" ||
op == "add_int" || op == "sub_int" || op == "mul_int" || op == "div_int" ||
op == "mod_int" || op == "pow_int" || op == "rem_int" ||
op == "add_float" || op == "sub_float" || op == "mul_float" || op == "div_float" ||
op == "mod_float" || op == "pow_float" ||
op == "eq" || op == "ne" || op == "lt" || op == "gt" || op == "le" || op == "ge" ||
op == "eq_int" || op == "ne_int" || op == "lt_int" || op == "gt_int" ||
op == "le_int" || op == "ge_int" ||
op == "eq_float" || op == "ne_float" || op == "lt_float" || op == "gt_float" ||
op == "le_float" || op == "ge_float" ||
op == "eq_text" || op == "ne_text" || op == "lt_text" || op == "gt_text" ||
op == "le_text" || op == "ge_text" ||
op == "eq_bool" || op == "ne_bool" ||
op == "concat" ||
op == "bitand" || op == "bitor" || op == "bitxor" ||
op == "shl" || op == "shr" || op == "ushr" ||
op == "and" || op == "or" ||
op == "is_identical") {
return {defs: [1], uses: [2, 3]}
}
// Unary ops: DEF=[1], USE=[2]
// Negation/not, typeof, length and the is_* type tests.
if (op == "not" || op == "negate" || op == "neg_int" || op == "neg_float" ||
op == "bitnot" || op == "typeof" || op == "length" ||
op == "is_int" || op == "is_num" || op == "is_text" || op == "is_bool" ||
op == "is_null" || op == "is_array" || op == "is_func" || op == "is_record" ||
op == "is_stone" || op == "is_integer") {
return {defs: [1], uses: [2]}
}
// Constants: DEF=[1], USE=[]
if (op == "int" || op == "true" || op == "false" || op == "null" || op == "access") {
return {defs: [1], uses: []}
}
// Remaining opcodes are classified one by one.
if (op == "move") return {defs: [1], uses: [2]}
if (op == "function") return {defs: [1], uses: []}
if (op == "array") return {defs: [1], uses: []}
if (op == "record") return {defs: [1], uses: []}
if (op == "frame") return {defs: [1], uses: [2]}
if (op == "setarg") return {defs: [], uses: [1, 3]}
// invoke is the one opcode here whose DEF is position 2 rather than 1.
if (op == "invoke") return {defs: [2], uses: [1]}
if (op == "tail_invoke" || op == "goinvoke") return {defs: [], uses: [1]}
if (op == "load_field") return {defs: [1], uses: [2]}
if (op == "store_field") return {defs: [], uses: [1, 3]}
if (op == "load_index" || op == "load_dynamic") return {defs: [1], uses: [2, 3]}
if (op == "store_index" || op == "store_dynamic") return {defs: [], uses: [1, 2, 3]}
if (op == "push") return {defs: [], uses: [1, 2]}
if (op == "pop") return {defs: [1], uses: [2]}
// Conditional jumps read the tested value at position 1.
if (op == "jump_true" || op == "jump_false" || op == "jump_null" || op == "jump_not_null") return {defs: [], uses: [1]}
if (op == "jump") return {defs: [], uses: []}
if (op == "return") return {defs: [], uses: [1]}
if (op == "disrupt") return {defs: [], uses: []}
if (op == "get") return {defs: [1], uses: []}
if (op == "set_var") return {defs: [], uses: [1]}
// Unknown opcode: no defs, no uses.
return {defs: [], uses: []}
}
var run = function() {
var filename = null
var fn_filter = null
var slot_filter = null
var i = 0
var compiled = null
var type_info = {}
var sl_log = null
var td = null
var main_name = null
var fi = 0
var func = null
var fname = null
while (i < length(args)) {
if (args[i] == '--fn') {
i = i + 1
fn_filter = args[i]
} else if (args[i] == '--slot') {
i = i + 1
slot_filter = number(args[i])
} else if (args[i] == '--help' || args[i] == '-h') {
log.console("Usage: cell slots [--fn <N|name>] [--slot <N>] <file>")
log.console("")
log.console(" --fn <N|name> Filter to function by index or name")
log.console(" --slot <N> Trace a specific slot")
return null
} else if (!starts_with(args[i], '-')) {
filename = args[i]
}
i = i + 1
}
if (!filename) {
log.console("Usage: cell slots [--fn <N|name>] [--slot <N>] <file>")
return null
}
compiled = shop.mcode_file(filename)
// Try to get type info from streamline
// Compiles the file to mcode a second time, so the `compiled` copy used for
// the analysis is not the one handed to streamline, and runs streamline on
// it with a log record. Every entry of the log's type_deltas that names a
// function (td.fn) has its slot_types stored in type_info under that name.
// Writes the outer variables sl_log, td and type_info. Returns null.
var get_type_info = function() {
var mcode_copy = shop.mcode_file(filename)
var streamline = use("streamline")
var ti = 0
// Log record passed to streamline; type_deltas is the only field read back.
sl_log = {
passes: [],
events: null,
type_deltas: []
}
streamline(mcode_copy, sl_log)
if (sl_log.type_deltas != null) {
ti = 0
while (ti < length(sl_log.type_deltas)) {
td = sl_log.type_deltas[ti]
if (td.fn != null) {
// A later delta for the same function name replaces an earlier one.
type_info[td.fn] = td.slot_types
}
ti = ti + 1
}
}
return null
} disruption {
// Type info is optional
// A disruption above is swallowed here; type_info keeps whatever was
// stored before it happened.
}
get_type_info()
// Decide whether a function is selected by the --fn filter.
//   index: function index (the caller passes -1 for main)
//   name:  function name, may be null
// With no filter everything matches. Otherwise the filter matches when it
// equals the textual index (non-negative indexes only) or when search()
// finds it inside the name.
var fn_matches = function(index, name) {
  var found_at = null
  if (fn_filter == null) {
    return true
  }
  if (index >= 0 && text(index) == fn_filter) {
    return true
  }
  if (name == null) {
    return false
  }
  found_at = search(name, fn_filter)
  if (found_at != null && found_at >= 0) {
    return true
  }
  return false
}
// Build and print slot def/use information for one function.
//   func:  function record with .instructions and optional nr_args,
//          nr_slots and name
//   name:  title used in the output (also the type_info key when the
//          function has no name of its own)
//   index: accepted from the caller but not used here
// Every instruction is classified with classify_operands; each numeric
// operand at a DEF or USE position is counted for that slot and recorded
// as an event, DEFs before USEs within one instruction.
// With --slot set, the events of that slot are printed in order (or
// "(no activity)"). Otherwise a summary table is printed with one row per
// slot that has activity or is slot 0 / an argument slot.
// Always returns null.
var analyze_function = function(func, name, index) {
  var nr_args = func.nr_args != null ? func.nr_args : 0
  var nr_slots = func.nr_slots != null ? func.nr_slots : 0
  var instrs = func.instructions
  var defs = {}
  var uses = {}
  var first_def = {}
  var first_def_op = {}
  var events = []
  var pc = 0
  var at = 0
  var instr = null
  var roles = null
  var k = 0
  var operand = null
  var key = null
  var slot_types = null
  var type_key = null
  var evt = null
  var seen = false
  var width = 0
  var shown = null
  var c = 0
  var operands = null
  var line_str = null
  var si = 0
  var d_count = 0
  var u_count = 0
  var type_name = null
  var origin = null
  var dead_marker = null
  if (instrs == null) instrs = []
  // Walk instructions, build def/use chains. Labels and any other non-array
  // entries are skipped and do not advance pc.
  while (at < length(instrs)) {
    instr = instrs[at]
    at = at + 1
    if (!is_array(instr)) continue
    roles = classify_operands(instr[0])
    k = 0
    while (k < length(roles.defs)) {
      operand = instr[roles.defs[k]]
      if (is_number(operand)) {
        key = text(operand)
        if (!defs[key]) defs[key] = 0
        defs[key] = defs[key] + 1
        // Remember where, and by which opcode, the slot is first written.
        if (first_def[key] == null) {
          first_def[key] = pc
          first_def_op[key] = instr[0]
        }
        push(events, {kind: "DEF", slot: operand, pc: pc, instr: instr})
      }
      k = k + 1
    }
    k = 0
    while (k < length(roles.uses)) {
      operand = instr[roles.uses[k]]
      if (is_number(operand)) {
        key = text(operand)
        if (!uses[key]) uses[key] = 0
        uses[key] = uses[key] + 1
        push(events, {kind: "USE", slot: operand, pc: pc, instr: instr})
      }
      k = k + 1
    }
    pc = pc + 1
  }
  // Get type info for this function
  type_key = func.name != null ? func.name : name
  if (type_info[type_key]) {
    slot_types = type_info[type_key]
  }
  // --slot mode: show trace
  if (slot_filter != null) {
    log.compile(`\n=== slot ${text(slot_filter)} in ${name} ===`)
    k = 0
    while (k < length(events)) {
      evt = events[k]
      k = k + 1
      if (evt.slot != slot_filter) continue
      seen = true
      width = length(evt.instr)
      shown = []
      c = 1
      while (c < width - 2) {
        push(shown, fmt_val(evt.instr[c]))
        c = c + 1
      }
      operands = text(shown, ", ")
      line_str = ""
      if (evt.instr[width - 2] != null) {
        line_str = `:${text(evt.instr[width - 2])}`
      }
      log.compile(` ${pad_right(evt.kind, 5)}pc ${pad_right(text(evt.pc) + ":", 6)} ${pad_right(evt.instr[0], 15)}${pad_right(operands, 30)}${line_str}`)
    }
    if (!seen) {
      log.compile(" (no activity)")
    }
    return null
  }
  // Summary mode
  log.compile(`\n=== ${name} (args=${text(nr_args)}, slots=${text(nr_slots)}) ===`)
  log.compile(` ${pad_right("slot", 8)}${pad_right("defs", 8)}${pad_right("uses", 8)}${pad_right("type", 12)}first-def`)
  while (si < nr_slots) {
    key = text(si)
    d_count = defs[key] != null ? defs[key] : 0
    u_count = uses[key] != null ? uses[key] : 0
    // Slots past the arguments are listed only when they have activity;
    // slot 0 and the argument slots are always listed.
    if (d_count != 0 || u_count != 0 || si < nr_args + 1) {
      type_name = "-"
      if (slot_types != null && slot_types[key] != null) {
        type_name = slot_types[key]
      }
      origin = ""
      if (si == 0) {
        origin = "(this)"
      } else if (si > 0 && si <= nr_args) {
        origin = `(arg ${text(si - 1)})`
      } else if (first_def[key] != null) {
        origin = `pc ${text(first_def[key])}: ${first_def_op[key]}`
      }
      // Written but never read, and not slot 0 or an argument.
      dead_marker = ""
      if (d_count > 0 && u_count == 0 && si > nr_args) {
        dead_marker = " <- dead"
      }
      log.compile(` ${pad_right("s" + key, 8)}${pad_right(text(d_count), 8)}${pad_right(text(u_count), 8)}${pad_right(type_name, 12)}${origin}${dead_marker}`)
    }
    si = si + 1
  }
  return null
}
// Process functions
main_name = compiled.name != null ? compiled.name : "<main>"
if (compiled.main != null) {
if (fn_matches(-1, main_name)) {
analyze_function(compiled.main, main_name, -1)
}
}
if (compiled.functions != null) {
fi = 0
while (fi < length(compiled.functions)) {
func = compiled.functions[fi]
fname = func.name != null ? func.name : "<anonymous>"
if (fn_matches(fi, fname)) {
analyze_function(func, `[${text(fi)}] ${fname}`, fi)
}
fi = fi + 1
}
}
return null
}
run()
$stop()

249
xref.ce Normal file
View File

@@ -0,0 +1,249 @@
// xref.ce — cross-reference / call graph
//
// Usage:
// cell xref <file> Full creation tree
// cell xref --callers <N> <file> Who creates function [N]?
// cell xref --callees <N> <file> What does [N] create/call?
// cell xref --optimized <file> Use optimized IR
// cell xref --dot <file> DOT graph for graphviz
var shop = use("internal/shop")
// run — entry point for `cell xref`.
//
// Parses the command line, compiles <file> to IR (shop.compile_file when
// --optimized is given, shop.mcode_file otherwise), scans every function's
// instructions for "function" ops to learn which function creates which, and
// then prints exactly one view of that relation:
//   --callers <N>  functions containing a "function" op that creates [N]
//   --callees <N>  functions that [N] creates
//   --dot          the whole relation as a graphviz digraph
//   (default)      an indented creation tree rooted at main
// Main is addressed as index -1 throughout. If both --callers and --callees
// are given, --callers wins. Always returns null.
var run = function() {
  var usage = "Usage: cell xref [--callers <N>] [--callees <N>] [--dot] [--optimized] <file>"
  var filename = null
  var use_optimized = false
  var show_callers = null
  var show_callees = null
  var show_dot = false
  var i = 0
  var flag = null
  var idx_arg = null
  var compiled = null
  // creates:    text(parent index) -> array of {child, line}
  // created_by: text(child index)  -> array of {parent, line}
  // func_names: text(index)        -> function name
  var creates = {}
  var created_by = {}
  var func_names = {}
  var fi = 0
  var func = null
  var fname = null
  var main_name = null
  var creators = null
  var c = null
  var line_info = null
  var children = null
  var ch = null
  var ch_line = null
  var parent_keys = null
  var ki = 0
  var parent_idx = 0
  var ch_list = null
  var ci = 0
  // Indices already expanded by print_tree; keeps shared/cyclic creation
  // chains from being expanded more than once.
  var printed = {}

  while (i < length(args)) {
    if (args[i] == '--callers' || args[i] == '--callees') {
      // Both flags consume the next argument as a function index. A missing
      // value, or one number() cannot convert (assumed to yield null), would
      // otherwise leave the mode unset and print the full tree — report it.
      flag = args[i]
      i = i + 1
      idx_arg = null
      if (i < length(args)) {
        idx_arg = number(args[i])
      }
      if (idx_arg == null) {
        log.console(`${flag} requires a numeric function index`)
        log.console(usage)
        return null
      }
      if (flag == '--callers') {
        show_callers = idx_arg
      } else {
        show_callees = idx_arg
      }
    } else if (args[i] == '--dot') {
      show_dot = true
    } else if (args[i] == '--optimized') {
      use_optimized = true
    } else if (args[i] == '--help' || args[i] == '-h') {
      log.console(usage)
      log.console("")
      log.console(" --callers <N> Who creates function [N]?")
      log.console(" --callees <N> What does [N] create/call?")
      log.console(" --dot Output DOT format for graphviz")
      log.console(" --optimized Use optimized IR")
      return null
    } else if (!starts_with(args[i], '-')) {
      // Last non-flag argument wins as the input file.
      filename = args[i]
    }
    i = i + 1
  }

  if (!filename) {
    log.console(usage)
    return null
  }

  if (use_optimized) {
    compiled = shop.compile_file(filename)
  } else {
    compiled = shop.mcode_file(filename)
  }

  main_name = compiled.name != null ? compiled.name : "<main>"
  func_names["-1"] = main_name

  // Record every "function" instruction of fn_ir as an edge
  // owner_idx -> instr[2] in both creates and created_by.
  var scan_func = function(fn_ir, owner_idx) {
    var instrs = fn_ir.instructions
    var owner_key = text(owner_idx)
    var j = 0
    var instr = null
    var child_idx = null
    var child_key = null
    var instr_line = null
    if (instrs == null) return null
    while (j < length(instrs)) {
      instr = instrs[j]
      // Non-array entries are skipped.
      if (is_array(instr) && instr[0] == "function") {
        child_idx = instr[2]
        child_key = text(child_idx)
        // Source line is read from the second-to-last element
        // (presumably instructions end with [..., line, col] — confirm).
        instr_line = instr[length(instr) - 2]
        if (!creates[owner_key]) {
          creates[owner_key] = []
        }
        push(creates[owner_key], {child: child_idx, line: instr_line})
        if (!created_by[child_key]) {
          created_by[child_key] = []
        }
        push(created_by[child_key], {parent: owner_idx, line: instr_line})
      }
      j = j + 1
    }
    return null
  }

  if (compiled.main != null) {
    scan_func(compiled.main, -1)
  }
  if (compiled.functions != null) {
    fi = 0
    while (fi < length(compiled.functions)) {
      func = compiled.functions[fi]
      fname = func.name != null ? func.name : "<anonymous>"
      func_names[text(fi)] = fname
      scan_func(func, fi)
      fi = fi + 1
    }
  }

  // Human-readable label: main's name for -1, "[idx] name" when the name is
  // known, bare "[idx]" otherwise.
  var func_label = function(idx) {
    var name = func_names[text(idx)]
    if (idx == -1) return main_name
    if (name != null) return `[${text(idx)}] ${name}`
    return `[${text(idx)}]`
  }

  // Label text for use inside a double-quoted DOT string. Backslashes are
  // escaped before quotes so a name containing "\" cannot swallow the closing
  // quote or form an unintended escape sequence in the label.
  var safe_label = function(idx) {
    var name = func_names[text(idx)]
    if (name != null) return replace(replace(name, '\\', '\\\\'), '"', '\\"')
    if (idx == -1) return main_name
    return `func_${text(idx)}`
  }

  // DOT node identifier: "main" for -1, "f<idx>" otherwise.
  var node_id = function(idx) {
    if (idx == -1) return "main"
    return `f${text(idx)}`
  }

  // --callers mode
  if (show_callers != null) {
    creators = created_by[text(show_callers)]
    log.compile(`\nCallers of ${func_label(show_callers)}:`)
    if (creators == null || length(creators) == 0) {
      log.compile(" (none - may be main or unreferenced)")
    } else {
      i = 0
      while (i < length(creators)) {
        c = creators[i]
        line_info = c.line != null ? ` at line ${text(c.line)}` : ""
        log.compile(` ${func_label(c.parent)}${line_info}`)
        i = i + 1
      }
    }
    return null
  }

  // --callees mode
  if (show_callees != null) {
    children = creates[text(show_callees)]
    log.compile(`\nCallees of ${func_label(show_callees)}:`)
    if (children == null || length(children) == 0) {
      log.compile(" (none)")
    } else {
      i = 0
      while (i < length(children)) {
        ch = children[i]
        ch_line = ch.line != null ? ` at line ${text(ch.line)}` : ""
        log.compile(` ${func_label(ch.child)}${ch_line}`)
        i = i + 1
      }
    }
    return null
  }

  // --dot mode: one node for main and for each compiled function, then one
  // edge per recorded creation.
  if (show_dot) {
    log.compile("digraph xref {")
    log.compile(" rankdir=TB;")
    log.compile(" node [shape=box, style=filled, fillcolor=lightyellow];")
    log.compile(` ${node_id(-1)} [label="${safe_label(-1)}"];`)
    if (compiled.functions != null) {
      fi = 0
      while (fi < length(compiled.functions)) {
        log.compile(` ${node_id(fi)} [label="${safe_label(fi)}"];`)
        fi = fi + 1
      }
    }
    parent_keys = array(creates)
    ki = 0
    while (ki < length(parent_keys)) {
      // Keys were produced by text(index); convert back for node_id.
      parent_idx = number(parent_keys[ki])
      ch_list = creates[parent_keys[ki]]
      ci = 0
      while (ci < length(ch_list)) {
        log.compile(` ${node_id(parent_idx)} -> ${node_id(ch_list[ci].child)};`)
        ci = ci + 1
      }
      ki = ki + 1
    }
    log.compile("}")
    return null
  }

  // Default: indented tree from main. Each function is expanded once; later
  // occurrences print the label followed by "(already shown)".
  var print_tree = function(idx, depth) {
    var indent = ""
    var d = 0
    var kids = null
    var k = 0
    while (d < depth) {
      indent = indent + " "
      d = d + 1
    }
    log.compile(`${indent}${func_label(idx)}`)
    if (printed[text(idx)]) {
      log.compile(`${indent} (already shown)`)
      return null
    }
    printed[text(idx)] = true
    kids = creates[text(idx)]
    if (kids != null) {
      while (k < length(kids)) {
        print_tree(kids[k].child, depth + 1)
        k = k + 1
      }
    }
    return null
  }

  log.compile("")
  print_tree(-1, 0)
  return null
}
// Script entry: execute the xref run() defined above, then call $stop()
// (presumably ends this program/actor once the report is printed — confirm).
run()
$stop()