diff --git a/cfg.ce b/cfg.ce new file mode 100644 index 00000000..0c2270ef --- /dev/null +++ b/cfg.ce @@ -0,0 +1,456 @@ +// cfg.ce — control flow graph +// +// Usage: +// cell cfg --fn Text CFG for function +// cell cfg --dot --fn DOT output for graphviz +// cell cfg Text CFG for all functions + +var shop = use("internal/shop") + +var pad_right = function(s, w) { + var r = s + while (length(r) < w) { + r = r + " " + } + return r +} + +var fmt_val = function(v) { + if (is_null(v)) return "null" + if (is_number(v)) return text(v) + if (is_text(v)) return `"${v}"` + if (is_object(v)) return text(v) + if (is_logical(v)) return v ? "true" : "false" + return text(v) +} + +var is_jump_op = function(op) { + return op == "jump" || op == "jump_true" || op == "jump_false" || op == "jump_null" || op == "jump_not_null" +} + +var is_conditional_jump = function(op) { + return op == "jump_true" || op == "jump_false" || op == "jump_null" || op == "jump_not_null" +} + +var is_terminator = function(op) { + return op == "return" || op == "disrupt" || op == "tail_invoke" || op == "goinvoke" +} + +var run = function() { + var filename = null + var fn_filter = null + var show_dot = false + var use_optimized = false + var i = 0 + var compiled = null + var main_name = null + var fi = 0 + var func = null + var fname = null + + while (i < length(args)) { + if (args[i] == '--fn') { + i = i + 1 + fn_filter = args[i] + } else if (args[i] == '--dot') { + show_dot = true + } else if (args[i] == '--optimized') { + use_optimized = true + } else if (args[i] == '--help' || args[i] == '-h') { + log.console("Usage: cell cfg [--fn ] [--dot] [--optimized] ") + log.console("") + log.console(" --fn Filter to function by index or name") + log.console(" --dot Output DOT format for graphviz") + log.console(" --optimized Use optimized IR") + return null + } else if (!starts_with(args[i], '-')) { + filename = args[i] + } + i = i + 1 + } + + if (!filename) { + log.console("Usage: cell cfg [--fn ] [--dot] 
[--optimized] ") + return null + } + + if (use_optimized) { + compiled = shop.compile_file(filename) + } else { + compiled = shop.mcode_file(filename) + } + + var fn_matches = function(index, name) { + var match = null + if (fn_filter == null) return true + if (index >= 0 && fn_filter == text(index)) return true + if (name != null) { + match = search(name, fn_filter) + if (match != null && match >= 0) return true + } + return false + } + + var build_cfg = function(func) { + var instrs = func.instructions + var blocks = [] + var label_to_block = {} + var pc_to_block = {} + var label_to_pc = {} + var block_start_pcs = {} + var after_terminator = false + var current_block = null + var current_label = null + var pc = 0 + var ii = 0 + var bi = 0 + var instr = null + var op = null + var n = 0 + var line_num = null + var blk = null + var last_instr_data = null + var last_op = null + var target_label = null + var target_bi = null + var edge_type = null + + if (instrs == null || length(instrs) == 0) return [] + + // Pass 1: identify block start PCs + block_start_pcs["0"] = true + pc = 0 + ii = 0 + while (ii < length(instrs)) { + instr = instrs[ii] + if (is_array(instr)) { + op = instr[0] + if (after_terminator) { + block_start_pcs[text(pc)] = true + after_terminator = false + } + if (is_jump_op(op) || is_terminator(op)) { + after_terminator = true + } + pc = pc + 1 + } + ii = ii + 1 + } + + // Pass 2: map labels to PCs and mark as block starts + pc = 0 + ii = 0 + while (ii < length(instrs)) { + instr = instrs[ii] + if (is_text(instr) && !starts_with(instr, "_nop_")) { + label_to_pc[instr] = pc + block_start_pcs[text(pc)] = true + } else if (is_array(instr)) { + pc = pc + 1 + } + ii = ii + 1 + } + + // Pass 3: build basic blocks + pc = 0 + ii = 0 + current_label = null + while (ii < length(instrs)) { + instr = instrs[ii] + if (is_text(instr)) { + if (!starts_with(instr, "_nop_")) { + current_label = instr + } + ii = ii + 1 + continue + } + + if (is_array(instr)) { + if 
(block_start_pcs[text(pc)]) { + if (current_block != null) { + push(blocks, current_block) + } + current_block = { + id: length(blocks), + label: current_label, + start_pc: pc, + end_pc: pc, + instrs: [], + edges: [], + first_line: null, + last_line: null + } + current_label = null + } + + if (current_block != null) { + push(current_block.instrs, {pc: pc, instr: instr}) + current_block.end_pc = pc + n = length(instr) + line_num = instr[n - 2] + if (line_num != null) { + if (current_block.first_line == null) { + current_block.first_line = line_num + } + current_block.last_line = line_num + } + } + pc = pc + 1 + } + ii = ii + 1 + } + if (current_block != null) { + push(blocks, current_block) + } + + // Build block index + bi = 0 + while (bi < length(blocks)) { + pc_to_block[text(blocks[bi].start_pc)] = bi + if (blocks[bi].label != null) { + label_to_block[blocks[bi].label] = bi + } + bi = bi + 1 + } + + // Pass 4: compute edges + bi = 0 + while (bi < length(blocks)) { + blk = blocks[bi] + if (length(blk.instrs) > 0) { + last_instr_data = blk.instrs[length(blk.instrs) - 1] + last_op = last_instr_data.instr[0] + n = length(last_instr_data.instr) + + if (is_jump_op(last_op)) { + if (last_op == "jump") { + target_label = last_instr_data.instr[1] + } else { + target_label = last_instr_data.instr[2] + } + + target_bi = label_to_block[target_label] + if (target_bi != null) { + edge_type = "jump" + if (target_bi <= bi) { + edge_type = "loop back-edge" + } + push(blk.edges, {target: target_bi, kind: edge_type}) + } + + if (is_conditional_jump(last_op)) { + if (bi + 1 < length(blocks)) { + push(blk.edges, {target: bi + 1, kind: "fallthrough"}) + } + } + } else if (is_terminator(last_op)) { + push(blk.edges, {target: -1, kind: "EXIT (" + last_op + ")"}) + } else { + if (bi + 1 < length(blocks)) { + push(blk.edges, {target: bi + 1, kind: "fallthrough"}) + } + } + } + bi = bi + 1 + } + + return blocks + } + + var print_cfg_text = function(blocks, name) { + var bi = 0 + var blk = 
null + var header = null + var ii = 0 + var idata = null + var instr = null + var op = null + var n = 0 + var parts = null + var j = 0 + var operands = null + var ei = 0 + var edge = null + var target_label = null + + log.compile(`\n=== ${name} ===`) + + if (length(blocks) == 0) { + log.compile(" (empty)") + return null + } + + bi = 0 + while (bi < length(blocks)) { + blk = blocks[bi] + header = ` B${text(bi)}` + if (blk.label != null) { + header = header + ` "${blk.label}"` + } + header = header + ` [pc ${text(blk.start_pc)}-${text(blk.end_pc)}` + if (blk.first_line != null) { + if (blk.first_line == blk.last_line) { + header = header + `, line ${text(blk.first_line)}` + } else { + header = header + `, lines ${text(blk.first_line)}-${text(blk.last_line)}` + } + } + header = header + "]:" + + log.compile(header) + + ii = 0 + while (ii < length(blk.instrs)) { + idata = blk.instrs[ii] + instr = idata.instr + op = instr[0] + n = length(instr) + parts = [] + j = 1 + while (j < n - 2) { + push(parts, fmt_val(instr[j])) + j = j + 1 + } + operands = text(parts, ", ") + log.compile(` ${pad_right(text(idata.pc), 6)}${pad_right(op, 15)}${operands}`) + ii = ii + 1 + } + + ei = 0 + while (ei < length(blk.edges)) { + edge = blk.edges[ei] + if (edge.target == -1) { + log.compile(` -> ${edge.kind}`) + } else { + target_label = blocks[edge.target].label + if (target_label != null) { + log.compile(` -> B${text(edge.target)} "${target_label}" (${edge.kind})`) + } else { + log.compile(` -> B${text(edge.target)} (${edge.kind})`) + } + } + ei = ei + 1 + } + + log.compile("") + bi = bi + 1 + } + return null + } + + var print_cfg_dot = function(blocks, name) { + var safe_name = replace(replace(name, '"', '\\"'), ' ', '_') + var bi = 0 + var blk = null + var label_text = null + var ii = 0 + var idata = null + var instr = null + var op = null + var n = 0 + var parts = null + var j = 0 + var operands = null + var ei = 0 + var edge = null + var style = null + + log.compile(`digraph 
"${safe_name}" {`) + log.compile(" rankdir=TB;") + log.compile(" node [shape=record, fontname=monospace, fontsize=10];") + + bi = 0 + while (bi < length(blocks)) { + blk = blocks[bi] + label_text = "B" + text(bi) + if (blk.label != null) { + label_text = label_text + " (" + blk.label + ")" + } + label_text = label_text + "\\npc " + text(blk.start_pc) + "-" + text(blk.end_pc) + if (blk.first_line != null) { + label_text = label_text + "\\nline " + text(blk.first_line) + } + label_text = label_text + "|" + + ii = 0 + while (ii < length(blk.instrs)) { + idata = blk.instrs[ii] + instr = idata.instr + op = instr[0] + n = length(instr) + parts = [] + j = 1 + while (j < n - 2) { + push(parts, fmt_val(instr[j])) + j = j + 1 + } + operands = text(parts, ", ") + label_text = label_text + text(idata.pc) + " " + op + " " + replace(operands, '"', '\\"') + "\\l" + ii = ii + 1 + } + + log.compile(" B" + text(bi) + " [label=\"{" + label_text + "}\"];") + bi = bi + 1 + } + + // Edges + bi = 0 + while (bi < length(blocks)) { + blk = blocks[bi] + ei = 0 + while (ei < length(blk.edges)) { + edge = blk.edges[ei] + if (edge.target >= 0) { + style = "" + if (edge.kind == "loop back-edge") { + style = " [style=bold, color=red, label=\"loop\"]" + } else if (edge.kind == "fallthrough") { + style = " [style=dashed]" + } + log.compile(` B${text(bi)} -> B${text(edge.target)}${style};`) + } + ei = ei + 1 + } + bi = bi + 1 + } + + log.compile("}") + return null + } + + var process_function = function(func, name, index) { + var blocks = build_cfg(func) + if (show_dot) { + print_cfg_dot(blocks, name) + } else { + print_cfg_text(blocks, name) + } + return null + } + + // Process functions + main_name = compiled.name != null ? compiled.name : "
" + + if (compiled.main != null) { + if (fn_matches(-1, main_name)) { + process_function(compiled.main, main_name, -1) + } + } + + if (compiled.functions != null) { + fi = 0 + while (fi < length(compiled.functions)) { + func = compiled.functions[fi] + fname = func.name != null ? func.name : "" + if (fn_matches(fi, fname)) { + process_function(func, `[${text(fi)}] ${fname}`, fi) + } + fi = fi + 1 + } + } + + return null +} + +run() +$stop() diff --git a/diff_ir.ce b/diff_ir.ce new file mode 100644 index 00000000..ac7e7e17 --- /dev/null +++ b/diff_ir.ce @@ -0,0 +1,310 @@ +// diff_ir.ce — mcode vs streamline diff +// +// Usage: +// cell diff_ir Diff all functions +// cell diff_ir --fn Diff only one function +// cell diff_ir --summary Counts only + +var fd = use("fd") +var shop = use("internal/shop") + +var pad_right = function(s, w) { + var r = s + while (length(r) < w) { + r = r + " " + } + return r +} + +var fmt_val = function(v) { + if (is_null(v)) return "null" + if (is_number(v)) return text(v) + if (is_text(v)) return `"${v}"` + if (is_object(v)) return text(v) + if (is_logical(v)) return v ? 
"true" : "false" + return text(v) +} + +var run = function() { + var fn_filter = null + var show_summary = false + var filename = null + var i = 0 + var mcode_ir = null + var opt_ir = null + var source_text = null + var source_lines = null + var main_name = null + var fi = 0 + var func = null + var opt_func = null + var fname = null + + while (i < length(args)) { + if (args[i] == '--fn') { + i = i + 1 + fn_filter = args[i] + } else if (args[i] == '--summary') { + show_summary = true + } else if (args[i] == '--help' || args[i] == '-h') { + log.console("Usage: cell diff_ir [--fn ] [--summary] ") + log.console("") + log.console(" --fn Filter to function by index or name") + log.console(" --summary Show counts only") + return null + } else if (!starts_with(args[i], '-')) { + filename = args[i] + } + i = i + 1 + } + + if (!filename) { + log.console("Usage: cell diff_ir [--fn ] [--summary] ") + return null + } + + mcode_ir = shop.mcode_file(filename) + opt_ir = shop.compile_file(filename) + + source_text = text(fd.slurp(filename)) + source_lines = array(source_text, "\n") + + var get_source_line = function(line_num) { + if (line_num < 1 || line_num > length(source_lines)) return null + return source_lines[line_num - 1] + } + + var fn_matches = function(index, name) { + var match = null + if (fn_filter == null) return true + if (index >= 0 && fn_filter == text(index)) return true + if (name != null) { + match = search(name, fn_filter) + if (match != null && match >= 0) return true + } + return false + } + + var fmt_instr = function(instr) { + var op = instr[0] + var n = length(instr) + var parts = [] + var j = 1 + var operands = null + var line_str = null + while (j < n - 2) { + push(parts, fmt_val(instr[j])) + j = j + 1 + } + operands = text(parts, ", ") + line_str = instr[n - 2] != null ? 
`:${text(instr[n - 2])}` : "" + return pad_right(`${pad_right(op, 15)}${operands}`, 45) + line_str + } + + var classify = function(before, after) { + var bn = 0 + var an = 0 + var k = 0 + if (is_text(after) && starts_with(after, "_nop_")) return "eliminated" + if (is_array(before) && is_array(after)) { + if (before[0] != after[0]) return "rewritten" + bn = length(before) + an = length(after) + if (bn != an) return "rewritten" + k = 1 + while (k < bn - 2) { + if (before[k] != after[k]) return "rewritten" + k = k + 1 + } + return "identical" + } + return "identical" + } + + var total_eliminated = 0 + var total_rewritten = 0 + var total_funcs = 0 + + var diff_function = function(mcode_func, opt_func, name, index) { + var nr_args = mcode_func.nr_args != null ? mcode_func.nr_args : 0 + var nr_slots = mcode_func.nr_slots != null ? mcode_func.nr_slots : 0 + var m_instrs = mcode_func.instructions + var o_instrs = opt_func.instructions + var eliminated = 0 + var rewritten = 0 + var mi = 0 + var oi = 0 + var pc = 0 + var m_instr = null + var o_instr = null + var kind = null + var last_line = null + var instr_line = null + var n = 0 + var src = null + var annotation = null + + if (m_instrs == null) m_instrs = [] + if (o_instrs == null) o_instrs = [] + + // First pass: count changes + mi = 0 + oi = 0 + while (mi < length(m_instrs) && oi < length(o_instrs)) { + m_instr = m_instrs[mi] + o_instr = o_instrs[oi] + + if (is_text(m_instr)) { + mi = mi + 1 + oi = oi + 1 + continue + } + + if (is_text(o_instr) && starts_with(o_instr, "_nop_")) { + if (is_array(m_instr)) { + eliminated = eliminated + 1 + } + mi = mi + 1 + oi = oi + 1 + continue + } + + if (is_array(m_instr) && is_array(o_instr)) { + kind = classify(m_instr, o_instr) + if (kind == "rewritten") { + rewritten = rewritten + 1 + } + } + mi = mi + 1 + oi = oi + 1 + } + + total_eliminated = total_eliminated + eliminated + total_rewritten = total_rewritten + rewritten + total_funcs = total_funcs + 1 + + if (show_summary) { + if 
(eliminated == 0 && rewritten == 0) { + log.compile(` ${pad_right(name + ":", 40)} 0 eliminated, 0 rewritten (unchanged)`) + } else { + log.compile(` ${pad_right(name + ":", 40)} ${text(eliminated)} eliminated, ${text(rewritten)} rewritten`) + } + return null + } + + if (eliminated == 0 && rewritten == 0) return null + + log.compile(`\n=== ${name} (args=${text(nr_args)}, slots=${text(nr_slots)}) ===`) + log.compile(` ${text(eliminated)} eliminated, ${text(rewritten)} rewritten`) + + // Second pass: show diffs + mi = 0 + oi = 0 + pc = 0 + last_line = null + while (mi < length(m_instrs) && oi < length(o_instrs)) { + m_instr = m_instrs[mi] + o_instr = o_instrs[oi] + + if (is_text(m_instr) && !starts_with(m_instr, "_nop_")) { + mi = mi + 1 + oi = oi + 1 + continue + } + + if (is_text(m_instr) && starts_with(m_instr, "_nop_")) { + mi = mi + 1 + oi = oi + 1 + continue + } + + if (is_text(o_instr) && starts_with(o_instr, "_nop_")) { + if (is_array(m_instr)) { + n = length(m_instr) + instr_line = m_instr[n - 2] + if (instr_line != last_line && instr_line != null) { + src = get_source_line(instr_line) + if (src != null) src = trim(src) + if (last_line != null) log.compile("") + if (src != null && length(src) > 0) { + log.compile(` --- line ${text(instr_line)}: ${src} ---`) + } + last_line = instr_line + } + log.compile(` - ${pad_right(text(pc), 6)}${fmt_instr(m_instr)}`) + log.compile(` + ${pad_right(text(pc), 6)}${pad_right(o_instr, 45)} (eliminated)`) + } + mi = mi + 1 + oi = oi + 1 + pc = pc + 1 + continue + } + + if (is_array(m_instr) && is_array(o_instr)) { + kind = classify(m_instr, o_instr) + if (kind != "identical") { + n = length(m_instr) + instr_line = m_instr[n - 2] + if (instr_line != last_line && instr_line != null) { + src = get_source_line(instr_line) + if (src != null) src = trim(src) + if (last_line != null) log.compile("") + if (src != null && length(src) > 0) { + log.compile(` --- line ${text(instr_line)}: ${src} ---`) + } + last_line = instr_line + } + + 
annotation = "" + if (kind == "rewritten") { + if (search(o_instr[0], "_int") != null || search(o_instr[0], "_float") != null || search(o_instr[0], "_text") != null) { + annotation = "(specialized)" + } else { + annotation = "(rewritten)" + } + } + + log.compile(` - ${pad_right(text(pc), 6)}${fmt_instr(m_instr)}`) + log.compile(` + ${pad_right(text(pc), 6)}${fmt_instr(o_instr)} ${annotation}`) + } + pc = pc + 1 + } + + mi = mi + 1 + oi = oi + 1 + } + + return null + } + + // Process functions + main_name = mcode_ir.name != null ? mcode_ir.name : "
" + + if (mcode_ir.main != null && opt_ir.main != null) { + if (fn_matches(-1, main_name)) { + diff_function(mcode_ir.main, opt_ir.main, main_name, -1) + } + } + + if (mcode_ir.functions != null && opt_ir.functions != null) { + fi = 0 + while (fi < length(mcode_ir.functions) && fi < length(opt_ir.functions)) { + func = mcode_ir.functions[fi] + opt_func = opt_ir.functions[fi] + fname = func.name != null ? func.name : "" + if (fn_matches(fi, fname)) { + diff_function(func, opt_func, `[${text(fi)}] ${fname}`, fi) + } + fi = fi + 1 + } + } + + if (show_summary) { + log.compile(`\n total: ${text(total_eliminated)} eliminated, ${text(total_rewritten)} rewritten across ${text(total_funcs)} functions`) + } + + return null +} + +run() +$stop() diff --git a/docs/compiler-tools.md b/docs/compiler-tools.md index d1a62129..dc7d4d8c 100644 --- a/docs/compiler-tools.md +++ b/docs/compiler-tools.md @@ -30,6 +30,10 @@ Each stage has a corresponding CLI tool that lets you see its output. | streamline | `streamline.ce --ir` | Human-readable canonical IR | | disasm | `disasm.ce` | Source-interleaved disassembly | | disasm | `disasm.ce --optimized` | Optimized source-interleaved disassembly | +| diff | `diff_ir.ce` | Mcode vs streamline instruction diff | +| xref | `xref.ce` | Cross-reference / call creation graph | +| cfg | `cfg.ce` | Control flow graph (basic blocks) | +| slots | `slots.ce` | Slot data flow / use-def chains | | all | `ir_report.ce` | Structured optimizer flight recorder | All tools take a source file as input and run the pipeline up to the relevant stage. @@ -141,6 +145,160 @@ Function creation instructions include a cross-reference annotation showing the 3 function 5, 12 :235 ; -> [12] helper_fn ``` +## diff_ir.ce + +Compares mcode IR (before optimization) with streamline IR (after optimization), showing what the optimizer changed. Useful for understanding which instructions were eliminated, specialized, or rewritten. 
+ +```bash +cell diff_ir <file> # diff all functions +cell diff_ir --fn <fn> <file> # diff only one function +cell diff_ir --summary <file> # counts only +``` + +| Flag | Description | +|------|-------------| +| (none) | Show all diffs with source interleaving | +| `--fn <fn>` | Filter to specific function by index or name | +| `--summary` | Show only eliminated/rewritten counts per function | + +### Output Format + +Changed instructions are shown in diff style with `-` (before) and `+` (after) lines: + +``` +=== [0] (args=1, slots=40) === + 17 eliminated, 51 rewritten + + --- line 4: if (n <= 1) { --- + - 1 is_int 4, 1 :4 + + 1 is_int 3, 1 :4 (specialized) + - 3 is_int 5, 2 :4 + + 3 _nop_tc_1 (eliminated) +``` + +Summary mode gives a quick overview: + +``` + [0] : 17 eliminated, 51 rewritten + [1] : 65 eliminated, 181 rewritten + total: 86 eliminated, 250 rewritten across 4 functions +``` + +## xref.ce + +Cross-reference / call graph tool. Shows which functions create other functions (via `function` instructions), building a creation tree. + +```bash +cell xref <file> # full creation tree +cell xref --callers <N> <file> # who creates function [N]? +cell xref --callees <N> <file> # what does [N] create/call? +cell xref --dot <file> # DOT graph for graphviz +cell xref --optimized <file> # use optimized IR +``` + +| Flag | Description | +|------|-------------| +| (none) | Indented creation tree from main | +| `--callers <N>` | Show which functions create function [N] | +| `--callees <N>` | Show what function [N] creates (use -1 for main) | +| `--dot` | Output DOT format for graphviz | +| `--optimized` | Use optimized IR instead of raw mcode | + +### Output Format + +Default tree view: + +``` +demo_disasm.cm + [0] + [1] + [2] +``` + +Caller/callee query: + +``` +Callers of [0] : + demo_disasm.cm at line 3 +``` + +DOT output can be piped to graphviz: `cell xref --dot file.cm | dot -Tpng -o xref.png` + +## cfg.ce + +Control flow graph tool. Identifies basic blocks from labels and jumps, computes edges, and detects loop back-edges. 
+ +```bash +cell cfg --fn <fn> <file> # text CFG for function +cell cfg --dot --fn <fn> <file> # DOT output for graphviz +cell cfg <file> # text CFG for all functions +cell cfg --optimized <file> # use optimized IR +``` + +| Flag | Description | +|------|-------------| +| `--fn <fn>` | Filter to specific function by index or name | +| `--dot` | Output DOT format for graphviz | +| `--optimized` | Use optimized IR instead of raw mcode | + +### Output Format + +``` +=== [0] === + B0 [pc 0-2, line 4]: + 0 access 2, 1 + 1 is_int 4, 1 + 2 jump_false 4, "rel_ni_2" + -> B3 "rel_ni_2" (jump) + -> B1 (fallthrough) + + B1 [pc 3-4, line 4]: + 3 is_int 5, 2 + 4 jump_false 5, "rel_ni_2" + -> B3 "rel_ni_2" (jump) + -> B2 (fallthrough) +``` + +Each block shows its ID, PC range, source lines, instructions, and outgoing edges. A jump whose target is the current block or an earlier block is annotated as a `loop back-edge`. + +## slots.ce + +Slot data flow analysis. Builds use-def chains for every slot in a function, showing where each slot is defined and used. Type information from streamline is shown when it is available. + +```bash +cell slots --fn <fn> <file> # slot summary for function +cell slots --slot <N> --fn <fn> <file> # trace slot N +cell slots <file> # slot summary for all functions +``` + +| Flag | Description | +|------|-------------| +| `--fn <fn>` | Filter to specific function by index or name | +| `--slot <N>` | Show chronological DEF/USE trace for a specific slot | + +### Output Format + +Summary shows each slot with its def count, use count, inferred type, and first definition. Dead slots (defined but never used) are flagged: + +``` +=== [0] (args=1, slots=40) === + slot defs uses type first-def + s0 0 0 - (this) + s1 0 10 - (arg 0) + s2 1 6 - pc 0: access + s10 1 0 - pc 29: invoke <- dead +``` + +Slot trace (`--slot N`) shows every DEF and USE in program order: + +``` +=== slot 3 in [0] === + DEF pc 5: le_int 3, 1, 2 :4 + DEF pc 11: le_float 3, 1, 2 :4 + DEF pc 17: le_text 3, 1, 2 :4 + USE pc 31: jump_false 3, "if_else_0" :4 +``` + +## seed.ce + +Regenerates the boot seed files in `boot/`. 
These are pre-compiled mcode IR (JSON) files that bootstrap the compilation pipeline on cold start. diff --git a/slots.ce b/slots.ce new file mode 100644 index 00000000..a000a8fb --- /dev/null +++ b/slots.ce @@ -0,0 +1,357 @@ +// slots.ce — slot data flow / use-def chains +// +// Usage: +// cell slots --fn Slot summary for function +// cell slots --slot --fn Trace slot N in function +// cell slots Slot summary for all functions + +var shop = use("internal/shop") + +var pad_right = function(s, w) { + var r = s + while (length(r) < w) { + r = r + " " + } + return r +} + +var fmt_val = function(v) { + if (is_null(v)) return "null" + if (is_number(v)) return text(v) + if (is_text(v)) return `"${v}"` + if (is_object(v)) return text(v) + if (is_logical(v)) return v ? "true" : "false" + return text(v) +} + +// Classify instruction operands as DEF or USE +// Returns {defs: [operand_positions], uses: [operand_positions]} +// Positions are 1-based indices into the instruction array +var classify_operands = function(op) { + // Binary ops: DEF=[1], USE=[2,3] + if (op == "add" || op == "subtract" || op == "multiply" || op == "divide" || + op == "modulo" || op == "pow" || op == "remainder" || + op == "add_int" || op == "sub_int" || op == "mul_int" || op == "div_int" || + op == "mod_int" || op == "pow_int" || op == "rem_int" || + op == "add_float" || op == "sub_float" || op == "mul_float" || op == "div_float" || + op == "mod_float" || op == "pow_float" || + op == "eq" || op == "ne" || op == "lt" || op == "gt" || op == "le" || op == "ge" || + op == "eq_int" || op == "ne_int" || op == "lt_int" || op == "gt_int" || + op == "le_int" || op == "ge_int" || + op == "eq_float" || op == "ne_float" || op == "lt_float" || op == "gt_float" || + op == "le_float" || op == "ge_float" || + op == "eq_text" || op == "ne_text" || op == "lt_text" || op == "gt_text" || + op == "le_text" || op == "ge_text" || + op == "eq_bool" || op == "ne_bool" || + op == "concat" || + op == "bitand" || op == "bitor" 
|| op == "bitxor" || + op == "shl" || op == "shr" || op == "ushr" || + op == "and" || op == "or" || + op == "is_identical") { + return {defs: [1], uses: [2, 3]} + } + + // Unary ops: DEF=[1], USE=[2] + if (op == "not" || op == "negate" || op == "neg_int" || op == "neg_float" || + op == "bitnot" || op == "typeof" || op == "length" || + op == "is_int" || op == "is_num" || op == "is_text" || op == "is_bool" || + op == "is_null" || op == "is_array" || op == "is_func" || op == "is_record" || + op == "is_stone" || op == "is_integer") { + return {defs: [1], uses: [2]} + } + + // Constants: DEF=[1], USE=[] + if (op == "int" || op == "true" || op == "false" || op == "null" || op == "access") { + return {defs: [1], uses: []} + } + + if (op == "move") return {defs: [1], uses: [2]} + if (op == "function") return {defs: [1], uses: []} + if (op == "array") return {defs: [1], uses: []} + if (op == "record") return {defs: [1], uses: []} + if (op == "frame") return {defs: [1], uses: [2]} + if (op == "setarg") return {defs: [], uses: [1, 3]} + if (op == "invoke") return {defs: [2], uses: [1]} + if (op == "tail_invoke" || op == "goinvoke") return {defs: [], uses: [1]} + if (op == "load_field") return {defs: [1], uses: [2]} + if (op == "store_field") return {defs: [], uses: [1, 3]} + if (op == "load_index" || op == "load_dynamic") return {defs: [1], uses: [2, 3]} + if (op == "store_index" || op == "store_dynamic") return {defs: [], uses: [1, 2, 3]} + if (op == "push") return {defs: [], uses: [1, 2]} + if (op == "pop") return {defs: [1], uses: [2]} + if (op == "jump_true" || op == "jump_false" || op == "jump_null" || op == "jump_not_null") return {defs: [], uses: [1]} + if (op == "jump") return {defs: [], uses: []} + if (op == "return") return {defs: [], uses: [1]} + if (op == "disrupt") return {defs: [], uses: []} + if (op == "get") return {defs: [1], uses: []} + if (op == "set_var") return {defs: [], uses: [1]} + + return {defs: [], uses: []} +} + +var run = function() { + var 
filename = null + var fn_filter = null + var slot_filter = null + var i = 0 + var compiled = null + var type_info = {} + var sl_log = null + var td = null + var main_name = null + var fi = 0 + var func = null + var fname = null + + while (i < length(args)) { + if (args[i] == '--fn') { + i = i + 1 + fn_filter = args[i] + } else if (args[i] == '--slot') { + i = i + 1 + slot_filter = number(args[i]) + } else if (args[i] == '--help' || args[i] == '-h') { + log.console("Usage: cell slots [--fn ] [--slot ] ") + log.console("") + log.console(" --fn Filter to function by index or name") + log.console(" --slot Trace a specific slot") + return null + } else if (!starts_with(args[i], '-')) { + filename = args[i] + } + i = i + 1 + } + + if (!filename) { + log.console("Usage: cell slots [--fn ] [--slot ] ") + return null + } + + compiled = shop.mcode_file(filename) + + // Try to get type info from streamline + var get_type_info = function() { + var mcode_copy = shop.mcode_file(filename) + var streamline = use("streamline") + var ti = 0 + sl_log = { + passes: [], + events: null, + type_deltas: [] + } + streamline(mcode_copy, sl_log) + if (sl_log.type_deltas != null) { + ti = 0 + while (ti < length(sl_log.type_deltas)) { + td = sl_log.type_deltas[ti] + if (td.fn != null) { + type_info[td.fn] = td.slot_types + } + ti = ti + 1 + } + } + return null + } disruption { + // Type info is optional + } + get_type_info() + + var fn_matches = function(index, name) { + var match = null + if (fn_filter == null) return true + if (index >= 0 && fn_filter == text(index)) return true + if (name != null) { + match = search(name, fn_filter) + if (match != null && match >= 0) return true + } + return false + } + + var analyze_function = function(func, name, index) { + var nr_args = func.nr_args != null ? func.nr_args : 0 + var nr_slots = func.nr_slots != null ? 
func.nr_slots : 0 + var instrs = func.instructions + var defs = {} + var uses = {} + var first_def = {} + var first_def_op = {} + var events = [] + var pc = 0 + var ii = 0 + var instr = null + var op = null + var n = 0 + var cls = null + var di = 0 + var ui = 0 + var slot_num = null + var operand_val = null + var parts = null + var j = 0 + var operands = null + var slot_types = null + var type_key = null + var ei = 0 + var evt = null + var found = false + var line_str = null + var si = 0 + var slot_key = null + var d_count = 0 + var u_count = 0 + var t = null + var first = null + var dead_marker = null + + if (instrs == null) instrs = [] + + // Walk instructions, build def/use chains + ii = 0 + while (ii < length(instrs)) { + instr = instrs[ii] + if (is_text(instr)) { + ii = ii + 1 + continue + } + if (!is_array(instr)) { + ii = ii + 1 + continue + } + + op = instr[0] + n = length(instr) + cls = classify_operands(op) + + di = 0 + while (di < length(cls.defs)) { + operand_val = instr[cls.defs[di]] + if (is_number(operand_val)) { + slot_num = text(operand_val) + if (!defs[slot_num]) defs[slot_num] = 0 + defs[slot_num] = defs[slot_num] + 1 + if (first_def[slot_num] == null) { + first_def[slot_num] = pc + first_def_op[slot_num] = op + } + push(events, {kind: "DEF", slot: operand_val, pc: pc, instr: instr}) + } + di = di + 1 + } + + ui = 0 + while (ui < length(cls.uses)) { + operand_val = instr[cls.uses[ui]] + if (is_number(operand_val)) { + slot_num = text(operand_val) + if (!uses[slot_num]) uses[slot_num] = 0 + uses[slot_num] = uses[slot_num] + 1 + push(events, {kind: "USE", slot: operand_val, pc: pc, instr: instr}) + } + ui = ui + 1 + } + + pc = pc + 1 + ii = ii + 1 + } + + // Get type info for this function + type_key = func.name != null ? 
func.name : name + if (type_info[type_key]) { + slot_types = type_info[type_key] + } + + // --slot mode: show trace + if (slot_filter != null) { + log.compile(`\n=== slot ${text(slot_filter)} in ${name} ===`) + ei = 0 + found = false + while (ei < length(events)) { + evt = events[ei] + if (evt.slot == slot_filter) { + found = true + n = length(evt.instr) + parts = [] + j = 1 + while (j < n - 2) { + push(parts, fmt_val(evt.instr[j])) + j = j + 1 + } + operands = text(parts, ", ") + line_str = evt.instr[n - 2] != null ? `:${text(evt.instr[n - 2])}` : "" + log.compile(` ${pad_right(evt.kind, 5)}pc ${pad_right(text(evt.pc) + ":", 6)} ${pad_right(evt.instr[0], 15)}${pad_right(operands, 30)}${line_str}`) + } + ei = ei + 1 + } + if (!found) { + log.compile(" (no activity)") + } + return null + } + + // Summary mode + log.compile(`\n=== ${name} (args=${text(nr_args)}, slots=${text(nr_slots)}) ===`) + log.compile(` ${pad_right("slot", 8)}${pad_right("defs", 8)}${pad_right("uses", 8)}${pad_right("type", 12)}first-def`) + + si = 0 + while (si < nr_slots) { + slot_key = text(si) + d_count = defs[slot_key] != null ? defs[slot_key] : 0 + u_count = uses[slot_key] != null ? 
uses[slot_key] : 0 + + // Skip slots with no activity unless they're args or have type info + if (d_count == 0 && u_count == 0 && si >= nr_args + 1) { + si = si + 1 + continue + } + + t = "-" + if (slot_types != null && slot_types[slot_key] != null) { + t = slot_types[slot_key] + } + + first = "" + if (si == 0) { + first = "(this)" + } else if (si > 0 && si <= nr_args) { + first = `(arg ${text(si - 1)})` + } else if (first_def[slot_key] != null) { + first = `pc ${text(first_def[slot_key])}: ${first_def_op[slot_key]}` + } + + dead_marker = "" + if (d_count > 0 && u_count == 0 && si > nr_args) { + dead_marker = " <- dead" + } + + log.compile(` ${pad_right("s" + slot_key, 8)}${pad_right(text(d_count), 8)}${pad_right(text(u_count), 8)}${pad_right(t, 12)}${first}${dead_marker}`) + si = si + 1 + } + return null + } + + // Process functions + main_name = compiled.name != null ? compiled.name : "
" + + if (compiled.main != null) { + if (fn_matches(-1, main_name)) { + analyze_function(compiled.main, main_name, -1) + } + } + + if (compiled.functions != null) { + fi = 0 + while (fi < length(compiled.functions)) { + func = compiled.functions[fi] + fname = func.name != null ? func.name : "" + if (fn_matches(fi, fname)) { + analyze_function(func, `[${text(fi)}] ${fname}`, fi) + } + fi = fi + 1 + } + } + + return null +} + +run() +$stop()
diff --git a/xref.ce b/xref.ce
new file mode 100644
index 00000000..b8eabaea
--- /dev/null
+++ b/xref.ce
@@ -0,0 +1,278 @@
+// xref.ce — cross-reference / call graph
+//
+// Scans the compiled IR for "function" instructions and reports which
+// function creates which: as an indented tree, a per-function listing,
+// or a DOT graph.
+//
+// Usage:
+//   cell xref <file>                 Full creation tree
+//   cell xref --callers <N> <file>   Who creates function [N]?
+//   cell xref --callees <N> <file>   What does [N] create/call?
+//   cell xref --optimized <file>     Use optimized IR
+//   cell xref --dot <file>           DOT graph for graphviz
+
+var shop = use("internal/shop")
+
+// Parses the command line, builds the creation maps, then prints the
+// requested view. Returns null on every path.
+var run = function() {
+  var usage = "Usage: cell xref [--callers <N>] [--callees <N>] [--dot] [--optimized] <file>"
+  var filename = null
+  var use_optimized = false
+  var show_callers = null
+  var show_callees = null
+  var show_dot = false
+  var i = 0
+  var compiled = null
+  var creates = {}       // parent index (as text) -> [{child, line}]
+  var created_by = {}    // child index (as text) -> [{parent, line}]
+  var func_names = {}    // function index (as text) -> name; "-1" is main
+  var fi = 0
+  var func = null
+  var fname = null
+  var main_name = null
+  var creators = null
+  var c = null
+  var line_info = null
+  var children = null
+  var ch = null
+  var ch_line = null
+  var parent_keys = null
+  var ki = 0
+  var parent_idx = 0
+  var ch_list = null
+  var ci = 0
+  var printed = {}       // indices already expanded by print_tree
+
+  while (i < length(args)) {
+    if (args[i] == '--callers') {
+      i = i + 1
+      show_callers = number(args[i])
+      if (show_callers == null) {
+        // Assuming number() returns null for a missing or non-numeric value:
+        // a null here would leave the mode unset and print the default tree.
+        log.console("xref: --callers requires a function index")
+        log.console(usage)
+        return null
+      }
+    } else if (args[i] == '--callees') {
+      i = i + 1
+      show_callees = number(args[i])
+      if (show_callees == null) {
+        log.console("xref: --callees requires a function index")
+        log.console(usage)
+        return null
+      }
+    } else if (args[i] == '--dot') {
+      show_dot = true
+    } else if (args[i] == '--optimized') {
+      use_optimized = true
+    } else if (args[i] == '--help' || args[i] == '-h') {
+      log.console(usage)
+      log.console("")
+      log.console(" --callers <N> Who creates function [N]?")
+      log.console(" --callees <N> What does [N] create/call?")
+      log.console(" --dot Output DOT format for graphviz")
+      log.console(" --optimized Use optimized IR")
+      return null
+    } else if (!starts_with(args[i], '-')) {
+      filename = args[i]
+    }
+    i = i + 1
+  }
+
+  if (!filename) {
+    log.console(usage)
+    return null
+  }
+
+  if (use_optimized) {
+    compiled = shop.compile_file(filename)
+  } else {
+    compiled = shop.mcode_file(filename)
+  }
+
+  main_name = compiled.name != null ? compiled.name : "<main>"
+  func_names["-1"] = main_name
+
+  // Records every "function" instruction found in func.instructions under
+  // parent_idx. Operand 2 is taken as the created function's index and the
+  // second-to-last element as the source line.
+  var scan_func = function(func, parent_idx) {
+    var instrs = func.instructions
+    var j = 0
+    var instr = null
+    var n = 0
+    var child_idx = null
+    var instr_line = null
+    if (instrs == null) return null
+    while (j < length(instrs)) {
+      instr = instrs[j]
+      if (is_array(instr) && instr[0] == "function") {
+        n = length(instr)
+        child_idx = instr[2]
+        instr_line = instr[n - 2]
+        if (!creates[text(parent_idx)]) {
+          creates[text(parent_idx)] = []
+        }
+        push(creates[text(parent_idx)], {child: child_idx, line: instr_line})
+        if (!created_by[text(child_idx)]) {
+          created_by[text(child_idx)] = []
+        }
+        push(created_by[text(child_idx)], {parent: parent_idx, line: instr_line})
+      }
+      j = j + 1
+    }
+    return null
+  }
+
+  if (compiled.main != null) {
+    scan_func(compiled.main, -1)
+  }
+
+  if (compiled.functions != null) {
+    fi = 0
+    while (fi < length(compiled.functions)) {
+      func = compiled.functions[fi]
+      fname = func.name != null ? func.name : ""
+      func_names[text(fi)] = fname
+      scan_func(func, fi)
+      fi = fi + 1
+    }
+  }
+
+  // Human-readable label: the main name for -1, otherwise "[idx] name".
+  var func_label = function(idx) {
+    var name = func_names[text(idx)]
+    if (idx == -1) return main_name
+    if (name != null) return `[${text(idx)}] ${name}`
+    return `[${text(idx)}]`
+  }
+
+  // Label text for a double-quoted DOT string. Backslashes are escaped
+  // before quotes so a name ending in a backslash cannot swallow the
+  // closing quote of the label.
+  var safe_label = function(idx) {
+    var label = func_names[text(idx)]
+    if (label == null) {
+      label = idx == -1 ? main_name : `func_${text(idx)}`
+    }
+    label = replace(label, '\\', '\\\\')
+    return replace(label, '"', '\\"')
+  }
+
+  // DOT node identifier: "main" for -1, "f" followed by the index otherwise.
+  var node_id = function(idx) {
+    if (idx == -1) return "main"
+    return `f${text(idx)}`
+  }
+
+  // --callers mode
+  if (show_callers != null) {
+    creators = created_by[text(show_callers)]
+    log.compile(`\nCallers of ${func_label(show_callers)}:`)
+    if (creators == null || length(creators) == 0) {
+      log.compile(" (none - may be main or unreferenced)")
+    } else {
+      i = 0
+      while (i < length(creators)) {
+        c = creators[i]
+        line_info = c.line != null ? ` at line ${text(c.line)}` : ""
+        log.compile(` ${func_label(c.parent)}${line_info}`)
+        i = i + 1
+      }
+    }
+    return null
+  }
+
+  // --callees mode
+  if (show_callees != null) {
+    children = creates[text(show_callees)]
+    log.compile(`\nCallees of ${func_label(show_callees)}:`)
+    if (children == null || length(children) == 0) {
+      log.compile(" (none)")
+    } else {
+      i = 0
+      while (i < length(children)) {
+        ch = children[i]
+        ch_line = ch.line != null ? ` at line ${text(ch.line)}` : ""
+        log.compile(` ${func_label(ch.child)}${ch_line}`)
+        i = i + 1
+      }
+    }
+    return null
+  }
+
+  // --dot mode
+  if (show_dot) {
+    log.compile("digraph xref {")
+    log.compile(" rankdir=TB;")
+    log.compile(" node [shape=box, style=filled, fillcolor=lightyellow];")
+
+    log.compile(` ${node_id(-1)} [label="${safe_label(-1)}"];`)
+
+    if (compiled.functions != null) {
+      fi = 0
+      while (fi < length(compiled.functions)) {
+        log.compile(` ${node_id(fi)} [label="${safe_label(fi)}"];`)
+        fi = fi + 1
+      }
+    }
+
+    parent_keys = array(creates)
+    ki = 0
+    while (ki < length(parent_keys)) {
+      parent_idx = number(parent_keys[ki])
+      ch_list = creates[parent_keys[ki]]
+      ci = 0
+      while (ci < length(ch_list)) {
+        log.compile(` ${node_id(parent_idx)} -> ${node_id(ch_list[ci].child)};`)
+        ci = ci + 1
+      }
+      ki = ki + 1
+    }
+
+    log.compile("}")
+    return null
+  }
+
+  // Default: indented tree from main. `printed` marks indices that were
+  // already expanded, so a repeated function is listed but not re-descended.
+  var print_tree = function(idx, depth) {
+    var indent = ""
+    var d = 0
+    var children = null
+    var ci = 0
+    var child = null
+    while (d < depth) {
+      indent = indent + " "
+      d = d + 1
+    }
+
+    log.compile(`${indent}${func_label(idx)}`)
+
+    if (printed[text(idx)]) {
+      log.compile(`${indent} (already shown)`)
+      return null
+    }
+    printed[text(idx)] = true
+
+    children = creates[text(idx)]
+    if (children != null) {
+      ci = 0
+      while (ci < length(children)) {
+        child = children[ci]
+        print_tree(child.child, depth + 1)
+        ci = ci + 1
+      }
+    }
+    return null
+  }
+
+  log.compile("")
+  print_tree(-1, 0)
+
+  return null
+}
+
+run()
+$stop()