diff --git a/disasm.ce b/disasm.ce
new file mode 100644
index 00000000..2eb5ab09
--- /dev/null
+++ b/disasm.ce
@@ -0,0 +1,312 @@
+// disasm.ce — source-interleaved disassembly
+//
+// Usage:
+//   cell disasm <file>                 Disassemble all functions (mcode)
+//   cell disasm --optimized <file>     Disassemble optimized IR (streamline)
+//   cell disasm --fn <N|name> <file>   Show only function N or named function
+//   cell disasm --line <N> <file>      Show instructions from source line N
+
+var fd = use("fd")
+var shop = use("internal/shop")
+
+// Pad `s` on the right with spaces until it is at least `w` characters wide.
+// Text that is already that wide is returned unchanged.
+var pad_right = function(s, w) {
+  var r = s
+  while (length(r) < w) {
+    r = r + " "
+  }
+  return r
+}
+
+// Render one instruction operand for display: null as "null", text wrapped
+// in double quotes, logicals as true/false, everything else through text().
+var fmt_val = function(v) {
+  if (is_null(v)) return "null"
+  if (is_text(v)) return `"${v}"`
+  if (is_logical(v)) return v ? "true" : "false"
+  return text(v)
+}
+
+// Print the command synopsis followed by the per-flag help.
+var print_usage = function() {
+  log.console("Usage: cell disasm [--optimized] [--fn <N|name>] [--line <N>] <file>")
+  log.console("")
+  log.console("  --optimized    Use optimized IR (streamline) instead of raw mcode")
+  log.console("  --fn <N|name>  Filter to function by index or name")
+  log.console("  --line <N>     Show only instructions from source line N")
+  return null
+}
+
+// Source line attached to an instruction. The line is read from the
+// second-to-last slot of the instruction array; anything that is not an
+// array, or is too short to have that slot, yields null.
+var instr_line_of = function(instr) {
+  var n = 0
+  if (!is_array(instr)) return null
+  n = length(instr)
+  if (n < 2) return null
+  return instr[n - 2]
+}
+
+// Parse the command line, compile the requested file and print every
+// function that passes the --fn / --line filters.
+var run = function() {
+  var use_optimized = false
+  var fn_filter = null
+  var line_filter = null
+  var filename = null
+  var i = 0
+  var compiled = null
+  var source_text = null
+  var source_lines = null
+  var main_name = null
+  var fi = 0
+  var func = null
+  var fname = null
+
+  while (i < length(args)) {
+    if (args[i] == '--optimized') {
+      use_optimized = true
+    } else if (args[i] == '--fn') {
+      // A trailing --fn used to leave the filter null and silently show
+      // everything; report the missing value instead.
+      i = i + 1
+      if (i >= length(args)) {
+        log.console("disasm: --fn requires a function index or name")
+        return null
+      }
+      fn_filter = args[i]
+    } else if (args[i] == '--line') {
+      i = i + 1
+      if (i >= length(args)) {
+        log.console("disasm: --line requires a line number")
+        return null
+      }
+      line_filter = number(args[i])
+      // A null filter means "no filter", so an unparseable value must not
+      // fall through (assumes number() gives null for non-numeric text).
+      if (line_filter == null) {
+        log.console(`disasm: --line expects a number, got "${args[i]}"`)
+        return null
+      }
+    } else if (args[i] == '--help' || args[i] == '-h') {
+      print_usage()
+      return null
+    } else if (!starts_with(args[i], '-')) {
+      filename = args[i]
+    }
+    i = i + 1
+  }
+
+  if (!filename) {
+    print_usage()
+    return null
+  }
+
+  // Compile
+
+  if (use_optimized) {
+    compiled = shop.compile_file(filename)
+  } else {
+    compiled = shop.mcode_file(filename)
+  }
+  if (compiled == null) {
+    log.console(`disasm: no compiled output for ${filename}`)
+    return null
+  }
+
+  // Read source file
+
+  source_text = text(fd.slurp(filename))
+  source_lines = array(source_text, "\n")
+
+  // Helpers
+
+  // 1-based lookup into the source text; null when out of range.
+  var get_source_line = function(line_num) {
+    if (line_num < 1 || line_num > length(source_lines)) return null
+    return source_lines[line_num - 1]
+  }
+
+  // Line of the first array entry in func.instructions (text entries skipped).
+  var first_instr_line = function(func) {
+    var instrs = func.instructions
+    var i = 0
+    if (instrs == null) return null
+    while (i < length(instrs)) {
+      if (is_array(instrs[i])) return instr_line_of(instrs[i])
+      i = i + 1
+    }
+    return null
+  }
+
+  // True when at least one instruction of func carries source line target.
+  var func_has_line = function(func, target) {
+    var instrs = func.instructions
+    var i = 0
+    if (instrs == null) return false
+    while (i < length(instrs)) {
+      if (is_array(instrs[i]) && instr_line_of(instrs[i]) == target) return true
+      i = i + 1
+    }
+    return false
+  }
+
+  // With no --fn filter everything matches. Otherwise match the filter
+  // against the function index (main is passed -1 and never matches by
+  // index) or as a substring of the function name.
+  var fn_matches = function(index, name) {
+    var match = null
+    if (fn_filter == null) return true
+    if (index >= 0 && fn_filter == text(index)) return true
+    if (name != null) {
+      match = search(name, fn_filter)
+      if (match != null && match >= 0) return true
+    }
+    return false
+  }
+
+  // Name of compiled.functions[fi], or null when there is no such entry.
+  var func_name_by_index = function(fi) {
+    var f = null
+    if (compiled.functions == null) return null
+    if (fi < 0 || fi >= length(compiled.functions)) return null
+    f = compiled.functions[fi]
+    return f.name
+  }
+
+  // Print one function: a header line, then its instructions grouped
+  // under the source line they were generated from.
+  var dump_function = function(func, name, index) {
+    var nr_args = func.nr_args != null ? func.nr_args : 0
+    var nr_slots = func.nr_slots != null ? func.nr_slots : 0
+    var nr_close = func.nr_close_slots != null ? func.nr_close_slots : 0
+    var instrs = func.instructions
+    var start_line = first_instr_line(func)
+    var header = null
+    var i = 0
+    var pc = 0
+    var instr = null
+    var op = null
+    var n = 0
+    var parts = null
+    var j = 0
+    var operands = null
+    var instr_line = null
+    var last_line = null
+    var src = null
+    var line_str = null
+    var instr_text = null
+    var target_name = null
+
+    header = `\n=== ${name} (args=${text(nr_args)}, slots=${text(nr_slots)}, closures=${text(nr_close)})`
+    if (start_line != null) {
+      header = header + ` [line ${text(start_line)}]`
+    }
+    header = header + " ==="
+    log.compile(header)
+
+    if (instrs == null || length(instrs) == 0) {
+      log.compile(" (empty)")
+      return null
+    }
+
+    while (i < length(instrs)) {
+      instr = instrs[i]
+      if (is_text(instr)) {
+        // Text entries are printed as labels; _nop_ ones are hidden, and
+        // none are shown while a --line filter is active.
+        if (!starts_with(instr, "_nop_") && line_filter == null) {
+          log.compile(` ${instr}:`)
+        }
+      } else if (is_array(instr)) {
+        op = instr[0]
+        n = length(instr)
+        instr_line = instr_line_of(instr)
+
+        // Filtered-out instructions still advance pc so the numbers shown
+        // match the unfiltered listing.
+        if (line_filter != null && instr_line != line_filter) {
+          pc = pc + 1
+          i = i + 1
+          continue
+        }
+
+        if (instr_line != last_line && instr_line != null) {
+          src = get_source_line(instr_line)
+          if (src != null) {
+            src = trim(src)
+          }
+          if (last_line != null) {
+            log.compile("")
+          }
+          if (src != null && length(src) > 0) {
+            log.compile(` --- line ${text(instr_line)}: ${src} ---`)
+          } else {
+            log.compile(` --- line ${text(instr_line)} ---`)
+          }
+          last_line = instr_line
+        }
+
+        // Operands sit between the opcode and the trailing two slots.
+        parts = []
+        j = 1
+        while (j < n - 2) {
+          push(parts, fmt_val(instr[j]))
+          j = j + 1
+        }
+        operands = text(parts, ", ")
+        line_str = instr_line != null ? `:${text(instr_line)}` : ""
+        instr_text = ` ${pad_right(text(pc), 6)}${pad_right(op, 15)}${operands}`
+
+        // Cross-reference for function creation instructions
+        target_name = null
+        if (op == "function" && n >= 5) {
+          target_name = func_name_by_index(instr[2])
+        }
+        instr_text = pad_right(instr_text, 65) + line_str
+        if (target_name != null) {
+          instr_text = instr_text + ` ; -> [${text(instr[2])}] ${target_name}`
+        }
+
+        log.compile(instr_text)
+        pc = pc + 1
+      }
+      i = i + 1
+    }
+    return null
+  }
+
+  // Process functions
+
+  // Unnamed code gets a placeholder so its header is never blank.
+  main_name = compiled.name != null ? compiled.name : "<main>"
+
+  if (compiled.main != null) {
+    if (fn_matches(-1, main_name)) {
+      if (line_filter == null || func_has_line(compiled.main, line_filter)) {
+        dump_function(compiled.main, main_name, -1)
+      }
+    }
+  }
+
+  if (compiled.functions != null) {
+    fi = 0
+    while (fi < length(compiled.functions)) {
+      func = compiled.functions[fi]
+      fname = func.name != null ? func.name : "<anonymous>"
+      if (fn_matches(fi, fname)) {
+        if (line_filter == null || func_has_line(func, line_filter)) {
+          dump_function(func, `[${text(fi)}] ${fname}`, fi)
+        }
+      }
+      fi = fi + 1
+    }
+  }
+
+  return null
+}
+
+run()
+$stop()
diff --git a/docs/compiler-tools.md b/docs/compiler-tools.md
index 774df046..d1a62129 100644
--- a/docs/compiler-tools.md
+++ b/docs/compiler-tools.md
@@ -28,6 +28,8 @@ Each stage has a corresponding CLI tool that lets you see its output.
 | streamline | `streamline.ce --types` | Optimized IR with type annotations |
 | streamline | `streamline.ce --stats` | Per-function summary stats |
 | streamline | `streamline.ce --ir` | Human-readable canonical IR |
+| disasm | `disasm.ce` | Source-interleaved disassembly |
+| disasm | `disasm.ce --optimized` | Optimized source-interleaved disassembly |
 | all | `ir_report.ce` | Structured optimizer flight recorder |
 
 All tools take a source file as input and run the pipeline up to the relevant stage.
@@ -38,6 +40,9 @@ All tools take a source file as input and run the pipeline up to the relevant st
 # see raw mcode IR (pretty-printed)
 cell mcode --pretty myfile.ce
 
+# source-interleaved disassembly
+cell disasm myfile.ce
+
 # see optimized IR with type annotations
 cell streamline --types myfile.ce
 
@@ -86,6 +91,56 @@ cell streamline --diagnose # compile-time diagnostics
 
 Flags can be combined.
 
+## disasm.ce
+
+Source-interleaved disassembly. Shows mcode or optimized IR with source lines interleaved, making it easy to see which instructions were generated from which source code.
+
+```bash
+cell disasm <file> # disassemble all functions (mcode)
+cell disasm --optimized <file> # disassemble optimized IR (streamline)
+cell disasm --fn 87 <file> # show only function 87
+cell disasm --fn my_func <file> # show only functions named "my_func"
+cell disasm --line 235 <file> # show instructions generated from line 235
+```
+
+| Flag | Description |
+|------|-------------|
+| (none) | Raw mcode IR with source interleaving (default) |
+| `--optimized` | Use optimized IR (streamline) instead of raw mcode |
+| `--fn <N\|name>` | Filter to specific function by index or name substring |
+| `--line <N>` | Show only instructions generated from a specific source line |
+
+### Output Format
+
+Functions are shown with a header including argument count, slot count, and the source line where the function begins. Instructions are grouped by source line, with the source text shown before each group:
+
+```
+=== [87] (args=0, slots=12, closures=0) [line 234] ===
+
+ --- line 235: var result = compute(x, y) ---
+ 0 access 2, "compute" :235
+ 1 get 3, 1, 0 :235
+ 2 get 4, 1, 1 :235
+ 3 invoke 3, 2, 2 :235
+
+ --- line 236: if (result > 0) { ---
+ 4 access 5, 0 :236
+ 5 gt 6, 4, 5 :236
+ 6 jump_false 6, "else_1" :236
+```
+
+Each instruction line shows:
+- Program counter (left-aligned)
+- Opcode
+- Operands (comma-separated)
+- Source line number (`:N` suffix, right-aligned)
+
+Function creation instructions include a cross-reference annotation showing the target function's name:
+
+```
+ 3 function 5, 12 :235 ; -> [12] helper_fn
+```
+
 ## seed.ce
 
 Regenerates the boot seed files in `boot/`. These are pre-compiled mcode IR (JSON) files that bootstrap the compilation pipeline on cold start.