From 8d449e6fc6827ebb34f559b9d856ec29a71c7124 Mon Sep 17 00:00:00 2001 From: John Alanbrook Date: Fri, 20 Feb 2026 12:40:49 -0600 Subject: [PATCH] better compiler warnings adn errors --- analyze.cm | 144 ++++++++++++++++++++++++++++ index.cm | 4 +- internal/engine.cm | 130 +++++++++++++++++++++++++- internal/shop.cm | 19 ++++ parse.cm | 7 ++ query.ce | 137 ++++++++++++++++++++------- query.cm | 55 ++--------- source/cell.c | 11 ++- streamline.ce | 37 +++++++- streamline.cm | 228 +++++++++++++++++++++++++++++++++++++++++++++ 10 files changed, 679 insertions(+), 93 deletions(-) create mode 100644 analyze.cm diff --git a/analyze.cm b/analyze.cm new file mode 100644 index 00000000..992f2833 --- /dev/null +++ b/analyze.cm @@ -0,0 +1,144 @@ +// analyze.cm — Static analysis over index data. +// +// All functions take an index object (from index.cm) and return structured results. +// Does not depend on streamline — operates purely on source-semantic data. + +var analyze = {} + +// Find all references to a name, with optional scope filter. +// scope: "top" (enclosing == null), "fn" (enclosing != null), null (all) +analyze.find_refs = function(idx, name, scope) { + var hits = [] + var i = 0 + var ref = null + while (i < length(idx.references)) { + ref = idx.references[i] + if (ref.name == name) { + if (scope == null) { + hits[] = ref + } else if (scope == "top" && ref.enclosing == null) { + hits[] = ref + } else if (scope == "fn" && ref.enclosing != null) { + hits[] = ref + } + } + i = i + 1 + } + return hits +} + +// Find all . usage patterns (channel analysis). +// Only counts unshadowed uses (name not declared as local var in scope). +analyze.channels = function(idx, name) { + var channels = {} + var summary = {} + var i = 0 + var cs = null + var callee = null + var prop = null + var prefix_dot = name + "." + while (i < length(idx.call_sites)) { + cs = idx.call_sites[i] + callee = cs.callee + if (callee != null && starts_with(callee, prefix_dot)) { + prop = text(callee, length(prefix_dot), length(callee)) + if (channels[prop] == null) { + channels[prop] = [] + } + channels[prop][] = {span: cs.span} + if (summary[prop] == null) { + summary[prop] = 0 + } + summary[prop] = summary[prop] + 1 + } + i = i + 1 + } + return {channels: channels, summary: summary} +} + +// Find declarations by name, with optional kind filter. +// kind: "var", "def", "fn", "param", or null (any) +analyze.find_decls = function(idx, name, kind) { + var hits = [] + var i = 0 + var sym = null + while (i < length(idx.symbols)) { + sym = idx.symbols[i] + if (sym.name == name) { + if (kind == null || sym.kind == kind) { + hits[] = sym + } + } + i = i + 1 + } + return hits +} + +// Find intrinsic usage by name. +analyze.find_intrinsic = function(idx, name) { + var hits = [] + var i = 0 + var ref = null + if (idx.intrinsic_refs == null) return hits + while (i < length(idx.intrinsic_refs)) { + ref = idx.intrinsic_refs[i] + if (ref.name == name) { + hits[] = ref + } + i = i + 1 + } + return hits +} + +// Call sites with >4 args — always a compile error (max arity is 4). +analyze.excess_args = function(idx) { + var hits = [] + var i = 0 + var cs = null + while (i < length(idx.call_sites)) { + cs = idx.call_sites[i] + if (cs.args_count > 4) { + hits[] = {span: cs.span, callee: cs.callee, args_count: cs.args_count} + } + i = i + 1 + } + return hits +} + +// Extract module export shape from index data (for cross-module analysis). +analyze.module_summary = function(idx) { + var exports = {} + var i = 0 + var j = 0 + var exp = null + var sym = null + var found = false + if (idx.exports == null) return {exports: exports} + while (i < length(idx.exports)) { + exp = idx.exports[i] + found = false + if (exp.symbol_id != null) { + j = 0 + while (j < length(idx.symbols)) { + sym = idx.symbols[j] + if (sym.symbol_id == exp.symbol_id) { + if (sym.kind == "fn" && sym.params != null) { + exports[exp.name] = {type: "function", arity: length(sym.params)} + } else { + exports[exp.name] = {type: sym.kind} + } + found = true + break + } + j = j + 1 + } + } + if (!found) { + exports[exp.name] = {type: "unknown"} + } + i = i + 1 + } + return {exports: exports} +} + +return analyze diff --git a/index.cm b/index.cm index 81874b63..82e95608 100644 --- a/index.cm +++ b/index.cm @@ -201,7 +201,9 @@ var index_ast = function(ast, tokens, filename) { if (node.expression.left != null && node.expression.left.kind == "name") { callee_name = node.expression.left.name } - if (node.expression.right != null && node.expression.right.name != null) { + if (is_text(node.expression.right)) { + callee_name = (callee_name != null ? callee_name + "." : "") + node.expression.right + } else if (node.expression.right != null && node.expression.right.name != null) { callee_name = (callee_name != null ? callee_name + "." : "") + node.expression.right.name } } diff --git a/internal/engine.cm b/internal/engine.cm index 81815b53..4a035cfa 100644 --- a/internal/engine.cm +++ b/internal/engine.cm @@ -182,9 +182,99 @@ function analyze(src, filename) { // Lazy-loaded verify_ir module (loaded on first use) var _verify_ir_mod = null +// Module summary extraction for cross-program analysis. +// Scans mcode IR for use() call patterns and attaches summaries. +// _summary_resolver is set after shop loads (null during bootstrap). +var _summary_resolver = null + +function extract_module_summaries(compiled) { + if (_summary_resolver == null) return null + var instrs = null + var summaries = [] + var i = 0 + var j = 0 + var n = 0 + var instr = null + var prev = null + var op = null + var use_slots = {} + var frame_map = {} + var arg_map = {} + var val_slot = 0 + var f_slot = 0 + var path = null + var result_slot = 0 + var summary = null + + if (compiled.main == null) return null + instrs = compiled.main.instructions + if (instrs == null) return null + n = length(instrs) + + // Pass 1: find access(slot, {make:"intrinsic", name:"use"}) + i = 0 + while (i < n) { + instr = instrs[i] + if (is_array(instr) && instr[0] == "access") { + if (is_object(instr[2]) && instr[2].make == "intrinsic" && instr[2].name == "use") { + use_slots[text(instr[1])] = true + } + } + i = i + 1 + } + + // Pass 2: find frame(frame_slot, use_slot), setarg with string, invoke + i = 0 + while (i < n) { + instr = instrs[i] + if (is_array(instr)) { + op = instr[0] + if (op == "frame" || op == "goframe") { + if (use_slots[text(instr[2])] == true) { + frame_map[text(instr[1])] = true + } + } else if (op == "setarg") { + if (frame_map[text(instr[1])] == true) { + val_slot = instr[3] + j = i - 1 + while (j >= 0) { + prev = instrs[j] + if (is_array(prev) && prev[0] == "access" && prev[1] == val_slot && is_text(prev[2])) { + arg_map[text(instr[1])] = prev[2] + break + } + j = j - 1 + } + } + } else if (op == "invoke" || op == "tail_invoke") { + f_slot = instr[1] + path = arg_map[text(f_slot)] + if (path != null) { + result_slot = instr[2] + summary = _summary_resolver(path) + if (summary != null) { + summaries[] = {slot: result_slot, summary: summary} + } + } + } + } + i = i + 1 + } + + if (length(summaries) > 0) return summaries + return null +} + // Run AST through mcode pipeline -> register VM function run_ast_fn(name, ast, env) { var compiled = mcode_mod(ast) + var ms = null + var optimized = null + var _di = 0 + var _diag = null + var _has_errors = false + var mcode_json = null + var mach_blob = null if (os._verify_ir) { if (_verify_ir_mod == null) { _verify_ir_mod = load_pipeline_module('verify_ir', pipeline_env) @@ -192,13 +282,31 @@ function run_ast_fn(name, ast, env) { compiled._verify = true compiled._verify_mod = _verify_ir_mod } - var optimized = streamline_mod(compiled) + if (!_no_warn) { + compiled._warn = true + ms = extract_module_summaries(compiled) + if (ms != null) { + compiled._module_summaries = ms + } + } + optimized = streamline_mod(compiled) if (optimized._verify) { delete optimized._verify delete optimized._verify_mod } - var mcode_json = json.encode(optimized) - var mach_blob = mach_compile_mcode_bin(name, mcode_json) + if (optimized._diagnostics != null && length(optimized._diagnostics) > 0) { + _di = 0 + _has_errors = false + while (_di < length(optimized._diagnostics)) { + _diag = optimized._diagnostics[_di] + print(`${_diag.file}:${text(_diag.line)}:${text(_diag.col)}: ${_diag.severity}: ${_diag.message}\n`) + if (_diag.severity == "error") _has_errors = true + _di = _di + 1 + } + if (_has_errors) disrupt + } + mcode_json = json.encode(optimized) + mach_blob = mach_compile_mcode_bin(name, mcode_json) return mach_load(mach_blob, env) } @@ -227,6 +335,9 @@ var _init = init if (_init != null && _init.native_mode) native_mode = true +// Inherit warn mode from init (set by C for --no-warn) +var _no_warn = (_init != null && _init.no_warn) ? true : false + // CLI path: convert args to init record if (args != null && (_init == null || !_init.program)) { _program = args[0] @@ -424,6 +535,19 @@ core_extras.native_mode = native_mode // NOW load shop -- it receives all of the above via env var shop = use_core('internal/shop') use_core('build') + +// Wire up module summary resolver now that shop is available +_summary_resolver = function(path) { + var resolved = shop.resolve_use_path(path, null) + if (resolved == null) return null + var summary_fn = function() { + return shop.summary_file(resolved) + } disruption { + return null + } + return summary_fn() +} + var time = use_core('time') var toml = use_core('toml') diff --git a/internal/shop.cm b/internal/shop.cm index 2d549f85..d66c9732 100644 --- a/internal/shop.cm +++ b/internal/shop.cm @@ -519,6 +519,7 @@ var _ast_cache = {} var _analyze_cache = {} var _compile_cache = {} var _index_cache = {} +var _summary_cache = {} var get_tokenize = function() { if (!_tokenize_mod) _tokenize_mod = use_cache['core/tokenize'] || use_cache['tokenize'] @@ -547,6 +548,14 @@ var get_index = function() { } return _index_mod } +var _analyze_mod = null +var get_analyze = function() { + if (!_analyze_mod) { + _analyze_mod = use_cache['core/analyze'] || use_cache['analyze'] + if (!_analyze_mod) _analyze_mod = Shop.use('analyze', 'core') + } + return _analyze_mod +} Shop.tokenize_file = function(path) { var src = text(fd.slurp(path)) @@ -611,6 +620,16 @@ Shop.index_file = function(path) { return idx } +Shop.summary_file = function(path) { + var src = text(fd.slurp(path)) + var key = content_hash(stone(blob(src))) + if (_summary_cache[key]) return _summary_cache[key] + var idx = Shop.index_file(path) + var summary = get_analyze().module_summary(idx) + _summary_cache[key] = summary + return summary +} + Shop.mcode_file = function(path) { var folded = Shop.analyze_file(path) return get_mcode()(folded) diff --git a/parse.cm b/parse.cm index 07c7f185..e5fc3d40 100644 --- a/parse.cm +++ b/parse.cm @@ -1727,6 +1727,13 @@ var parse = function(tokens, src, filename, tokenizer) { return null } + if (kind == "this") { + if (scope.function_nr == 0) { + sem_error(expr, "'this' cannot be used at the top level of a program") + } + return null + } + if (kind == "[") { sem_check_expr(scope, expr.left) sem_check_expr(scope, expr.right) diff --git a/query.ce b/query.ce index 490f93d0..b8d9fe98 100644 --- a/query.ce +++ b/query.ce @@ -1,29 +1,39 @@ // cell query — Semantic queries across packages. // // Usage: -// cell query --this [--top|--fn] [] this references -// cell query --intrinsic [] Find built-in intrinsic usage -// cell query --decl [] Variable declarations by name -// cell query --help Show usage +// cell query Find all references to +// cell query --top Top-level references only +// cell query --fn Inside-function references only +// cell query --channels Show . usage summary +// cell query --decl Find declarations of +// cell query --decl --fn Only function declarations +// cell query --intrinsic Find intrinsic usage +// cell query --excess-args Find call sites with >4 args +// cell query --help var shop = use('internal/shop') -var query_mod = use('query') +var analyze_mod = use('analyze') var fd = use('fd') var mode = null var name = null -var this_scope = null +var scope_filter = null +var kind_filter = null var pkg_filter = null var show_help = false var i = 0 for (i = 0; i < length(args); i++) { - if (args[i] == '--this') { - mode = "this" - } else if (args[i] == '--top') { - this_scope = "top" + if (args[i] == '--top') { + scope_filter = "top" } else if (args[i] == '--fn') { - this_scope = "fn" + if (mode == "decl") { + kind_filter = "fn" + } else { + scope_filter = "fn" + } + } else if (args[i] == '--channels') { + mode = "channels" } else if (args[i] == '--intrinsic') { mode = "intrinsic" if (i + 1 < length(args) && !starts_with(args[i + 1], '-')) { @@ -42,13 +52,26 @@ for (i = 0; i < length(args); i++) { log.error('--decl requires a name') mode = "error" } + } else if (args[i] == '--excess-args') { + mode = "excess_args" } else if (args[i] == '--help' || args[i] == '-h') { show_help = true } else if (!starts_with(args[i], '-')) { - pkg_filter = args[i] + if (name == null && mode == null) { + name = args[i] + mode = "refs" + } else { + pkg_filter = args[i] + } } } +// --channels requires a name from positional arg +if (mode == "channels" && name == null) { + log.error('--channels requires a name (e.g., cell query log --channels)') + mode = "error" +} + var all_files = null var files = [] var j = 0 @@ -56,6 +79,10 @@ var idx = null var hits = null var hit = null var k = 0 +var ch_result = null +var props = null +var prop = null +var parts = null // Use return pattern to avoid closure-over-object issue with disruption. var safe_index = function(path) { @@ -65,21 +92,24 @@ var safe_index = function(path) { } if (show_help) { - log.console("Usage: cell query [options] []") + log.console("Usage: cell query [options] [] []") log.console("") log.console("Semantic queries across packages.") log.console("") - log.console("Options:") - log.console(" --this All this references") - log.console(" --this --top Top-level this only (not inside functions)") - log.console(" --this --fn this inside functions only") - log.console(" --intrinsic Find built-in intrinsic usage (e.g., print)") - log.console(" --decl Variable declarations by name") + log.console("Commands:") + log.console(" Find all references to ") + log.console(" --top Top-level references only") + log.console(" --fn Inside-function references only") + log.console(" --channels Show . usage summary") + log.console(" --decl Find declarations of ") + log.console(" --decl --fn Only function declarations") + log.console(" --intrinsic Find intrinsic usage") + log.console(" --excess-args Find call sites with >4 args") log.console("") log.console("Without a package argument, searches all installed packages.") } else if (mode == null || mode == "error") { if (mode != "error") { - log.error('Specify --this, --intrinsic, or --decl. Use --help for usage.') + log.error('Specify a name or --decl/--intrinsic/--excess-args. Use --help for usage.') } } else { all_files = shop.all_script_paths() @@ -98,22 +128,61 @@ if (show_help) { idx = safe_index(files[j].full_path) if (idx == null) continue - hits = null - if (mode == "this") { - hits = query_mod.find_this(idx, this_scope) + if (mode == "refs") { + hits = analyze_mod.find_refs(idx, name, scope_filter) + if (hits != null && length(hits) > 0) { + for (k = 0; k < length(hits); k++) { + hit = hits[k] + if (hit.span != null) { + if (hit.enclosing != null) { + log.console(`${files[j].package}:${files[j].rel_path}:${text(hit.span.from_row)}:${text(hit.span.from_col)}: ${hit.name} (in: ${hit.enclosing})`) + } else { + log.console(`${files[j].package}:${files[j].rel_path}:${text(hit.span.from_row)}:${text(hit.span.from_col)}: ${hit.name} (top-level)`) + } + } + } + } + } else if (mode == "channels") { + ch_result = analyze_mod.channels(idx, name) + if (ch_result != null && ch_result.summary != null) { + props = array(ch_result.summary) + if (length(props) > 0) { + parts = [] + for (k = 0; k < length(props); k++) { + prop = props[k] + parts[] = `${prop}(${text(ch_result.summary[prop])})` + } + log.console(`${files[j].package}:${files[j].rel_path}: ${text(parts, " ")}`) + } + } } else if (mode == "intrinsic") { - hits = query_mod.intrinsic(idx, name) + hits = analyze_mod.find_intrinsic(idx, name) + if (hits != null && length(hits) > 0) { + for (k = 0; k < length(hits); k++) { + hit = hits[k] + if (hit.span != null) { + log.console(`${files[j].package}:${files[j].rel_path}:${text(hit.span.from_row)}:${text(hit.span.from_col)}: ${hit.name}`) + } + } + } } else if (mode == "decl") { - hits = query_mod.find_decl(idx, name, null) - } - - if (hits != null && length(hits) > 0) { - for (k = 0; k < length(hits); k++) { - hit = hits[k] - if (hit.span != null) { - log.console(`${files[j].package}:${files[j].rel_path}:${text(hit.span.from_row)}:${text(hit.span.from_col)}: ${hit.name}`) - } else if (hit.decl_span != null) { - log.console(`${files[j].package}:${files[j].rel_path}:${text(hit.decl_span.from_row)}:${text(hit.decl_span.from_col)}: ${hit.kind} ${hit.name}`) + hits = analyze_mod.find_decls(idx, name, kind_filter) + if (hits != null && length(hits) > 0) { + for (k = 0; k < length(hits); k++) { + hit = hits[k] + if (hit.decl_span != null) { + log.console(`${files[j].package}:${files[j].rel_path}:${text(hit.decl_span.from_row)}:${text(hit.decl_span.from_col)}: ${hit.kind} ${hit.name}`) + } + } + } + } else if (mode == "excess_args") { + hits = analyze_mod.excess_args(idx) + if (hits != null && length(hits) > 0) { + for (k = 0; k < length(hits); k++) { + hit = hits[k] + if (hit.span != null) { + log.console(`${files[j].package}:${files[j].rel_path}:${text(hit.span.from_row)}:${text(hit.span.from_col)}: ${hit.callee}() called with ${text(hit.args_count)} args (max 4)`) + } } } } diff --git a/query.cm b/query.cm index 96d30be1..b955e1e1 100644 --- a/query.cm +++ b/query.cm @@ -1,62 +1,19 @@ -// query.cm — Semantic queries over index data. -// -// All functions take an index object (from index.cm) and return arrays of hits. +// query.cm — Backward-compatible wrapper delegating to analyze.cm. + +var analyze = use('analyze') var query = {} -// Find this references. scope: "top" (top-level only), "fn" (in functions), null (all). query.find_this = function(idx, scope) { - var hits = [] - var i = 0 - var ref = null - while (i < length(idx.references)) { - ref = idx.references[i] - if (ref.name == "this") { - if (scope == null) { - hits[] = ref - } else if (scope == "top" && ref.enclosing == null) { - hits[] = ref - } else if (scope == "fn" && ref.enclosing != null) { - hits[] = ref - } - } - i = i + 1 - } - return hits + return analyze.find_refs(idx, "this", scope) } -// Intrinsic usage: find refs to a built-in name (e.g., print). query.intrinsic = function(idx, name) { - var hits = [] - var i = 0 - var ref = null - if (idx.intrinsic_refs == null) return hits - while (i < length(idx.intrinsic_refs)) { - ref = idx.intrinsic_refs[i] - if (ref.name == name) { - hits[] = ref - } - i = i + 1 - } - return hits + return analyze.find_intrinsic(idx, name) } -// Variable declarations matching a name and optional kind filter. -// kind is one of "var", "def", "fn", "param", or null (any). query.find_decl = function(idx, name, kind) { - var hits = [] - var i = 0 - var sym = null - while (i < length(idx.symbols)) { - sym = idx.symbols[i] - if (sym.name == name) { - if (kind == null || sym.kind == kind) { - hits[] = sym - } - } - i = i + 1 - } - return hits + return analyze.find_decls(idx, name, kind) } return query diff --git a/source/cell.c b/source/cell.c index 0bb7d4c2..95a8174e 100644 --- a/source/cell.c +++ b/source/cell.c @@ -30,6 +30,7 @@ cell_rt *root_cell = NULL; static char *shop_path = NULL; static char *core_path = NULL; static int native_mode = 0; +static int warn_mode = 1; static JSRuntime *g_runtime = NULL; // Compute blake2b hash of data and return hex string (caller must free) @@ -523,6 +524,9 @@ int cell_init(int argc, char **argv) } else if (strcmp(argv[arg_start], "--native") == 0) { native_mode = 1; arg_start++; + } else if (strcmp(argv[arg_start], "--no-warn") == 0) { + warn_mode = 0; + arg_start++; } else { break; } @@ -661,11 +665,14 @@ int cell_init(int argc, char **argv) JS_SetPropertyStr(ctx, env_ref.val, "actorsym", JS_DupValue(ctx, cli_rt->actor_sym_ref.val)); tmp = js_core_json_use(ctx); JS_SetPropertyStr(ctx, env_ref.val, "json", tmp); - if (native_mode) { + if (native_mode || !warn_mode) { JSGCRef init_ref; JS_AddGCRef(ctx, &init_ref); init_ref.val = JS_NewObject(ctx); - JS_SetPropertyStr(ctx, init_ref.val, "native_mode", JS_NewBool(ctx, 1)); + if (native_mode) + JS_SetPropertyStr(ctx, init_ref.val, "native_mode", JS_NewBool(ctx, 1)); + if (!warn_mode) + JS_SetPropertyStr(ctx, init_ref.val, "no_warn", JS_NewBool(ctx, 1)); JS_SetPropertyStr(ctx, env_ref.val, "init", init_ref.val); JS_DeleteGCRef(ctx, &init_ref); } else { diff --git a/streamline.ce b/streamline.ce index 1d82f97a..3e6294c5 100644 --- a/streamline.ce +++ b/streamline.ce @@ -6,6 +6,7 @@ // cell streamline --ir Human-readable IR // cell streamline --check Warnings only (e.g. high slot count) // cell streamline --types Optimized IR with type annotations +// cell streamline --diagnose Run diagnostics (type errors/warnings) var fd = use("fd") var json = use("json") @@ -15,8 +16,11 @@ var show_stats = false var show_ir = false var show_check = false var show_types = false +var show_diagnose = false var filename = null var i = 0 +var di = 0 +var diag = null for (i = 0; i < length(args); i++) { if (args[i] == '--stats') { @@ -27,8 +31,10 @@ for (i = 0; i < length(args); i++) { show_check = true } else if (args[i] == '--types') { show_types = true + } else if (args[i] == '--diagnose') { + show_diagnose = true } else if (args[i] == '--help' || args[i] == '-h') { - log.console("Usage: cell streamline [--stats] [--ir] [--check] [--types] ") + log.console("Usage: cell streamline [--stats] [--ir] [--check] [--types] [--diagnose] ") $stop() } else if (!starts_with(args[i], '-')) { filename = args[i] @@ -36,7 +42,7 @@ for (i = 0; i < length(args); i++) { } if (!filename) { - print("usage: cell streamline [--stats] [--ir] [--check] [--types] ") + print("usage: cell streamline [--stats] [--ir] [--check] [--types] [--diagnose] ") $stop() } @@ -46,10 +52,19 @@ if (show_stats) { before = json.decode(json.encode(shop.mcode_file(filename))) } -var optimized = shop.compile_file(filename) +// For --diagnose, compile with _warn enabled to get diagnostics +var optimized = null +var compiled = null +if (show_diagnose) { + compiled = shop.mcode_file(filename) + compiled._warn = true + optimized = use('streamline')(compiled) +} else { + optimized = shop.compile_file(filename) +} // If no flags, default to full JSON output -if (!show_stats && !show_ir && !show_check && !show_types) { +if (!show_stats && !show_ir && !show_check && !show_types && !show_diagnose) { print(json.encode(optimized, true)) $stop() } @@ -343,4 +358,18 @@ if (show_stats) { print('---') } +if (show_diagnose) { + if (optimized._diagnostics != null && length(optimized._diagnostics) > 0) { + di = 0 + while (di < length(optimized._diagnostics)) { + diag = optimized._diagnostics[di] + print(`${diag.file}:${text(diag.line)}:${text(diag.col)}: ${diag.severity}: ${diag.message}`) + di = di + 1 + } + print(`\n${text(length(optimized._diagnostics))} diagnostic(s)`) + } else { + print("No diagnostics.") + } +} + $stop() diff --git a/streamline.cm b/streamline.cm index 9fe9adb5..0d0c2064 100644 --- a/streamline.cm +++ b/streamline.cm @@ -1829,6 +1829,227 @@ var streamline = function(ir, log) { return null } + // ========================================================= + // Pass: diagnose_function — emit diagnostics from type info + // Runs after dead code elimination on surviving instructions. + // ========================================================= + var diagnose_function = function(func, ctx, ir) { + var param_types = ctx.param_types + var write_types = ctx.write_types + var instructions = func.instructions + var nr_args = func.nr_args != null ? func.nr_args : 0 + var num_instr = 0 + var base_types = null + var cur_types = null + var i = 0 + var j = 0 + var instr = null + var op = null + var n = 0 + var line = 0 + var col = 0 + var known = null + var filename = ir.filename != null ? ir.filename : "" + var frame_callee = {} + var frame_argc = {} + var callee_slot = null + var obj_type = null + var key_type = null + var module_slots = {} + var slot_arity = {} + var ms_i = 0 + var ms = null + var exp_info = null + var f_slot_key = null + var cs = null + var argc = null + var known_arity = null + + // Build module_slots map from ir._module_summaries + if (ir._module_summaries != null) { + ms_i = 0 + while (ms_i < length(ir._module_summaries)) { + ms = ir._module_summaries[ms_i] + module_slots[text(ms.slot)] = ms.summary + ms_i = ms_i + 1 + } + } + + if (instructions == null || length(instructions) == 0) return null + + num_instr = length(instructions) + + // Pre-compute base types from params + write-invariant types + base_types = array(func.nr_slots) + j = 1 + while (j <= nr_args) { + if (param_types != null && param_types[j] != null) { + base_types[j] = param_types[j] + } + j = j + 1 + } + if (write_types != null) { + j = 0 + while (j < length(write_types)) { + if (write_types[j] != null) { + base_types[j] = write_types[j] + } + j = j + 1 + } + } + + cur_types = array(base_types) + + var emit = function(severity, line, col, message) { + ir._diagnostics[] = { + severity: severity, + file: filename, + line: line, + col: col, + message: message + } + } + + i = 0 + while (i < num_instr) { + instr = instructions[i] + + if (is_text(instr)) { + // Label — reset types to base + if (!starts_with(instr, "_nop_")) { + cur_types = array(base_types) + } + i = i + 1 + continue + } + + if (!is_array(instr)) { + i = i + 1 + continue + } + + op = instr[0] + n = length(instr) + line = instr[n - 2] + col = instr[n - 1] + + // Track frame/invoke correlation + if (op == "frame" || op == "goframe") { + frame_callee[text(instr[1])] = instr[2] + if (n > 4) { + frame_argc[text(instr[1])] = instr[3] + } + } + + // --- Error checks (proven to always disrupt) --- + + if (op == "frame" || op == "goframe") { + callee_slot = instr[2] + known = cur_types[callee_slot] + if (known == T_NULL) { + emit("error", line, col, "invoking null — will always disrupt") + } else if (known != null && known != T_UNKNOWN && known != T_FUNCTION) { + emit("error", line, col, `invoking ${known} — will always disrupt`) + } + } + + if (op == "invoke" || op == "tail_invoke") { + f_slot_key = text(instr[1]) + cs = frame_callee[f_slot_key] + argc = frame_argc[f_slot_key] + if (cs != null && argc != null) { + known_arity = slot_arity[text(cs)] + if (known_arity != null) { + if (argc > known_arity) { + emit("error", line, col, `function expects ${text(known_arity)} args, called with ${text(argc)}`) + } else if (argc < known_arity) { + emit("warning", line, col, `function expects ${text(known_arity)} args, called with ${text(argc)}`) + } + } + } + } + + if (op == "store_field") { + obj_type = cur_types[instr[1]] + if (obj_type == T_TEXT) { + emit("error", line, col, "storing property on text — text is immutable") + } else if (obj_type == T_ARRAY) { + emit("error", line, col, "storing named property on array — use index or push") + } + } + + if (op == "store_index") { + obj_type = cur_types[instr[1]] + if (obj_type == T_TEXT) { + emit("error", line, col, "storing index on text — text is immutable") + } else if (obj_type == T_RECORD) { + emit("error", line, col, "storing numeric index on record — use text key") + } + } + + if (op == "store_dynamic") { + obj_type = cur_types[instr[1]] + if (obj_type == T_TEXT) { + emit("error", line, col, "storing on text — text is immutable") + } + } + + if (op == "push") { + obj_type = cur_types[instr[1]] + if (obj_type != null && obj_type != T_UNKNOWN && obj_type != T_ARRAY) { + emit("error", line, col, `push on ${obj_type} — only arrays support push`) + } + } + + // Note: arithmetic (add/subtract/etc), bitwise, and concat ops are NOT + // checked here because the mcode generator emits type-dispatch guards + // before these instructions. The guards ensure correct types at runtime. + + // --- Warning checks (likely bug) --- + + if (op == "load_field") { + obj_type = cur_types[instr[2]] + if (obj_type == T_ARRAY) { + emit("warning", line, col, "named property access on array — always returns null") + } else if (obj_type == T_TEXT) { + emit("warning", line, col, "named property access on text — always returns null") + } + // Cross-module: check if obj is a module with known exports + ms = module_slots[text(instr[2])] + if (ms != null && ms.exports != null && is_text(instr[3])) { + exp_info = ms.exports[instr[3]] + if (exp_info == null) { + emit("warning", line, col, `module does not export '${instr[3]}'`) + } else if (exp_info.type == "function") { + cur_types[instr[1]] = T_FUNCTION + slot_arity[text(instr[1])] = exp_info.arity + } + } + } + + if (op == "load_dynamic") { + obj_type = cur_types[instr[2]] + key_type = cur_types[instr[3]] + if (obj_type == T_ARRAY && key_type == T_TEXT) { + emit("warning", line, col, "text key on array — always returns null") + } + if (obj_type == T_TEXT && key_type != null && key_type != T_UNKNOWN && key_type != T_INT) { + emit("warning", line, col, `${key_type} key on text — requires integer index`) + } + if (obj_type == T_RECORD && key_type != null && key_type != T_UNKNOWN && key_type != T_TEXT) { + emit("warning", line, col, `${key_type} key on record — requires text key`) + } + } + + // Update types for this instruction + track_types(cur_types, instr) + + i = i + 1 + } + + return null + } + // ========================================================= // Compose all passes // ========================================================= @@ -1905,6 +2126,9 @@ var streamline = function(ir, log) { } run_cycle("") + if (ir._warn) { + diagnose_function(func, {param_types: param_types, write_types: write_types}, ir) + } return null } @@ -1914,6 +2138,10 @@ var streamline = function(ir, log) { // eliminator to mis-optimize comparisons on closure-written variables. mark_closure_writes(ir) + if (ir._warn) { + ir._diagnostics = [] + } + // Process main function if (ir.main != null) { optimize_function(ir.main, log)