From bd7f9f34ec48a88fc7668437d73b3df9bd5a82c6 Mon Sep 17 00:00:00 2001 From: John Alanbrook Date: Wed, 18 Feb 2026 10:46:47 -0600 Subject: [PATCH] simplify compilation requestors --- build.cm | 31 ++--------- compare_aot.ce | 2 +- docs/semantic-index.md | 18 +++---- dump_ir.ce | 15 +----- index.cm | 15 +----- internal/shop.cm | 36 +++++++++---- mcode.ce | 4 +- query.ce | 116 +++++++++++++++++++++++++++++++++++++++++ query.cm | 56 ++++++++++++++++++++ streamline.ce | 10 ++-- 10 files changed, 219 insertions(+), 84 deletions(-) create mode 100644 query.ce create mode 100644 query.cm diff --git a/build.cm b/build.cm index 41ae6af0..8f343470 100644 --- a/build.cm +++ b/build.cm @@ -614,23 +614,11 @@ Build.compile_native = function(src_path, target, buildtype, pkg) { var dylib_ext = tc.system == 'windows' ? '.dll' : (tc.system == 'darwin' ? '.dylib' : '.so') var cc = tc.c - // Step 1: Read source and compile through pipeline - var content = fd.slurp(src_path) - var src = text(content) - var tokenize = use('tokenize') - var parse = use('parse') - var fold = use('fold') - var mcode_mod = use('mcode') - var streamline_mod = use('streamline') + // Step 1: Compile through pipeline + var optimized = shop.compile_file(src_path) var qbe_macros = use('qbe') var qbe_emit = use('qbe_emit') - var tok_result = tokenize(src, src_path) - var ast = parse(tok_result.tokens, src, src_path, tokenize) - var folded = fold(ast) - var compiled = mcode_mod(folded) - var optimized = streamline_mod(compiled) - // Step 2: Generate QBE IL var sym_name = null if (pkg) { @@ -639,7 +627,7 @@ Build.compile_native = function(src_path, target, buildtype, pkg) { var il_parts = qbe_emit(optimized, qbe_macros, sym_name) // Content hash for cache key - var hash = content_hash(src + '\n' + _target + '\nnative') + var hash = content_hash(text(fd.slurp(src_path)) + '\n' + _target + '\nnative') var build_dir = get_build_dir() ensure_dir(build_dir) @@ -788,19 +776,8 @@ Build.compile_cm_to_mach = function(src_path) { if (!fd.is_file(src_path)) { print('Source file not found: ' + src_path); disrupt } - var src = text(fd.slurp(src_path)) - var tokenize = use('tokenize') - var parse = use('parse') - var fold = use('fold') - var mcode_mod = use('mcode') - var streamline_mod = use('streamline') var json = use('json') - - var tok_result = tokenize(src, src_path) - var ast = parse(tok_result.tokens, src, src_path, tokenize) - var folded = fold(ast) - var compiled = mcode_mod(folded) - var optimized = streamline_mod(compiled) + var optimized = shop.compile_file(src_path) return mach_compile_mcode_bin(src_path, json.encode(optimized)) } diff --git a/compare_aot.ce b/compare_aot.ce index e0539e73..12a83b99 100644 --- a/compare_aot.ce +++ b/compare_aot.ce @@ -33,7 +33,7 @@ if (!fd_mod.is_file(file)) { var abs = fd_mod.realpath(file) -// Shared compilation front-end +// Shared compilation front-end — uses raw modules for per-stage timing var tokenize = use('tokenize') var parse_mod = use('parse') var fold = use('fold') diff --git a/docs/semantic-index.md b/docs/semantic-index.md index 09c183ec..6c77bf81 100644 --- a/docs/semantic-index.md +++ b/docs/semantic-index.md @@ -217,21 +217,21 @@ This tells you: `connect` is a function taking `(from, to, label)`, declared on The index and explain modules can be used directly from ƿit scripts: -### index.cm +### Via shop (recommended) ```javascript -var tokenize_mod = use('tokenize') -var parse_mod = use('parse') -var fold_mod = use('fold') -var index_mod = use('index') - -var pipeline = {tokenize: tokenize_mod, parse: parse_mod, fold: fold_mod} -var idx = index_mod.index_file(src, filename, pipeline) +var shop = use('internal/shop') +var idx = shop.index_file(path) ``` -`index_file` runs the full pipeline (tokenize, parse, fold) and returns the index. If you already have a parsed AST and tokens, use `index_ast` instead: +`shop.index_file` runs the full pipeline (tokenize, parse, index, resolve imports) and caches the result. + +### index.cm (direct) + +If you already have a parsed AST and tokens, use `index_ast` directly: ```javascript +var index_mod = use('index') var idx = index_mod.index_ast(ast, tokens, filename) ``` diff --git a/dump_ir.ce b/dump_ir.ce index b138000e..c54ee2bb 100644 --- a/dump_ir.ce +++ b/dump_ir.ce @@ -1,18 +1,7 @@ -var tokenize = use('tokenize') -var parse_mod = use('parse') -var fold = use('fold') -var mcode_mod = use('mcode') -var streamline_mod = use('streamline') +var shop = use('internal/shop') var json = use('json') -var fd = use('fd') -var file = args[0] -var src = text(fd.slurp(file)) -var tok = tokenize(src, file) -var ast = parse_mod(tok.tokens, src, file, tokenize) -var folded = fold(ast) -var compiled = mcode_mod(folded) -var optimized = streamline_mod(compiled) +var optimized = shop.compile_file(args[0]) var instrs = optimized.main.instructions var i = 0 diff --git a/index.cm b/index.cm index 4a5ed9f9..3b3e00d7 100644 --- a/index.cm +++ b/index.cm @@ -1,8 +1,7 @@ // index.cm — Core semantic indexing module. // Walks AST output from parse (+ optional fold) to build a semantic index. // -// Two entry points: -// index_file(src, filename, tokenize_mod, parse_mod, fold_mod) — full pipeline +// Entry point: // index_ast(ast, tokens, filename) — index a pre-parsed AST var make_span = function(node) { @@ -638,18 +637,6 @@ var index_ast = function(ast, tokens, filename) { } } -// Run the full pipeline (tokenize -> parse -> fold) and index. -// pipeline is {tokenize, parse, fold} — pass fold as null to skip folding. -var index_file = function(src, filename, pipeline) { - var tok_result = pipeline.tokenize(src, filename) - var ast = pipeline.parse(tok_result.tokens, src, filename, pipeline.tokenize) - if (pipeline.fold != null) { - ast = pipeline.fold(ast) - } - return index_ast(ast, tok_result.tokens, filename) -} - return { - index_file: index_file, index_ast: index_ast } diff --git a/internal/shop.cm b/internal/shop.cm index f78e46e4..a9620639 100644 --- a/internal/shop.cm +++ b/internal/shop.cm @@ -515,11 +515,14 @@ function inject_env(inject) { var _tokenize_mod = null var _parse_mod = null var _fold_mod = null +var _mcode_mod = null +var _streamline_mod = null var _index_mod = null var _token_cache = {} var _ast_cache = {} var _analyze_cache = {} +var _compile_cache = {} var _index_cache = {} var get_tokenize = function() { @@ -534,6 +537,14 @@ var get_fold = function() { if (!_fold_mod) _fold_mod = use_cache['core/fold'] || use_cache['fold'] return _fold_mod } +var get_mcode = function() { + if (!_mcode_mod) _mcode_mod = use_cache['core/mcode'] || use_cache['mcode'] + return _mcode_mod +} +var get_streamline = function() { + if (!_streamline_mod) _streamline_mod = use_cache['core/streamline'] || use_cache['streamline'] + return _streamline_mod +} var get_index = function() { if (!_index_mod) { _index_mod = use_cache['core/index'] || use_cache['index'] @@ -598,21 +609,26 @@ Shop.index_file = function(path) { var key = content_hash(stone(blob(src))) if (_index_cache[key]) return _index_cache[key] var tok = Shop.tokenize_file(path) - var pipeline = {tokenize: get_tokenize(), parse: get_parse(), fold: get_fold()} - var idx = get_index().index_file(src, path, pipeline) + var ast = get_parse()(tok.tokens, src, path, get_tokenize()) + var idx = get_index().index_ast(ast, tok.tokens, path) Shop.resolve_imports(idx, path) _index_cache[key] = idx return idx } -Shop.pipeline = function() { - return { - tokenize: get_tokenize(), - parse: get_parse(), - fold: get_fold(), - mcode: use_cache['core/mcode'] || use_cache['mcode'], - streamline: use_cache['core/streamline'] || use_cache['streamline'] - } +Shop.mcode_file = function(path) { + var folded = Shop.analyze_file(path) + return get_mcode()(folded) +} + +Shop.compile_file = function(path) { + var src = text(fd.slurp(path)) + var key = content_hash(stone(blob(src))) + if (_compile_cache[key]) return _compile_cache[key] + var compiled = Shop.mcode_file(path) + var optimized = get_streamline()(compiled) + _compile_cache[key] = optimized + return optimized } Shop.all_script_paths = function() { diff --git a/mcode.ce b/mcode.ce index 44d3e240..ca2e0fbd 100644 --- a/mcode.ce +++ b/mcode.ce @@ -28,9 +28,7 @@ if (!filename) { $stop() } -var folded = shop.analyze_file(filename) -var pl = shop.pipeline() -var compiled = pl.mcode(folded) +var compiled = shop.mcode_file(filename) if (!show_pretty) { print(json.encode(compiled)) diff --git a/query.ce b/query.ce new file mode 100644 index 00000000..ea7a790e --- /dev/null +++ b/query.ce @@ -0,0 +1,116 @@ +// cell query — Semantic queries across packages. +// +// Usage: +// cell query --this [] Top-level this references +// cell query --intrinsic [] Find built-in intrinsic usage +// cell query --decl [] Variable declarations by name +// cell query --help Show usage + +var shop = use('internal/shop') +var query_mod = use('query') +var fd = use('fd') + +var mode = null +var name = null +var pkg_filter = null +var show_help = false +var i = 0 + +for (i = 0; i < length(args); i++) { + if (args[i] == '--this') { + mode = "this" + } else if (args[i] == '--intrinsic') { + mode = "intrinsic" + if (i + 1 < length(args) && !starts_with(args[i + 1], '-')) { + name = args[i + 1] + i = i + 1 + } else { + log.error('--intrinsic requires a name') + mode = "error" + } + } else if (args[i] == '--decl') { + mode = "decl" + if (i + 1 < length(args) && !starts_with(args[i + 1], '-')) { + name = args[i + 1] + i = i + 1 + } else { + log.error('--decl requires a name') + mode = "error" + } + } else if (args[i] == '--help' || args[i] == '-h') { + show_help = true + } else if (!starts_with(args[i], '-')) { + pkg_filter = args[i] + } +} + +var all_files = null +var files = [] +var j = 0 +var idx = null +var hits = null +var hit = null +var k = 0 + +// Use return pattern to avoid closure-over-object issue with disruption. +var safe_index = function(path) { + return shop.index_file(path) +} disruption { + return null +} + +if (show_help) { + log.console("Usage: cell query [options] []") + log.console("") + log.console("Semantic queries across packages.") + log.console("") + log.console("Options:") + log.console(" --this Top-level this references") + log.console(" --intrinsic Find built-in intrinsic usage (e.g., print)") + log.console(" --decl Variable declarations by name") + log.console("") + log.console("Without a package argument, searches all installed packages.") +} else if (mode == null || mode == "error") { + if (mode != "error") { + log.error('Specify --this, --intrinsic, or --decl. Use --help for usage.') + } +} else { + all_files = shop.all_script_paths() + + if (pkg_filter != null) { + for (j = 0; j < length(all_files); j++) { + if (all_files[j].package == pkg_filter) { + files[] = all_files[j] + } + } + } else { + files = all_files + } + + for (j = 0; j < length(files); j++) { + idx = safe_index(files[j].full_path) + if (idx == null) continue + + hits = null + if (mode == "this") { + hits = query_mod.top_level_this(idx) + } else if (mode == "intrinsic") { + hits = query_mod.intrinsic(idx, name) + } else if (mode == "decl") { + hits = query_mod.find_decl(idx, name, null) + } + + if (hits != null && length(hits) > 0) { + for (k = 0; k < length(hits); k++) { + hit = hits[k] + if (hit.span != null) { + log.console(`${files[j].package}:${files[j].rel_path}:${text(hit.span.from_row)}:${text(hit.span.from_col)}: ${hit.name}`) + } else if (hit.decl_span != null) { + log.console(`${files[j].package}:${files[j].rel_path}:${text(hit.decl_span.from_row)}:${text(hit.decl_span.from_col)}: ${hit.kind} ${hit.name}`) + } + } + } + } +} + +$stop() diff --git a/query.cm b/query.cm new file mode 100644 index 00000000..66fd6413 --- /dev/null +++ b/query.cm @@ -0,0 +1,56 @@ +// query.cm — Semantic queries over index data. +// +// All functions take an index object (from index.cm) and return arrays of hits. + +var query = {} + +// Top-level this: references where name=="this" and enclosing==null. +query.top_level_this = function(idx) { + var hits = [] + var i = 0 + var ref = null + while (i < length(idx.references)) { + ref = idx.references[i] + if (ref.name == "this" && ref.enclosing == null) { + hits[] = ref + } + i = i + 1 + } + return hits +} + +// Intrinsic usage: find refs to a built-in name (e.g., print). +query.intrinsic = function(idx, name) { + var hits = [] + var i = 0 + var ref = null + if (idx.intrinsic_refs == null) return hits + while (i < length(idx.intrinsic_refs)) { + ref = idx.intrinsic_refs[i] + if (ref.name == name) { + hits[] = ref + } + i = i + 1 + } + return hits +} + +// Variable declarations matching a name and optional kind filter. +// kind is one of "var", "def", "fn", "param", or null (any). +query.find_decl = function(idx, name, kind) { + var hits = [] + var i = 0 + var sym = null + while (i < length(idx.symbols)) { + sym = idx.symbols[i] + if (sym.name == name) { + if (kind == null || sym.kind == kind) { + hits[] = sym + } + } + i = i + 1 + } + return hits +} + +return query diff --git a/streamline.ce b/streamline.ce index 72ca5044..1d82f97a 100644 --- a/streamline.ce +++ b/streamline.ce @@ -40,17 +40,13 @@ if (!filename) { $stop() } -var folded = shop.analyze_file(filename) -var pl = shop.pipeline() -var compiled = pl.mcode(folded) - -// Deep copy for before snapshot (needed by --stats) +// Deep copy mcode for before snapshot (needed by --stats, streamline mutates) var before = null if (show_stats) { - before = json.decode(json.encode(compiled)) + before = json.decode(json.encode(shop.mcode_file(filename))) } -var optimized = pl.streamline(compiled) +var optimized = shop.compile_file(filename) // If no flags, default to full JSON output if (!show_stats && !show_ir && !show_check && !show_types) {