// index.cm — Core semantic indexing module.
// Walks AST output from parse (+ optional fold) to build a semantic index.
//
// Two entry points:
//   index_file(src, filename, pipeline) — full pipeline; pipeline is {tokenize, parse, fold}
//   index_ast(ast, tokens, filename)    — index a pre-parsed AST

// Copy a node's source position into a span record.
// The node's from_column/to_column fields are renamed to from_col/to_col.
var make_span = function(node) {
  return {
    from_row: node.from_row,
    from_col: node.from_column,
    to_row: node.to_row,
    to_col: node.to_column
  }
}

// Index an already-parsed AST.
//   ast      — parse output; reads ast.functions, ast.scopes and ast.statements
//              (each of the three may be null).
//   tokens   — optional token list, used only to look up doc comments; may be null.
//   filename — prefix of every symbol id; a ".ce" suffix marks the file as an actor.
// Returns {version, path, is_actor, imports, symbols, references, call_sites,
// exports, reverse_refs}.
var index_ast = function(ast, tokens, filename) {
  var is_actor = ends_with(filename, ".ce")
  var imports = []
  var symbols = []
  var references = []
  var call_sites = []
  var exports_list = []
  var node_counter = 0
  var fn_map = {}
  var reverse = {}
  // Number of rows above a declaration that are searched for comment tokens.
  var doc_comment_window = 10
  var _i = 0
  var _j = 0
  var fn = null
  var sym_id = null
  var params_list = null
  var key = null
  var entry = null
  var top_stmt = null
  var ret_expr = null

  // Build function_nr -> {name, outer, from_row} map from ast.functions.
  if (ast.functions != null) {
    _i = 0
    while (_i < length(ast.functions)) {
      fn = ast.functions[_i]
      fn_map[text(fn.function_nr)] = {
        name: fn.name,
        outer: fn.outer,
        from_row: fn.from_row
      }
      _i = _i + 1
    }
  }

  // Walk the scope chain upward by `lvl` levels starting at func_nr.
  // Returns the function_nr reached, or null when lvl is null/negative or a
  // function on the way up is missing from fn_map.
  var resolve_scope_nr = function(func_nr, lvl) {
    var current = func_nr
    var remaining = lvl
    var info = null
    if (remaining == null || remaining < 0) return null
    while (remaining > 0 && current != null) {
      info = fn_map[text(current)]
      if (info != null) {
        current = info.outer
      } else {
        return null
      }
      remaining = remaining - 1
    }
    return current
  }

  // Resolve a name node to its symbol id ("<filename>:<name>:<kind>") by
  // locating the declaring scope in ast.scopes. Returns null when the scope
  // or the name's entry cannot be found.
  var resolve_symbol_id = function(name_node) {
    var decl_fn_nr = resolve_scope_nr(name_node.function_nr, name_node.level)
    var _si = 0
    var s = null
    var e = null
    var kind_str = null
    if (decl_fn_nr == null) return null
    if (ast.scopes == null) return null
    _si = 0
    while (_si < length(ast.scopes)) {
      s = ast.scopes[_si]
      if (s.function_nr == decl_fn_nr) {
        e = s[name_node.name]
        if (e != null) {
          // Map the scope entry's `make` onto the kind strings used in symbol ids.
          kind_str = e.make
          if (kind_str == "function") kind_str = "fn"
          if (kind_str == "input") kind_str = "param"
          return filename + ":" + name_node.name + ":" + kind_str
        }
      }
      _si = _si + 1
    }
    return null
  }

  // Collect the comment tokens found in the `doc_comment_window` rows before
  // target_row and join their values with newlines. Returns null when no
  // tokens were supplied or no comment falls inside that window.
  // The scan stops at the first token at or past target_row.
  var find_doc_comment = function(target_row) {
    var _ti = 0
    var tok = null
    var lines = []
    if (tokens == null) return null
    _ti = 0
    while (_ti < length(tokens)) {
      tok = tokens[_ti]
      if (tok.kind == "comment" && tok.from_row >= target_row - doc_comment_window && tok.from_row < target_row) {
        lines[] = tok.value
      }
      if (tok.from_row >= target_row) break
      _ti = _ti + 1
    }
    if (length(lines) > 0) return text(lines, "\n")
    return null
  }

  // Allocate a monotonic node id (first id handed out is 1).
  var next_id = function() {
    node_counter = node_counter + 1
    return node_counter
  }

  // Forward declarations for mutual recursion.
  var walk_expr = null
  var walk_stmts = null
  var walk_stmt = null

  // Walk an expression node, collecting references and call sites.
  //   node      — expression node; null is ignored.
  //   enclosing — symbol id of the enclosing function, or null at top level.
  //   is_lhs    — true when the node is an assignment/update target.
  walk_expr = function(node, enclosing, is_lhs) {
    var nid = 0
    var ref_kind = null
    var callee_name = null
    var callee_sym = null
    var arg_count = 0
    var _ai = 0
    var enc = null
    var param_name = null

    if (node == null) return
    nid = next_id()

    // Name reference — has function_nr when it's a true variable reference.
    // No early return: a name node then reaches the generic fallthrough below.
    if (node.kind == "name" && node.name != null && node.function_nr != null) {
      if (node.intrinsic != true) {
        ref_kind = is_lhs ? "write" : "read"
        references[] = {
          node_id: nid,
          name: node.name,
          symbol_id: resolve_symbol_id(node),
          span: make_span(node),
          enclosing: enclosing,
          ref_kind: ref_kind
        }
      }
    }

    // Call expression.
    if (node.kind == "(") {
      callee_name = null
      callee_sym = null
      arg_count = (node.list != null) ? length(node.list) : 0
      if (node.expression != null) {
        if (node.expression.kind == "name") {
          callee_name = node.expression.name
          if (node.expression.intrinsic != true && node.expression.function_nr != null) {
            callee_sym = resolve_symbol_id(node.expression)
          }
        } else if (node.expression.kind == ".") {
          // Method-style call: build "left.right", or just "right" when the
          // left side is not a plain name.
          if (node.expression.left != null && node.expression.left.kind == "name") {
            callee_name = node.expression.left.name
          }
          if (node.expression.right != null && node.expression.right.name != null) {
            callee_name = (callee_name != null ? callee_name + "." : "") + node.expression.right.name
          }
        }
      }

      // use(...) is reported through `imports` by walk_stmt, not as a call site.
      if (callee_name != "use") {
        call_sites[] = {
          node_id: nid,
          callee: callee_name,
          callee_symbol_id: callee_sym,
          span: make_span(node),
          enclosing: enclosing,
          args_count: arg_count
        }
      }

      // Also record the callee name as a "call" reference.
      if (node.expression != null && node.expression.kind == "name" && node.expression.function_nr != null && node.expression.intrinsic != true) {
        references[] = {
          node_id: nid,
          name: node.expression.name,
          symbol_id: resolve_symbol_id(node.expression),
          span: make_span(node.expression),
          enclosing: enclosing,
          ref_kind: "call"
        }
      }

      // Walk callee expression (skip name — already recorded above).
      if (node.expression != null && node.expression.kind != "name") {
        walk_expr(node.expression, enclosing, false)
      }

      // Walk arguments.
      if (node.list != null) {
        _ai = 0
        while (_ai < length(node.list)) {
          walk_expr(node.list[_ai], enclosing, false)
          _ai = _ai + 1
        }
      }
      return
    }

    // Function / arrow function expression — walk body.
    if (node.kind == "function" || node.kind == "arrow function") {
      // A named function becomes the enclosing symbol for its own body;
      // an anonymous one inherits the current enclosing symbol.
      enc = enclosing
      if (node.name != null && node.function_nr != null) {
        enc = filename + ":" + node.name + ":fn"
      }

      // Record params as symbols.
      if (node.list != null) {
        _ai = 0
        while (_ai < length(node.list)) {
          param_name = node.list[_ai].name
          if (param_name != null) {
            symbols[] = {
              symbol_id: filename + ":" + param_name + ":param",
              name: param_name,
              kind: "param",
              decl_span: make_span(node.list[_ai]),
              doc_comment: null,
              scope_fn_nr: node.function_nr,
              params: null
            }
          }
          _ai = _ai + 1
        }
      }

      walk_stmts(node.statements, enc)
      walk_stmts(node.disruption, enc)
      return
    }

    // Assignment operators — left side is a write.
    // NOTE(review): only these six operator kinds are treated as writes; any
    // other assignment-like node kind the parser may emit falls through to the
    // generic case below and its target is recorded as a read — confirm against
    // the parser's node kinds.
    if (node.kind == "=" || node.kind == "+=" || node.kind == "-=" || node.kind == "*=" || node.kind == "/=" || node.kind == "%=") {
      walk_expr(node.left, enclosing, true)
      walk_expr(node.right, enclosing, false)
      return
    }

    // Property access — only walk left (right is property name, not a ref).
    if (node.kind == ".") {
      walk_expr(node.left, enclosing, false)
      return
    }

    // Index access.
    if (node.kind == "[") {
      walk_expr(node.left, enclosing, false)
      walk_expr(node.right, enclosing, false)
      return
    }

    // Array literal.
    if (node.kind == "array" && node.list != null) {
      _ai = 0
      while (_ai < length(node.list)) {
        walk_expr(node.list[_ai], enclosing, false)
        _ai = _ai + 1
      }
      return
    }

    // Record literal — only walk values, not keys.
    if (node.kind == "record" && node.list != null) {
      _ai = 0
      while (_ai < length(node.list)) {
        if (node.list[_ai] != null) {
          walk_expr(node.list[_ai].right, enclosing, false)
        }
        _ai = _ai + 1
      }
      return
    }

    // Template literal.
    if (node.kind == "template" && node.list != null) {
      _ai = 0
      while (_ai < length(node.list)) {
        walk_expr(node.list[_ai], enclosing, false)
        _ai = _ai + 1
      }
      return
    }

    // Prefix/postfix increment/decrement — treat as write.
    if (node.kind == "++" || node.kind == "--") {
      walk_expr(node.expression, enclosing, true)
      return
    }

    // Ternary.
    if (node.kind == "?" || node.kind == "then") {
      walk_expr(node.expression, enclosing, false)
      walk_expr(node.then, enclosing, false)
      walk_expr(node.else, enclosing, false)
      return
    }

    // Generic fallthrough: walk left, right, expression.
    // Only the left child inherits is_lhs.
    if (node.left != null) walk_expr(node.left, enclosing, is_lhs)
    if (node.right != null) walk_expr(node.right, enclosing, false)
    if (node.expression != null) walk_expr(node.expression, enclosing, false)
  }

  // Walk an array of statements; a null array is ignored.
  walk_stmts = function(stmts, enclosing) {
    var _wi = 0
    if (stmts == null) return
    _wi = 0
    while (_wi < length(stmts)) {
      walk_stmt(stmts[_wi], enclosing)
      _wi = _wi + 1
    }
  }

  // Walk a single statement: records declared symbols and use() imports, then
  // descends into the statement's expressions and nested statements.
  walk_stmt = function(stmt, enclosing) {
    var sym_kind = null
    var s_id = null
    var p_list = null
    var _di = 0

    if (stmt == null) return

    // Variable/constant declaration.
    if (stmt.kind == "var" || stmt.kind == "def") {
      if (stmt.left != null && stmt.left.name != null) {
        sym_kind = stmt.kind
        p_list = null

        // Check if RHS is a function expression; if so the symbol is an "fn"
        // and its parameter names are attached.
        if (stmt.right != null && (stmt.right.kind == "function" || stmt.right.kind == "arrow function")) {
          sym_kind = "fn"
          p_list = []
          if (stmt.right.list != null) {
            _di = 0
            while (_di < length(stmt.right.list)) {
              if (stmt.right.list[_di].name != null) {
                p_list[] = stmt.right.list[_di].name
              }
              _di = _di + 1
            }
          }
        }

        s_id = filename + ":" + stmt.left.name + ":" + sym_kind
        // NOTE(review): scope_fn_nr is always 0 here, including for
        // declarations reached while walking a function body — confirm intended.
        symbols[] = {
          symbol_id: s_id,
          name: stmt.left.name,
          kind: sym_kind,
          decl_span: make_span(stmt),
          doc_comment: find_doc_comment(stmt.from_row),
          scope_fn_nr: 0,
          params: p_list
        }

        // Check for import: var x = use('path').
        if (stmt.right != null && stmt.right.kind == "(" && stmt.right.expression != null && stmt.right.expression.name == "use" && stmt.right.list != null && length(stmt.right.list) > 0 && stmt.right.list[0].kind == "text") {
          imports[] = {
            local_name: stmt.left.name,
            module_path: stmt.right.list[0].value,
            span: make_span(stmt)
          }
        }
      }
      walk_expr(stmt.right, enclosing, false)
      return
    }

    // Multiple declarations (var_list).
    if (stmt.kind == "var_list" && stmt.list != null) {
      _di = 0
      while (_di < length(stmt.list)) {
        walk_stmt(stmt.list[_di], enclosing)
        _di = _di + 1
      }
      return
    }

    // Expression statement.
    if (stmt.kind == "call") {
      // Check for bare use() as expression statement.
      if (stmt.expression != null && stmt.expression.kind == "(" && stmt.expression.expression != null && stmt.expression.expression.name == "use" && stmt.expression.list != null && length(stmt.expression.list) > 0 && stmt.expression.list[0].kind == "text") {
        imports[] = {
          local_name: null,
          module_path: stmt.expression.list[0].value,
          span: make_span(stmt)
        }
      }
      walk_expr(stmt.expression, enclosing, false)
      return
    }

    // If statement.
    if (stmt.kind == "if") {
      walk_expr(stmt.expression, enclosing, false)
      walk_stmts(stmt.then, enclosing)
      if (stmt.else != null) {
        walk_stmts(stmt.else, enclosing)
      }
      // else-if chain.
      if (stmt.list != null) {
        walk_stmts(stmt.list, enclosing)
      }
      return
    }

    // While loop.
    if (stmt.kind == "while") {
      walk_expr(stmt.expression, enclosing, false)
      walk_stmts(stmt.statements, enclosing)
      return
    }

    // For loop.
    if (stmt.kind == "for") {
      walk_expr(stmt.init, enclosing, false)
      walk_expr(stmt.test, enclosing, false)
      walk_expr(stmt.update, enclosing, false)
      walk_stmts(stmt.statements, enclosing)
      return
    }

    // Do-while loop.
    if (stmt.kind == "do") {
      walk_stmts(stmt.statements, enclosing)
      walk_expr(stmt.expression, enclosing, false)
      return
    }

    // Return statement.
    if (stmt.kind == "return") {
      walk_expr(stmt.expression, enclosing, false)
      return
    }

    // Disrupt.
    if (stmt.kind == "disrupt") {
      walk_expr(stmt.expression, enclosing, false)
      return
    }

    // Block.
    if (stmt.kind == "block") {
      walk_stmts(stmt.statements, enclosing)
      return
    }

    // Fallthrough: walk any sub-nodes.
    walk_expr(stmt.expression, enclosing, false)
    walk_expr(stmt.left, enclosing, false)
    walk_expr(stmt.right, enclosing, false)
    walk_stmts(stmt.statements, enclosing)
  }

  // --- 1. Process functions from ast.functions ---
  if (ast.functions != null) {
    _i = 0
    while (_i < length(ast.functions)) {
      fn = ast.functions[_i]
      // Unnamed functions get a synthetic "anon_<function_nr>" id component.
      sym_id = filename + ":" + (fn.name != null ? fn.name : "anon_" + text(fn.function_nr)) + ":fn"
      params_list = []
      if (fn.list != null) {
        _j = 0
        while (_j < length(fn.list)) {
          if (fn.list[_j].name != null) {
            params_list[] = fn.list[_j].name
          }
          _j = _j + 1
        }
      }

      symbols[] = {
        symbol_id: sym_id,
        name: fn.name,
        kind: "fn",
        decl_span: make_span(fn),
        doc_comment: find_doc_comment(fn.from_row),
        scope_fn_nr: fn.outer != null ? fn.outer : 0,
        params: params_list
      }

      // Record params as symbols.
      if (fn.list != null) {
        _j = 0
        while (_j < length(fn.list)) {
          if (fn.list[_j].name != null) {
            symbols[] = {
              symbol_id: filename + ":" + fn.list[_j].name + ":param",
              name: fn.list[_j].name,
              kind: "param",
              decl_span: make_span(fn.list[_j]),
              doc_comment: null,
              scope_fn_nr: fn.function_nr,
              params: null
            }
          }
          _j = _j + 1
        }
      }

      // Walk function body.
      walk_stmts(fn.statements, sym_id)
      walk_stmts(fn.disruption, sym_id)
      _i = _i + 1
    }
  }

  // --- 2. Walk top-level statements ---
  walk_stmts(ast.statements, null)

  // --- 3. Detect exports for .cm modules ---
  // Scan backwards for the last top-level return that carries an expression.
  // Only a record literal yields exports; its keys are the export names.
  if (!is_actor && ast.statements != null) {
    _i = length(ast.statements) - 1
    while (_i >= 0) {
      top_stmt = ast.statements[_i]
      if (top_stmt.kind == "return" && top_stmt.expression != null) {
        ret_expr = top_stmt.expression
        // Require an actual record literal: call and array nodes also carry a
        // `list`, and their items must not be mistaken for key-value pairs.
        if (ret_expr.kind == "record" && ret_expr.list != null) {
          _j = 0
          while (_j < length(ret_expr.list)) {
            entry = ret_expr.list[_j]
            if (entry != null && entry.left != null && entry.left.name != null) {
              // Link the export to a symbol if the value is a name reference.
              sym_id = null
              if (entry.right != null && entry.right.kind == "name" && entry.right.function_nr != null) {
                sym_id = resolve_symbol_id(entry.right)
              }
              exports_list[] = {
                name: entry.left.name,
                symbol_id: sym_id
              }
            }
            _j = _j + 1
          }
        }
        // Only the last such return is considered.
        break
      }
      _i = _i - 1
    }
  }

  // --- 4. Build reverse refs: name -> list of reference sites ---
  _i = 0
  while (_i < length(references)) {
    key = references[_i].name
    if (reverse[key] == null) {
      reverse[key] = []
    }
    reverse[key][] = {
      node_id: references[_i].node_id,
      span: references[_i].span,
      enclosing: references[_i].enclosing,
      ref_kind: references[_i].ref_kind
    }
    _i = _i + 1
  }

  return {
    version: 1,
    path: filename,
    is_actor: is_actor,
    imports: imports,
    symbols: symbols,
    references: references,
    call_sites: call_sites,
    exports: exports_list,
    reverse_refs: reverse
  }
}

// Run the full pipeline (tokenize -> parse -> fold) and index.
// pipeline is {tokenize, parse, fold} — pass fold as null to skip folding.
// The tokenizer's tokens are forwarded to index_ast for doc-comment lookup.
var index_file = function(src, filename, pipeline) {
  var tok_result = pipeline.tokenize(src, filename)
  var ast = pipeline.parse(tok_result.tokens, src, filename, pipeline.tokenize)
  if (pipeline.fold != null) {
    ast = pipeline.fold(ast)
  }
  return index_ast(ast, tok_result.tokens, filename)
}

return {
  index_file: index_file,
  index_ast: index_ast
}