// Source listing: cell/index.cm
// Snapshot: 2026-02-18 11:00:51 -06:00 (647 lines, 18 KiB, plaintext)

// index.cm — Core semantic indexing module.
// Walks AST output from parse (+ optional fold) to build a semantic index.
//
// Entry point:
// index_ast(ast, tokens, filename) — index a pre-parsed AST
// Build a span record from an AST node's position fields.
// from_row/to_row are copied as-is; from_column/to_column are stored
// under the shorter keys from_col/to_col.
var make_span = function(node) {
  var start_row = node.from_row
  var start_col = node.from_column
  var end_row = node.to_row
  var end_col = node.to_column
  return {
    from_row: start_row,
    from_col: start_col,
    to_row: end_row,
    to_col: end_col
  }
}
// Index an already-parsed AST. Tokens are optional (used for doc comments).
var index_ast = function(ast, tokens, filename) {
// True when the file name ends in ".ce"; returned as is_actor and used to
// skip export detection.
var is_actor = ends_with(filename, ".ce")
// Result collections, filled while walking and returned in the index record.
var imports = []
var symbols = []
var references = []
var call_sites = []
var exports_list = []
var intrinsic_refs = []
// Last id handed out by next_id.
var node_counter = 0
// function_nr (as text) -> {name, outer, from_row}.
var fn_map = {}
// Loop counters and scratch variables shared by the top-level passes below.
var _i = 0
var _j = 0
var fn = null
var sym_id = null
var params_list = null
var key = null
var entry = null
// Reference name -> list of reference records; built in the final pass.
var reverse = {}
// Index every entry of ast.functions by its function_nr (converted to text),
// keeping only the fields the scope walk needs: name, outer and from_row.
var fn_list = ast.functions
var fn_total = 0
if (fn_list != null) fn_total = length(fn_list)
_i = 0
while (_i < fn_total) {
  fn = fn_list[_i]
  fn_map[text(fn.function_nr)] = {
    name: fn.name,
    outer: fn.outer,
    from_row: fn.from_row
  }
  _i = _i + 1
}
// Follow `outer` links in fn_map upward from func_nr, `lvl` times.
// Returns the function_nr reached, or null when lvl is null or negative,
// when a function on the way is missing from fn_map, or when the chain
// runs out (an `outer` of null) before all levels are consumed.
var resolve_scope_nr = function(func_nr, lvl) {
  var nr = func_nr
  var hops = lvl
  var parent = null
  if (hops == null) return null
  if (hops < 0) return null
  while (hops > 0 && nr != null) {
    parent = fn_map[text(nr)]
    if (parent == null) return null
    nr = parent.outer
    hops = hops - 1
  }
  return nr
}
// Resolve a name node to a symbol id of the form "<filename>:<name>:<kind>".
// The declaring function is found by walking name_node.level scopes up from
// name_node.function_nr; the first entry of ast.scopes for that function that
// has a field named after the variable supplies the kind (its `make` field,
// with "function" shortened to "fn" and "input" to "param").
// Returns null when the scope cannot be resolved or holds no such name.
var resolve_symbol_id = function(name_node) {
  var owner_nr = resolve_scope_nr(name_node.function_nr, name_node.level)
  var idx = 0
  var scope_rec = null
  var decl = null
  var kind = null
  if (owner_nr == null || ast.scopes == null) return null
  while (idx < length(ast.scopes)) {
    scope_rec = ast.scopes[idx]
    decl = (scope_rec.function_nr == owner_nr) ? scope_rec[name_node.name] : null
    if (decl != null) {
      kind = decl.make
      if (kind == "function") {
        kind = "fn"
      } else if (kind == "input") {
        kind = "param"
      }
      return filename + ":" + name_node.name + ":" + kind
    }
    idx = idx + 1
  }
  return null
}
// Symbol id ("<filename>:<name>:fn") of the function numbered func_nr.
// Returns null for function 0, for numbers missing from fn_map, and for
// functions whose recorded name is null.
var get_enclosing = function(func_nr) {
  var rec = fn_map[text(func_nr)]
  if (func_nr == 0) return null
  if (rec == null) return null
  if (rec.name == null) return null
  return filename + ":" + rec.name + ":fn"
}
// Gather the comment tokens that start within the 10 rows before target_row.
// Every token of kind "comment" whose from_row lies in
// [target_row - 10, target_row) is included, in token order, and the values
// are joined with "\n". Comments are not required to be adjacent to the
// target row, so nearby unrelated comments in that window are included too.
// Returns null when tokens or target_row is null, or when no comment matches.
// The scan stops at the first token on or after target_row, so it relies on
// tokens being ordered by row.
var find_doc_comment = function(target_row) {
  var _ti = 0
  var tok = null
  var lines = []
  var first_row = null
  if (tokens == null || target_row == null) return null
  first_row = target_row - 10
  while (_ti < length(tokens)) {
    tok = tokens[_ti]
    if (tok.from_row >= target_row) break
    if (tok.kind == "comment" && tok.from_row >= first_row) {
      lines[] = tok.value
    }
    _ti = _ti + 1
  }
  if (length(lines) > 0) return text(lines, "\n")
  return null
}
// Hand out the next node id: bumps node_counter by one and returns the new
// value, so ids start at 1 and increase monotonically.
var next_id = function() {
  var fresh = node_counter + 1
  node_counter = fresh
  return fresh
}
// Forward declarations for mutual recursion: the three walkers call each
// other, so the names are bound to null here and assigned their function
// values below.
var walk_expr = null
var walk_stmts = null
var walk_stmt = null
// Walk an expression node, collecting references and call sites.
//   node      - AST expression node; null is ignored.
//   enclosing - symbol id (or null) stored as `enclosing` on every record emitted here.
//   is_lhs    - true when the node is an assignment/update target; name references
//               are then recorded with ref_kind "write" instead of "read".
// Appends to references, intrinsic_refs, call_sites and symbols; returns nothing.
// Every non-null node consumes one id from next_id, whether or not it yields a record.
walk_expr = function(node, enclosing, is_lhs) {
var nid = 0
var ref_kind = null
var callee_name = null
var callee_sym = null
var arg_count = 0
var _ai = 0
var enc = null
var param_name = null
if (node == null) return
nid = next_id()
// this keyword: recorded as a read with no symbol.
if (node.kind == "this") {
references[] = {
node_id: nid,
name: "this",
symbol_id: null,
span: make_span(node),
enclosing: enclosing,
ref_kind: "read"
}
return
}
// Capture intrinsic refs with positions (intrinsics lack function_nr).
// These go to intrinsic_refs only, never to references.
if (node.kind == "name" && node.name != null && node.intrinsic == true) {
intrinsic_refs[] = {
node_id: nid,
name: node.name,
span: make_span(node),
enclosing: enclosing
}
}
// Name reference — has function_nr when it's a true variable reference.
if (node.kind == "name" && node.name != null && node.function_nr != null) {
if (node.intrinsic != true) {
ref_kind = is_lhs ? "write" : "read"
references[] = {
node_id: nid,
name: node.name,
symbol_id: resolve_symbol_id(node),
span: make_span(node),
enclosing: enclosing,
ref_kind: ref_kind
}
}
}
// Call expression.
if (node.kind == "(") {
callee_name = null
callee_sym = null
arg_count = (node.list != null) ? length(node.list) : 0
if (node.expression != null) {
if (node.expression.kind == "name") {
// Plain name callee: only non-intrinsic names with a function_nr are resolved.
callee_name = node.expression.name
if (node.expression.intrinsic != true && node.expression.function_nr != null) {
callee_sym = resolve_symbol_id(node.expression)
}
} else if (node.expression.kind == ".") {
// Property callee: name becomes "<left>.<right>", or just "<right>" when
// the left side is not a plain name. No symbol is resolved for it.
if (node.expression.left != null && node.expression.left.kind == "name") {
callee_name = node.expression.left.name
}
if (node.expression.right != null && node.expression.right.name != null) {
callee_name = (callee_name != null ? callee_name + "." : "") + node.expression.right.name
}
}
}
// Calls whose callee name is exactly "use" are not recorded as call sites.
if (callee_name != "use") {
call_sites[] = {
node_id: nid,
callee: callee_name,
callee_symbol_id: callee_sym,
span: make_span(node),
enclosing: enclosing,
args_count: arg_count
}
}
// Also record the callee name as a "call" reference.
// It reuses the call node's id (nid) but carries the callee's own span.
if (node.expression != null && node.expression.kind == "name" &&
node.expression.function_nr != null && node.expression.intrinsic != true) {
references[] = {
node_id: nid,
name: node.expression.name,
symbol_id: resolve_symbol_id(node.expression),
span: make_span(node.expression),
enclosing: enclosing,
ref_kind: "call"
}
}
// Capture intrinsic callee refs (e.g., print, length).
if (node.expression != null && node.expression.kind == "name" &&
node.expression.intrinsic == true && node.expression.name != null) {
intrinsic_refs[] = {
node_id: nid,
name: node.expression.name,
span: make_span(node.expression),
enclosing: enclosing
}
}
// Walk callee expression (skip name — already recorded above).
if (node.expression != null && node.expression.kind != "name") {
walk_expr(node.expression, enclosing, false)
}
// Walk arguments; each is walked as a read.
if (node.list != null) {
_ai = 0
while (_ai < length(node.list)) {
walk_expr(node.list[_ai], enclosing, false)
_ai = _ai + 1
}
}
return
}
// Function / arrow function expression — walk body.
if (node.kind == "function" || node.kind == "arrow function") {
// The body is walked with this function as `enclosing`; without a
// function_nr the outer enclosing is kept. Unnamed functions get an
// "anon_<function_nr>" id component.
enc = enclosing
if (node.function_nr != null) {
if (node.name != null) {
enc = filename + ":" + node.name + ":fn"
} else {
enc = filename + ":anon_" + text(node.function_nr) + ":fn"
}
}
// Record params as symbols.
if (node.list != null) {
_ai = 0
while (_ai < length(node.list)) {
param_name = node.list[_ai].name
if (param_name != null) {
symbols[] = {
symbol_id: filename + ":" + param_name + ":param",
name: param_name,
kind: "param",
decl_span: make_span(node.list[_ai]),
doc_comment: null,
scope_fn_nr: node.function_nr,
params: null
}
}
_ai = _ai + 1
}
}
// Both the `statements` and the `disruption` statement lists are walked.
walk_stmts(node.statements, enc)
walk_stmts(node.disruption, enc)
return
}
// Assignment operators — left side is a write.
// Only the operators listed here take this path; any other node kind
// reaches the generic fallthrough at the end.
if (node.kind == "=" || node.kind == "+=" || node.kind == "-=" ||
node.kind == "*=" || node.kind == "/=" || node.kind == "%=") {
walk_expr(node.left, enclosing, true)
walk_expr(node.right, enclosing, false)
return
}
// Property access — only walk left (right is property name, not a ref).
// The object is walked as a read even when the access itself is a write target.
if (node.kind == ".") {
walk_expr(node.left, enclosing, false)
return
}
// Index access: both the container and the index expression are reads.
if (node.kind == "[") {
walk_expr(node.left, enclosing, false)
walk_expr(node.right, enclosing, false)
return
}
// Array literal.
if (node.kind == "array" && node.list != null) {
_ai = 0
while (_ai < length(node.list)) {
walk_expr(node.list[_ai], enclosing, false)
_ai = _ai + 1
}
return
}
// Record literal — only walk values, not keys.
if (node.kind == "record" && node.list != null) {
_ai = 0
while (_ai < length(node.list)) {
if (node.list[_ai] != null) {
walk_expr(node.list[_ai].right, enclosing, false)
}
_ai = _ai + 1
}
return
}
// Template literal.
if (node.kind == "template" && node.list != null) {
_ai = 0
while (_ai < length(node.list)) {
walk_expr(node.list[_ai], enclosing, false)
_ai = _ai + 1
}
return
}
// Prefix/postfix increment/decrement — treat as write.
if (node.kind == "++" || node.kind == "--") {
walk_expr(node.expression, enclosing, true)
return
}
// Ternary: condition in `expression`, branches in `then` and `else`.
if (node.kind == "?" || node.kind == "then") {
walk_expr(node.expression, enclosing, false)
walk_expr(node.then, enclosing, false)
walk_expr(node.else, enclosing, false)
return
}
// Generic fallthrough: walk left, right, expression.
// Only `left` inherits is_lhs; `right` and `expression` are always reads.
if (node.left != null) walk_expr(node.left, enclosing, is_lhs)
if (node.right != null) walk_expr(node.right, enclosing, false)
if (node.expression != null) walk_expr(node.expression, enclosing, false)
}
// Visit each statement of a list in order, handing every element to
// walk_stmt with the same `enclosing`. A null list is a no-op.
walk_stmts = function(stmts, enclosing) {
  var pos = 0
  var total = 0
  if (stmts == null) return
  total = length(stmts)
  while (pos < total) {
    walk_stmt(stmts[pos], enclosing)
    pos = pos + 1
  }
}
// Walk a single statement.
//   stmt      - AST statement node; null is ignored.
//   enclosing - symbol id (or null) passed through unchanged to every nested walk.
// Records declared symbols and use() imports, then walks the expressions and
// statement lists the statement contains. Returns nothing.
walk_stmt = function(stmt, enclosing) {
var sym_kind = null
var s_id = null
var p_list = null
var _di = 0
// NOTE(review): local_name is not referenced anywhere in this function.
var local_name = null
if (stmt == null) return
// Variable/constant declaration.
if (stmt.kind == "var" || stmt.kind == "def") {
if (stmt.left != null && stmt.left.name != null) {
// The symbol kind is the statement kind ("var"/"def") unless the
// initializer is a function expression, in which case it is "fn".
sym_kind = stmt.kind
p_list = null
// Check if RHS is a function expression.
if (stmt.right != null && (stmt.right.kind == "function" || stmt.right.kind == "arrow function")) {
sym_kind = "fn"
p_list = []
if (stmt.right.list != null) {
_di = 0
while (_di < length(stmt.right.list)) {
if (stmt.right.list[_di].name != null) {
p_list[] = stmt.right.list[_di].name
}
_di = _di + 1
}
}
}
s_id = filename + ":" + stmt.left.name + ":" + sym_kind
// scope_fn_nr is always 0 here, regardless of `enclosing`.
// NOTE(review): declarations nested in functions are tagged 0 as well — confirm intended.
symbols[] = {
symbol_id: s_id,
name: stmt.left.name,
kind: sym_kind,
decl_span: make_span(stmt),
doc_comment: find_doc_comment(stmt.from_row),
scope_fn_nr: 0,
params: p_list
}
// Check for import: var x = use('path').
// Only a first argument of kind "text" counts; its value is the module path.
if (stmt.right != null && stmt.right.kind == "(" &&
stmt.right.expression != null && stmt.right.expression.name == "use" &&
stmt.right.list != null && length(stmt.right.list) > 0 &&
stmt.right.list[0].kind == "text") {
imports[] = {
local_name: stmt.left.name,
module_path: stmt.right.list[0].value,
span: make_span(stmt)
}
}
}
// The initializer is walked even when the declaration has no usable name.
walk_expr(stmt.right, enclosing, false)
return
}
// Multiple declarations (var_list): each list element is walked as its own statement.
if (stmt.kind == "var_list" && stmt.list != null) {
_di = 0
while (_di < length(stmt.list)) {
walk_stmt(stmt.list[_di], enclosing)
_di = _di + 1
}
return
}
// Expression statement.
if (stmt.kind == "call") {
// Check for bare use() as expression statement.
// Recorded as an import with no local name.
if (stmt.expression != null && stmt.expression.kind == "(" &&
stmt.expression.expression != null && stmt.expression.expression.name == "use" &&
stmt.expression.list != null && length(stmt.expression.list) > 0 &&
stmt.expression.list[0].kind == "text") {
imports[] = {
local_name: null,
module_path: stmt.expression.list[0].value,
span: make_span(stmt)
}
}
walk_expr(stmt.expression, enclosing, false)
return
}
// If statement: condition, then-branch, optional else-branch.
if (stmt.kind == "if") {
walk_expr(stmt.expression, enclosing, false)
walk_stmts(stmt.then, enclosing)
if (stmt.else != null) {
walk_stmts(stmt.else, enclosing)
}
// else-if chain.
if (stmt.list != null) {
walk_stmts(stmt.list, enclosing)
}
return
}
// While loop.
if (stmt.kind == "while") {
walk_expr(stmt.expression, enclosing, false)
walk_stmts(stmt.statements, enclosing)
return
}
// For loop: init, test and update are all walked as expressions.
if (stmt.kind == "for") {
walk_expr(stmt.init, enclosing, false)
walk_expr(stmt.test, enclosing, false)
walk_expr(stmt.update, enclosing, false)
walk_stmts(stmt.statements, enclosing)
return
}
// Do-while loop: body first, then the condition.
if (stmt.kind == "do") {
walk_stmts(stmt.statements, enclosing)
walk_expr(stmt.expression, enclosing, false)
return
}
// Return statement.
if (stmt.kind == "return") {
walk_expr(stmt.expression, enclosing, false)
return
}
// Disrupt.
if (stmt.kind == "disrupt") {
walk_expr(stmt.expression, enclosing, false)
return
}
// Block.
if (stmt.kind == "block") {
walk_stmts(stmt.statements, enclosing)
return
}
// Fallthrough: walk any sub-nodes.
// Null fields are harmless: walk_expr and walk_stmts both ignore null.
walk_expr(stmt.expression, enclosing, false)
walk_expr(stmt.left, enclosing, false)
walk_expr(stmt.right, enclosing, false)
walk_stmts(stmt.statements, enclosing)
}
// --- 1. Register every entry of ast.functions as an "fn" symbol, then walk its body ---
var fn_params = null
var fn_count = 0
if (ast.functions != null) fn_count = length(ast.functions)
_i = 0
while (_i < fn_count) {
  fn = ast.functions[_i]
  fn_params = fn.list
  // Unnamed functions get a synthetic "anon_<function_nr>" id component.
  if (fn.name != null) {
    sym_id = filename + ":" + fn.name + ":fn"
  } else {
    sym_id = filename + ":anon_" + text(fn.function_nr) + ":fn"
  }
  // Names of the named parameters, in declaration order.
  _j = 0
  params_list = []
  while (fn_params != null && _j < length(fn_params)) {
    if (fn_params[_j].name != null) {
      params_list[] = fn_params[_j].name
    }
    _j = _j + 1
  }
  // The function symbol itself; scope_fn_nr falls back to 0 when `outer` is null.
  symbols[] = {
    symbol_id: sym_id,
    name: fn.name,
    kind: "fn",
    decl_span: make_span(fn),
    doc_comment: find_doc_comment(fn.from_row),
    scope_fn_nr: fn.outer != null ? fn.outer : 0,
    params: params_list
  }
  // Each named parameter is also a symbol of kind "param", scoped to this function.
  _j = 0
  while (fn_params != null && _j < length(fn_params)) {
    if (fn_params[_j].name != null) {
      symbols[] = {
        symbol_id: filename + ":" + fn_params[_j].name + ":param",
        name: fn_params[_j].name,
        kind: "param",
        decl_span: make_span(fn_params[_j]),
        doc_comment: null,
        scope_fn_nr: fn.function_nr,
        params: null
      }
    }
    _j = _j + 1
  }
  // Walk both statement lists of the function with its symbol id as `enclosing`.
  walk_stmts(fn.statements, sym_id)
  walk_stmts(fn.disruption, sym_id)
  _i = _i + 1
}
// --- 2. Walk top-level statements ---
walk_stmts(ast.statements, null)
// --- 3. Detect exports for .cm modules ---
// Scans the top-level statements from the end for the last `return` that
// carries an expression and stops there. Only a record literal contributes
// exports: one entry per named key, linked to a symbol when the value is a
// resolvable name reference (symbol_id is null otherwise).
var ret_expr = null
if (!is_actor && ast.statements != null) {
  _i = length(ast.statements) - 1
  while (_i >= 0) {
    ret_expr = null
    if (ast.statements[_i] != null && ast.statements[_i].kind == "return") {
      ret_expr = ast.statements[_i].expression
    }
    if (ret_expr != null) {
      // Require kind "record": call and array nodes also carry `list`, and
      // their elements must not be mistaken for export keys.
      if (ret_expr.kind == "record" && ret_expr.list != null) {
        _j = 0
        while (_j < length(ret_expr.list)) {
          entry = ret_expr.list[_j]
          if (entry != null && entry.left != null && entry.left.name != null) {
            // Link the export to a symbol if the value is a name reference.
            sym_id = null
            if (entry.right != null && entry.right.kind == "name" && entry.right.function_nr != null) {
              sym_id = resolve_symbol_id(entry.right)
            }
            exports_list[] = {
              name: entry.left.name,
              symbol_id: sym_id
            }
          }
          _j = _j + 1
        }
      }
      break
    }
    _i = _i - 1
  }
}
// --- 4. Build reverse refs: reference name -> every recorded occurrence of that name ---
// Each occurrence keeps node_id, span, enclosing and ref_kind, in the order
// the references were collected.
var cur_ref = null
_i = 0
while (_i < length(references)) {
  cur_ref = references[_i]
  key = cur_ref.name
  if (reverse[key] == null) {
    reverse[key] = []
  }
  reverse[key][] = {
    node_id: cur_ref.node_id,
    span: cur_ref.span,
    enclosing: cur_ref.enclosing,
    ref_kind: cur_ref.ref_kind
  }
  _i = _i + 1
}
return {
version: 1,
path: filename,
is_actor: is_actor,
imports: imports,
symbols: symbols,
references: references,
intrinsic_refs: intrinsic_refs,
call_sites: call_sites,
exports: exports_list,
reverse_refs: reverse
}
}
// Value returned at file level: a record whose only field is index_ast.
return {
index_ast: index_ast
}