647 lines
18 KiB
Plaintext
647 lines
18 KiB
Plaintext
// index.cm — Core semantic indexing module.
// Walks AST output from parse (+ optional fold) to build a semantic index.
//
// Entry point:
//   index_ast(ast, tokens, filename) — index a pre-parsed AST
|
|
|
|
// Build the span record stored on every index entry from a node's position.
// Reads from_row / from_column / to_row / to_column off the node and returns
// them under the keys from_row / from_col / to_row / to_col.
var make_span = function(node) {
  var start_row = node.from_row
  var start_col = node.from_column
  var end_row = node.to_row
  var end_col = node.to_column
  return {
    from_row: start_row,
    from_col: start_col,
    to_row: end_row,
    to_col: end_col
  }
}
|
|
|
|
// Index an already-parsed AST. Tokens are optional (used for doc comments).
|
|
var index_ast = function(ast, tokens, filename) {
|
|
// True when the file name ends in ".ce"; export detection is skipped for such files.
var is_actor = ends_with(filename, ".ce")

// Output collections: filled by the walkers and passes below, then returned
// in the index record.
var imports = []
var symbols = []
var references = []
var call_sites = []
var exports_list = []
var intrinsic_refs = []

// Last id handed out by next_id().
var node_counter = 0

// text(function_nr) -> {name, outer, from_row}, built from ast.functions.
var fn_map = {}

// Scratch variables shared by the top-level passes of index_ast.
var _i = 0
var _j = 0
var fn = null
var sym_id = null
var params_list = null
var key = null
var entry = null

// Reference name -> list of reference sites (filled in the last pass).
var reverse = {}

// Build function_nr -> {name, outer, from_row} map from ast.functions.
if (ast.functions != null) {
  _i = 0
  while (_i < length(ast.functions)) {
    fn = ast.functions[_i]
    fn_map[text(fn.function_nr)] = {
      name: fn.name,
      outer: fn.outer,
      from_row: fn.from_row
    }
    _i = _i + 1
  }
}
|
|
|
|
// Follow the `outer` links recorded in fn_map, starting at func_nr and moving
// outward `lvl` times.
// Returns the function number reached (func_nr itself when lvl is 0; possibly
// null if the chain ends early), or null when lvl is null or negative, or when
// a function number on the way is missing from fn_map.
var resolve_scope_nr = function(func_nr, lvl) {
  var scope_nr = func_nr
  var hops_left = lvl
  var fn_info = null
  if (hops_left == null) return null
  if (hops_left < 0) return null
  while (hops_left > 0) {
    if (scope_nr == null) break
    fn_info = fn_map[text(scope_nr)]
    if (fn_info == null) return null
    scope_nr = fn_info.outer
    hops_left = hops_left - 1
  }
  return scope_nr
}
|
|
|
|
// Map a name node to the id of the symbol it refers to.
// The declaring function is found with resolve_scope_nr(function_nr, level);
// ast.scopes is then searched for a scope record of that function which has
// an entry under the node's name. The id is "<filename>:<name>:<kind>", where
// kind is the entry's `make` with "function" rewritten to "fn" and "input"
// rewritten to "param".
// Returns null when the scope cannot be resolved, ast.scopes is absent, or no
// matching entry exists.
var resolve_symbol_id = function(name_node) {
  var owner_nr = resolve_scope_nr(name_node.function_nr, name_node.level)
  var idx = 0
  var table = null
  var decl = null
  var suffix = null
  if (owner_nr == null || ast.scopes == null) return null
  while (idx < length(ast.scopes)) {
    table = ast.scopes[idx]
    // Only scope records belonging to the declaring function are consulted.
    decl = (table.function_nr == owner_nr) ? table[name_node.name] : null
    if (decl != null) {
      suffix = decl.make
      if (suffix == "function") {
        suffix = "fn"
      } else if (suffix == "input") {
        suffix = "param"
      }
      return filename + ":" + name_node.name + ":" + suffix
    }
    idx = idx + 1
  }
  return null
}
|
|
|
|
// Symbol id ("<filename>:<name>:fn") of the function numbered func_nr.
// Returns null for function number 0, for numbers missing from fn_map, and
// for functions recorded without a name.
var get_enclosing = function(func_nr) {
  var fn_info = fn_map[text(func_nr)]
  var is_named = fn_info != null && func_nr != 0 && fn_info.name != null
  return is_named ? (filename + ":" + fn_info.name + ":fn") : null
}
|
|
|
|
// Collect the comment text preceding a declaration on target_row.
// Every token of kind "comment" whose from_row lies in the 10 rows before
// target_row (target_row - 10 inclusive, target_row exclusive) contributes its
// value; the values are joined with "\n" in token order.
// Scanning stops at the first token whose from_row is at or past target_row.
// Returns null when no tokens were supplied or no comment was found.
var find_doc_comment = function(target_row) {
  var pos = 0
  var token_count = 0
  var token = null
  var collected = []
  if (tokens == null) return null
  token_count = length(tokens)
  while (pos < token_count) {
    token = tokens[pos]
    if (token.from_row >= target_row) break
    if (token.kind == "comment" && token.from_row >= target_row - 10) {
      collected[] = token.value
    }
    pos = pos + 1
  }
  if (length(collected) == 0) return null
  return text(collected, "\n")
}
|
|
|
|
// Hand out the next node id: bumps node_counter by one and returns the new
// value, so ids increase by one per call.
var next_id = function() {
  var allocated = node_counter + 1
  node_counter = allocated
  return allocated
}
|
|
|
|
// Forward declarations for the mutually recursive walkers; each name is
// assigned its function further down.
var walk_expr = null
var walk_stmts = null
var walk_stmt = null

// Walk each element of an expression list as a read.
var walk_expr_list = function(items, enclosing) {
  var _li = 0
  while (_li < length(items)) {
    walk_expr(items[_li], enclosing, false)
    _li = _li + 1
  }
}

// Walk one expression node, appending to references, intrinsic_refs,
// call_sites and symbols along the way.
//   node      — expression node; null is ignored.
//   enclosing — id recorded as the `enclosing` field of every entry produced.
//   is_lhs    — true when the node is an assignment/increment target; a plain
//               name is then recorded with ref_kind "write" instead of "read".
// Each non-null node takes one id from next_id(); all entries produced
// directly for that node share it.
walk_expr = function(node, enclosing, is_lhs) {
  var nid = 0
  var node_kind = null
  var callee = null
  var callee_is_name = false
  var callee_name = null
  var callee_sym = null
  var name_prefix = null
  var arg_count = 0
  var is_assign = false
  var _ai = 0
  var enc = null
  var param = null

  if (node == null) return

  nid = next_id()
  node_kind = node.kind

  // `this` is recorded as an unresolved read.
  if (node_kind == "this") {
    references[] = {
      node_id: nid,
      name: "this",
      symbol_id: null,
      span: make_span(node),
      enclosing: enclosing,
      ref_kind: "read"
    }
    return
  }

  // Names: intrinsics go to intrinsic_refs; other names are references only
  // when they carry a function_nr. No return here — the node still reaches
  // the generic fallthrough at the bottom.
  if (node_kind == "name" && node.name != null) {
    if (node.intrinsic == true) {
      intrinsic_refs[] = {
        node_id: nid,
        name: node.name,
        span: make_span(node),
        enclosing: enclosing
      }
    } else if (node.function_nr != null) {
      references[] = {
        node_id: nid,
        name: node.name,
        symbol_id: resolve_symbol_id(node),
        span: make_span(node),
        enclosing: enclosing,
        ref_kind: is_lhs ? "write" : "read"
      }
    }
  }

  // Call expression.
  if (node_kind == "(") {
    callee = node.expression
    arg_count = 0
    if (node.list != null) arg_count = length(node.list)

    if (callee != null && callee.kind == "name") {
      callee_is_name = true
      callee_name = callee.name
      if (callee.intrinsic != true && callee.function_nr != null) {
        callee_sym = resolve_symbol_id(callee)
      }
    } else if (callee != null && callee.kind == ".") {
      // Property call: "<left name>.<right name>", or just the right name
      // when the left side is not a plain name.
      if (callee.left != null && callee.left.kind == "name") {
        callee_name = callee.left.name
      }
      if (callee.right != null && callee.right.name != null) {
        name_prefix = callee_name != null ? callee_name + "." : ""
        callee_name = name_prefix + callee.right.name
      }
    }

    // Calls whose callee name is exactly "use" are not recorded as call sites.
    if (callee_name != "use") {
      call_sites[] = {
        node_id: nid,
        callee: callee_name,
        callee_symbol_id: callee_sym,
        span: make_span(node),
        enclosing: enclosing,
        args_count: arg_count
      }
    }

    if (callee_is_name) {
      // A name callee is recorded here and not walked again.
      if (callee.intrinsic == true) {
        if (callee.name != null) {
          intrinsic_refs[] = {
            node_id: nid,
            name: callee.name,
            span: make_span(callee),
            enclosing: enclosing
          }
        }
      } else if (callee.function_nr != null) {
        // callee_sym was resolved above under exactly this condition.
        references[] = {
          node_id: nid,
          name: callee.name,
          symbol_id: callee_sym,
          span: make_span(callee),
          enclosing: enclosing,
          ref_kind: "call"
        }
      }
    } else if (callee != null) {
      walk_expr(callee, enclosing, false)
    }

    if (node.list != null) walk_expr_list(node.list, enclosing)
    return
  }

  // Function / arrow function expression: record params, then walk the body
  // and the disruption clause with the function itself as `enclosing`.
  if (node_kind == "function" || node_kind == "arrow function") {
    enc = enclosing
    if (node.function_nr != null) {
      enc = filename + ":" + (node.name != null ? node.name : "anon_" + text(node.function_nr)) + ":fn"
    }
    if (node.list != null) {
      _ai = 0
      while (_ai < length(node.list)) {
        param = node.list[_ai]
        if (param.name != null) {
          symbols[] = {
            symbol_id: filename + ":" + param.name + ":param",
            name: param.name,
            kind: "param",
            decl_span: make_span(param),
            doc_comment: null,
            scope_fn_nr: node.function_nr,
            params: null
          }
        }
        _ai = _ai + 1
      }
    }
    walk_stmts(node.statements, enc)
    walk_stmts(node.disruption, enc)
    return
  }

  // Assignment operators: the left side is a write, the right side a read.
  is_assign = node_kind == "=" || node_kind == "+=" || node_kind == "-=" ||
    node_kind == "*=" || node_kind == "/=" || node_kind == "%="
  if (is_assign) {
    walk_expr(node.left, enclosing, true)
    walk_expr(node.right, enclosing, false)
    return
  }

  // Property access: only the object side is walked; the right side is a
  // property name, not a reference.
  if (node_kind == ".") {
    walk_expr(node.left, enclosing, false)
    return
  }

  // Index access: both the container and the index are reads.
  if (node_kind == "[") {
    walk_expr(node.left, enclosing, false)
    walk_expr(node.right, enclosing, false)
    return
  }

  // Array and template literals: walk every element.
  if ((node_kind == "array" || node_kind == "template") && node.list != null) {
    walk_expr_list(node.list, enclosing)
    return
  }

  // Record literal: walk the values only, never the keys.
  if (node_kind == "record" && node.list != null) {
    _ai = 0
    while (_ai < length(node.list)) {
      if (node.list[_ai] != null) {
        walk_expr(node.list[_ai].right, enclosing, false)
      }
      _ai = _ai + 1
    }
    return
  }

  // Increment / decrement: the operand is a write.
  if (node_kind == "++" || node_kind == "--") {
    walk_expr(node.expression, enclosing, true)
    return
  }

  // Ternary: condition, then both branches.
  if (node_kind == "?" || node_kind == "then") {
    walk_expr(node.expression, enclosing, false)
    walk_expr(node.then, enclosing, false)
    walk_expr(node.else, enclosing, false)
    return
  }

  // Anything else: walk whichever of left / right / expression is present.
  // Only `left` inherits is_lhs.
  if (node.left != null) walk_expr(node.left, enclosing, is_lhs)
  if (node.right != null) walk_expr(node.right, enclosing, false)
  if (node.expression != null) walk_expr(node.expression, enclosing, false)
}
|
|
|
|
// Walk a statement list in order, passing `enclosing` through to walk_stmt.
// A null list is ignored.
walk_stmts = function(stmts, enclosing) {
  var _wi = 0
  if (stmts != null) {
    while (_wi < length(stmts)) {
      walk_stmt(stmts[_wi], enclosing)
      _wi = _wi + 1
    }
  }
}
|
|
|
|
// If expr is a call whose callee is named "use" and whose first argument is a
// node of kind "text", return that first argument node; otherwise null.
var use_text_arg = function(expr) {
  if (expr == null || expr.kind != "(") return null
  if (expr.expression == null || expr.expression.name != "use") return null
  if (expr.list == null || length(expr.list) == 0) return null
  if (expr.list[0].kind != "text") return null
  return expr.list[0]
}

// Walk a single statement: record declared symbols and imports, then descend
// into the statement's expressions and nested statement lists.
//   stmt      — statement node; null is ignored.
//   enclosing — id passed through as the `enclosing` of everything found.
walk_stmt = function(stmt, enclosing) {
  var stmt_kind = null
  var sym_kind = null
  var p_list = null
  var _di = 0
  var target = null
  var rhs = null
  var path_node = null

  if (stmt == null) return
  stmt_kind = stmt.kind

  // Variable / constant declaration.
  if (stmt_kind == "var" || stmt_kind == "def") {
    target = stmt.left
    rhs = stmt.right
    if (target != null && target.name != null) {
      sym_kind = stmt_kind
      p_list = null

      // A declaration initialised with a function expression is recorded as
      // kind "fn", together with its parameter names.
      if (rhs != null && (rhs.kind == "function" || rhs.kind == "arrow function")) {
        sym_kind = "fn"
        p_list = []
        if (rhs.list != null) {
          _di = 0
          while (_di < length(rhs.list)) {
            if (rhs.list[_di].name != null) {
              p_list[] = rhs.list[_di].name
            }
            _di = _di + 1
          }
        }
      }

      // Note: scope_fn_nr is always recorded as 0 for declarations found here.
      symbols[] = {
        symbol_id: filename + ":" + target.name + ":" + sym_kind,
        name: target.name,
        kind: sym_kind,
        decl_span: make_span(stmt),
        doc_comment: find_doc_comment(stmt.from_row),
        scope_fn_nr: 0,
        params: p_list
      }

      // Import form: var x = use('path').
      path_node = use_text_arg(rhs)
      if (path_node != null) {
        imports[] = {
          local_name: target.name,
          module_path: path_node.value,
          span: make_span(stmt)
        }
      }
    }

    walk_expr(rhs, enclosing, false)
    return
  }

  // Multiple declarations: each list element is walked as its own statement.
  if (stmt_kind == "var_list" && stmt.list != null) {
    _di = 0
    while (_di < length(stmt.list)) {
      walk_stmt(stmt.list[_di], enclosing)
      _di = _di + 1
    }
    return
  }

  // Expression statement; a bare use('path') is an import without a local name.
  if (stmt_kind == "call") {
    path_node = use_text_arg(stmt.expression)
    if (path_node != null) {
      imports[] = {
        local_name: null,
        module_path: path_node.value,
        span: make_span(stmt)
      }
    }
    walk_expr(stmt.expression, enclosing, false)
    return
  }

  // If statement: condition, then-branch, else-branch, and the else-if chain
  // held in stmt.list. walk_stmts ignores null lists.
  if (stmt_kind == "if") {
    walk_expr(stmt.expression, enclosing, false)
    walk_stmts(stmt.then, enclosing)
    walk_stmts(stmt.else, enclosing)
    walk_stmts(stmt.list, enclosing)
    return
  }

  // While loop: condition first, then body.
  if (stmt_kind == "while") {
    walk_expr(stmt.expression, enclosing, false)
    walk_stmts(stmt.statements, enclosing)
    return
  }

  // For loop: init, test, update, then body.
  if (stmt_kind == "for") {
    walk_expr(stmt.init, enclosing, false)
    walk_expr(stmt.test, enclosing, false)
    walk_expr(stmt.update, enclosing, false)
    walk_stmts(stmt.statements, enclosing)
    return
  }

  // Do-while loop: body first, then condition.
  if (stmt_kind == "do") {
    walk_stmts(stmt.statements, enclosing)
    walk_expr(stmt.expression, enclosing, false)
    return
  }

  // Return and disrupt both carry a single optional expression.
  if (stmt_kind == "return" || stmt_kind == "disrupt") {
    walk_expr(stmt.expression, enclosing, false)
    return
  }

  // Block.
  if (stmt_kind == "block") {
    walk_stmts(stmt.statements, enclosing)
    return
  }

  // Any other statement kind: walk whatever sub-nodes it has.
  walk_expr(stmt.expression, enclosing, false)
  walk_expr(stmt.left, enclosing, false)
  walk_expr(stmt.right, enclosing, false)
  walk_stmts(stmt.statements, enclosing)
}
|
|
|
|
// --- 1. Process the functions listed in ast.functions ---
// Each function becomes an "fn" symbol (id built from its name, or from
// "anon_<function_nr>" when it has none), followed by one "param" symbol per
// named parameter. Its statements and disruption clause are then walked with
// the function's symbol id as `enclosing`.
var fn_inputs = null
if (ast.functions != null) {
  _i = 0
  while (_i < length(ast.functions)) {
    fn = ast.functions[_i]
    fn_inputs = fn.list

    if (fn.name != null) {
      sym_id = filename + ":" + fn.name + ":fn"
    } else {
      sym_id = filename + ":anon_" + text(fn.function_nr) + ":fn"
    }

    // Names of the parameters, in declaration order.
    params_list = []
    if (fn_inputs != null) {
      _j = 0
      while (_j < length(fn_inputs)) {
        if (fn_inputs[_j].name != null) {
          params_list[] = fn_inputs[_j].name
        }
        _j = _j + 1
      }
    }

    symbols[] = {
      symbol_id: sym_id,
      name: fn.name,
      kind: "fn",
      decl_span: make_span(fn),
      doc_comment: find_doc_comment(fn.from_row),
      scope_fn_nr: fn.outer != null ? fn.outer : 0,
      params: params_list
    }

    // One "param" symbol per named parameter, scoped to this function.
    if (fn_inputs != null) {
      _j = 0
      while (_j < length(fn_inputs)) {
        if (fn_inputs[_j].name != null) {
          symbols[] = {
            symbol_id: filename + ":" + fn_inputs[_j].name + ":param",
            name: fn_inputs[_j].name,
            kind: "param",
            decl_span: make_span(fn_inputs[_j]),
            doc_comment: null,
            scope_fn_nr: fn.function_nr,
            params: null
          }
        }
        _j = _j + 1
      }
    }

    // Walk the function body and its disruption clause.
    walk_stmts(fn.statements, sym_id)
    walk_stmts(fn.disruption, sym_id)

    _i = _i + 1
  }
}

// --- 2. Walk top-level statements (no enclosing symbol) ---
walk_stmts(ast.statements, null)
|
|
|
|
// --- 3. Detect exports for .cm modules ---
// Skipped when the file name ends in ".ce". Otherwise the last top-level
// `return` statement that carries an expression is inspected, and only that
// one. It yields exports only when the returned expression is a record
// literal: every entry with a named key becomes {name, symbol_id}, where
// symbol_id is resolved when the value is a plain name carrying a
// function_nr and is null otherwise.
var returned = null
if (!is_actor && ast.statements != null) {
  _i = length(ast.statements) - 1
  while (_i >= 0) {
    returned = ast.statements[_i].kind == "return" ? ast.statements[_i].expression : null
    if (returned != null) {
      // Require an actual record literal. Call and array nodes also carry a
      // `list`, and their elements can have a `left.name` (e.g. `a.b`), which
      // would otherwise be misreported as exported keys.
      if (returned.kind == "record" && returned.list != null) {
        _j = 0
        while (_j < length(returned.list)) {
          entry = returned.list[_j]
          if (entry != null && entry.left != null && entry.left.name != null) {
            // Link the export to a symbol if the value is a name reference.
            sym_id = null
            if (entry.right != null && entry.right.kind == "name" && entry.right.function_nr != null) {
              sym_id = resolve_symbol_id(entry.right)
            }
            exports_list[] = {
              name: entry.left.name,
              symbol_id: sym_id
            }
          }
          _j = _j + 1
        }
      }
      // Only the last return statement is considered.
      break
    }
    _i = _i - 1
  }
}
|
|
|
|
// --- 4. Build reverse refs ---
// Group the collected references by name: reverse[name] lists, in collection
// order, the node_id / span / enclosing / ref_kind of each reference.
_i = 0
while (_i < length(references)) {
  entry = references[_i]
  key = entry.name
  if (reverse[key] == null) reverse[key] = []
  reverse[key][] = {
    node_id: entry.node_id,
    span: entry.span,
    enclosing: entry.enclosing,
    ref_kind: entry.ref_kind
  }
  _i = _i + 1
}

// The finished index record for this file.
return {
  version: 1,
  path: filename,
  is_actor: is_actor,
  imports: imports,
  symbols: symbols,
  references: references,
  intrinsic_refs: intrinsic_refs,
  call_sites: call_sites,
  exports: exports_list,
  reverse_refs: reverse
}
|
|
}
|
|
|
|
// Module result: a record exposing index_ast as its only member.
return {index_ast: index_ast}
|