Merge branch 'pit_lsp' into fix_libs

This commit is contained in:
2026-02-16 21:53:11 -06:00
9 changed files with 1519 additions and 42 deletions

View File

@@ -57,7 +57,9 @@ Modules loaded with `use()`:
## Tools
- [**Command Line**](/docs/cli/) — the `pit` tool
- [**Semantic Index**](/docs/semantic-index/) — index and query symbols, references, and call sites
- [**Testing**](/docs/testing/) — writing and running tests
- [**Compiler Inspection**](/docs/compiler-tools/) — dump AST, mcode, and optimizer reports
- [**Writing C Modules**](/docs/c-modules/) — native extensions
## Architecture

270
docs/semantic-index.md Normal file
View File

@@ -0,0 +1,270 @@
---
title: "Semantic Index"
description: "Index and query symbols, references, and call sites in source files"
weight: 55
type: "docs"
---
ƿit includes a semantic indexer that extracts symbols, references, call sites, and imports from source files. The index powers the LSP (find references, rename) and is available as a CLI tool for scripting and debugging.
## Overview
The indexer walks the parsed AST without modifying it. It produces a JSON structure that maps every declaration, every reference to that declaration, and every call site in a file.
```
source → tokenize → parse → fold → index
                                  ↓ produces
          symbols, references, call sites,
          imports, exports, reverse refs
```
Two CLI commands expose this:
| Command | Purpose |
|---------|---------|
| `pit index <file>` | Produce the full semantic index as JSON |
| `pit explain` | Query the index for a specific symbol or position |
## pit index
Index a source file and print the result as JSON.
```bash
pit index <file.ce|file.cm>
pit index <file> -o output.json
```
### Output
The index contains these sections:
| Section | Description |
|---------|-------------|
| `imports` | All `use()` calls with local name, module path, and span |
| `symbols` | Every declaration: vars, defs, functions, params |
| `references` | Every use of a name, classified as read, write, or call |
| `call_sites` | Every function call with callee, args count, and enclosing function |
| `exports` | For `.cm` modules, the keys of the top-level `return` record |
| `reverse_refs` | Inverted index: name to list of reference spans |
### Example
Given a file `graph.ce` with functions `make_node`, `connect`, and `build_graph`:
```bash
pit index graph.ce
```
```json
{
"version": 1,
"path": "graph.ce",
"is_actor": true,
"imports": [
{"local_name": "json", "module_path": "json", "span": {"from_row": 2, "from_col": 0, "to_row": 2, "to_col": 22}}
],
"symbols": [
{
"symbol_id": "graph.ce:make_node:fn",
"name": "make_node",
"kind": "fn",
"params": ["name", "kind"],
"doc_comment": "// A node in the graph.",
"decl_span": {"from_row": 6, "from_col": 0, "to_row": 8, "to_col": 1},
"scope_fn_nr": 0
}
],
"references": [
{"node_id": 20, "name": "make_node", "ref_kind": "call", "span": {"from_row": 17, "from_col": 13, "to_row": 17, "to_col": 22}}
],
"call_sites": [
{"node_id": 20, "callee": "make_node", "args_count": 2, "span": {"from_row": 17, "from_col": 22, "to_row": 17, "to_col": 40}}
],
"exports": [],
"reverse_refs": {
"make_node": [
{"node_id": 20, "ref_kind": "call", "span": {"from_row": 17, "from_col": 13, "to_row": 17, "to_col": 22}}
]
}
}
```
### Symbol Kinds
| Kind | Description |
|------|-------------|
| `fn` | Function (var or def with function value) |
| `var` | Mutable variable |
| `def` | Constant |
| `param` | Function parameter |
Each symbol has a `symbol_id` in the format `filename:name:kind` and a `decl_span` with `from_row`, `from_col`, `to_row`, `to_col` (0-based).
### Reference Kinds
| Kind | Description |
|------|-------------|
| `read` | Value is read |
| `write` | Value is assigned |
| `call` | Used as a function call target |
### Module Exports
For `.cm` files, the indexer detects the top-level `return` statement. If it returns a record literal, each key becomes an export linked to its symbol:
```javascript
// math_utils.cm
var add = function(a, b) { return a + b }
var sub = function(a, b) { return a - b }
return {add: add, sub: sub}
```
```bash
pit index math_utils.cm
```
The `exports` section will contain:
```json
[
{"name": "add", "symbol_id": "math_utils.cm:add:fn"},
{"name": "sub", "symbol_id": "math_utils.cm:sub:fn"}
]
```
## pit explain
Query the semantic index for a specific symbol or cursor position. This is the targeted query interface — instead of dumping the full index, it answers a specific question.
```bash
pit explain --span <file>:<line>:<col>
pit explain --symbol <name> <file>
```
### --span: What is at this position?
Point at a line and column (0-based) to find out what symbol or reference is there.
```bash
pit explain --span demo.ce:6:4
```
If the position lands on a declaration, that symbol is returned along with all its references and call sites. If it lands on a reference, the indexer traces back to the declaration and returns the same information.
The result includes:
| Field | Description |
|-------|-------------|
| `symbol` | The resolved declaration (name, kind, params, doc comment, span) |
| `reference` | The reference at the cursor, if the cursor was on a reference |
| `references` | All references to this symbol across the file |
| `call_sites` | All call sites for this symbol |
| `imports` | The file's imports (for context) |
```json
{
"symbol": {
"name": "build_graph",
"symbol_id": "demo.ce:build_graph:fn",
"kind": "fn",
"params": [],
"doc_comment": "// Build a sample graph and return it."
},
"references": [
{"node_id": 71, "ref_kind": "call", "span": {"from_row": 39, "from_col": 12, "to_row": 39, "to_col": 23}}
],
"call_sites": []
}
```
### --symbol: Find a symbol by name
Look up a symbol by name, returning all matching declarations and every reference.
```bash
pit explain --symbol connect demo.ce
```
```json
{
"symbols": [
{
"name": "connect",
"symbol_id": "demo.ce:connect:fn",
"kind": "fn",
"params": ["from", "to", "label"],
"doc_comment": "// Connect two nodes with a labeled edge."
}
],
"references": [
{"node_id": 29, "ref_kind": "call", "span": {"from_row": 21, "from_col": 2, "to_row": 21, "to_col": 9}},
{"node_id": 33, "ref_kind": "call", "span": {"from_row": 22, "from_col": 2, "to_row": 22, "to_col": 9}},
{"node_id": 37, "ref_kind": "call", "span": {"from_row": 23, "from_col": 2, "to_row": 23, "to_col": 9}}
],
"call_sites": [
{"callee": "connect", "args_count": 3, "span": {"from_row": 21, "from_col": 9, "to_row": 21, "to_col": 29}},
{"callee": "connect", "args_count": 3, "span": {"from_row": 22, "from_col": 9, "to_row": 22, "to_col": 31}},
{"callee": "connect", "args_count": 3, "span": {"from_row": 23, "from_col": 9, "to_row": 23, "to_col": 29}}
]
}
```
This tells you: `connect` is a function taking `(from, to, label)`, declared on line 11, and called 3 times inside `build_graph`.
## Programmatic Use
The index and explain modules can be used directly from ƿit scripts:
### index.cm
```javascript
var tokenize_mod = use('tokenize')
var parse_mod = use('parse')
var fold_mod = use('fold')
var index_mod = use('index')
var pipeline = {tokenize: tokenize_mod, parse: parse_mod, fold: fold_mod}
var idx = index_mod.index_file(src, filename, pipeline)
```
`index_file` runs the full pipeline (tokenize, parse, fold) and returns the index. If you already have a parsed AST and tokens, use `index_ast` instead:
```javascript
var idx = index_mod.index_ast(ast, tokens, filename)
```
### explain.cm
```javascript
var explain_mod = use('explain')
var expl = explain_mod.make(idx)
// What is at line 10, column 5?
var result = expl.at_span(10, 5)
// Find all symbols named "connect"
var result = expl.by_symbol("connect")
// Get callers and callees of a symbol
var chain = expl.call_chain("demo.ce:connect:fn", 2)
```
For cross-file queries:
```javascript
var result = explain_mod.explain_across([idx1, idx2, idx3], "connect")
```
## LSP Integration
The semantic index powers these LSP features:
| Feature | LSP Method | Description |
|---------|------------|-------------|
| Find References | `textDocument/references` | All references to the symbol under the cursor |
| Rename | `textDocument/rename` | Rename a symbol and all its references |
| Prepare Rename | `textDocument/prepareRename` | Validate that the cursor is on a renameable symbol |
| Go to Definition | `textDocument/definition` | Jump to a symbol's declaration (index-backed with AST fallback) |
These work automatically in any editor with ƿit LSP support. The index is rebuilt on every file change.

View File

@@ -1,10 +1,10 @@
// Document analysis module.
// Call make(tokenize_mod, parse_mod) to get an analysis object.
// Call make(tokenize_mod, parse_mod, index_mod) to get an analysis object.
var json = use('json')
// Create an analysis module bound to the tokenize and parse functions.
var make = function(tokenize_mod, parse_mod) {
// Create an analysis module bound to the tokenize, parse, and index functions.
var make = function(tokenize_mod, parse_mod, index_mod) {
// Tokenize and parse a document, storing the results.
var update = function(docs, uri, params) {
@@ -36,13 +36,24 @@ var make = function(tokenize_mod, parse_mod) {
}
}
var idx = null
var do_index = function() {
idx = index_mod.index_ast(ast, (tok_result != null) ? tok_result.tokens : [], uri)
} disruption {
// indexing failure is non-fatal
}
if (ast != null && index_mod != null) {
do_index()
}
doc = {
uri: uri,
text: src,
version: version,
tokens: (tok_result != null) ? tok_result.tokens : [],
ast: ast,
errors: errors
errors: errors,
index: idx
}
docs[uri] = doc
return doc

View File

@@ -13,9 +13,11 @@ var symbols = use('symbols')
// These are the same functions the compiler uses internally.
var tokenize_mod = use('tokenize')
var parse_mod = use('parse')
var index_mod = use('index')
var explain_mod = use('explain')
// Create analysis module bound to tokenize/parse
var analysis = analysis_make(tokenize_mod, parse_mod)
// Create analysis module bound to tokenize/parse/index
var analysis = analysis_make(tokenize_mod, parse_mod, index_mod)
// Document store: URI -> {text, version, ast, tokens, errors}
var docs = {}
@@ -54,7 +56,9 @@ var handle_initialize = function(id, params) {
},
hoverProvider: true,
definitionProvider: true,
documentSymbolProvider: true
documentSymbolProvider: true,
referencesProvider: true,
renameProvider: {prepareProvider: true}
},
serverInfo: {
name: "pit-lsp",
@@ -144,6 +148,159 @@ var handle_document_symbol = function(id, params) {
protocol.respond(id, result)
}
// Handle textDocument/references request.
// Resolves the name token under the cursor via the document's semantic
// index and answers with every known location for that name: all entries
// in index.reverse_refs[name], plus the declaration span(s) found through
// explain_mod.by_symbol. Responds with an empty array when the document,
// its index, or a name token at the position is missing.
// NOTE(review): params.context.includeDeclaration is never consulted; the
// declaration is always appended — confirm clients tolerate this.
var handle_references = function(id, params) {
  var uri = params.textDocument.uri
  var pos = params.position
  var doc = docs[uri]
  var result = []
  var tok = null
  var name = null
  var refs = null
  var _i = 0
  var ref = null
  var expl = null
  var sym_result = null
  if (doc != null && doc.index != null) {
    tok = analysis.token_at(doc, pos.line, pos.character)
    if (tok != null && tok.kind == "name" && tok.value != null) {
      name = tok.value
      // All recorded references for this name (reverse-index lookup).
      refs = doc.index.reverse_refs[name]
      if (refs != null) {
        _i = 0
        while (_i < length(refs)) {
          ref = refs[_i]
          if (ref.span != null) {
            result[] = {
              uri: uri,
              range: {
                start: {line: ref.span.from_row, character: ref.span.from_col},
                end: {line: ref.span.to_row, character: ref.span.to_col}
              }
            }
          }
          _i = _i + 1
        }
      }
      // Also include the declaration itself if found
      expl = explain_mod.make(doc.index)
      sym_result = expl.by_symbol(name)
      if (sym_result != null && length(sym_result.symbols) > 0) {
        _i = 0
        while (_i < length(sym_result.symbols)) {
          if (sym_result.symbols[_i].decl_span != null) {
            result[] = {
              uri: uri,
              range: {
                start: {line: sym_result.symbols[_i].decl_span.from_row, character: sym_result.symbols[_i].decl_span.from_col},
                end: {line: sym_result.symbols[_i].decl_span.to_row, character: sym_result.symbols[_i].decl_span.to_col}
              }
            }
          }
          _i = _i + 1
        }
      }
    }
  }
  protocol.respond(id, result)
}
// Handle textDocument/prepareRename request.
// Validates that the cursor sits on a renameable name token: the token
// must have kind "name" and must resolve to at least one declaration in
// the document's semantic index. Otherwise the response stays null, which
// tells the client rename is not possible here (this also rejects
// intrinsics, which never appear as indexed symbols).
// On success, responds with the token's range and the current name as the
// rename placeholder.
var handle_prepare_rename = function(id, params) {
  var uri = params.textDocument.uri
  var pos = params.position
  var doc = docs[uri]
  var tok = null
  var name = null
  var result = null
  var expl = null
  var sym_result = null
  if (doc != null) {
    tok = analysis.token_at(doc, pos.line, pos.character)
    if (tok != null && tok.kind == "name" && tok.value != null) {
      name = tok.value
      // Don't allow renaming intrinsics
      if (doc.index != null) {
        expl = explain_mod.make(doc.index)
        sym_result = expl.by_symbol(name)
        if (sym_result != null && length(sym_result.symbols) > 0) {
          // Tokens carry from_column/to_column; index spans use from_col.
          result = {
            range: {
              start: {line: tok.from_row, character: tok.from_column},
              end: {line: tok.to_row, character: tok.to_column}
            },
            placeholder: name
          }
        }
      }
    }
  }
  protocol.respond(id, result)
}
// Handle textDocument/rename request.
// Builds a WorkspaceEdit replacing the name under the cursor with
// params.newName at every declaration span (via explain_mod.by_symbol)
// and every reference span (via index.reverse_refs). Responds with
// {changes: {}} when nothing matched or the document has no index.
// NOTE(review): matching is by name only, not by resolved symbol — if the
// file declares the same name in several scopes (shadowing), all of them
// are renamed together. Confirm this is the intended behavior.
var handle_rename = function(id, params) {
  var uri = params.textDocument.uri
  var pos = params.position
  var new_name = params.newName
  var doc = docs[uri]
  var tok = null
  var name = null
  var edits = []
  var refs = null
  var _i = 0
  var ref = null
  var expl = null
  var sym_result = null
  if (doc != null && doc.index != null) {
    tok = analysis.token_at(doc, pos.line, pos.character)
    if (tok != null && tok.kind == "name" && tok.value != null) {
      name = tok.value
      expl = explain_mod.make(doc.index)
      sym_result = expl.by_symbol(name)
      // Add edit for declaration
      if (sym_result != null && length(sym_result.symbols) > 0) {
        _i = 0
        while (_i < length(sym_result.symbols)) {
          if (sym_result.symbols[_i].decl_span != null) {
            edits[] = {
              range: {
                start: {line: sym_result.symbols[_i].decl_span.from_row, character: sym_result.symbols[_i].decl_span.from_col},
                end: {line: sym_result.symbols[_i].decl_span.to_row, character: sym_result.symbols[_i].decl_span.to_col}
              },
              newText: new_name
            }
          }
          _i = _i + 1
        }
      }
      // Add edits for all references
      refs = doc.index.reverse_refs[name]
      if (refs != null) {
        _i = 0
        while (_i < length(refs)) {
          ref = refs[_i]
          if (ref.span != null) {
            edits[] = {
              range: {
                start: {line: ref.span.from_row, character: ref.span.from_col},
                end: {line: ref.span.to_row, character: ref.span.to_col}
              },
              newText: new_name
            }
          }
          _i = _i + 1
        }
      }
    }
  }
  // Only attach edits for this uri when at least one was produced.
  var changes = {}
  if (length(edits) > 0) {
    changes[uri] = edits
  }
  protocol.respond(id, {changes: changes})
}
// Dispatch a single message. Wrapped in a function for disruption handling.
var dispatch_message = function(msg) {
var method = msg.method
@@ -167,6 +324,12 @@ var dispatch_message = function(msg) {
handle_definition(msg.id, msg.params)
} else if (method == "textDocument/documentSymbol") {
handle_document_symbol(msg.id, msg.params)
} else if (method == "textDocument/references") {
handle_references(msg.id, msg.params)
} else if (method == "textDocument/prepareRename") {
handle_prepare_rename(msg.id, msg.params)
} else if (method == "textDocument/rename") {
handle_rename(msg.id, msg.params)
} else if (method == "shutdown") {
protocol.respond(msg.id, null)
return "shutdown"

View File

@@ -91,14 +91,12 @@ var document_symbols = function(doc) {
}
// Find the declaration location of a name at a given position.
// Uses the semantic index when available, falls back to AST walk.
var definition = function(doc, line, col, token_at) {
var tok = token_at(doc, line, col)
var ast = doc.ast
var name = null
var _i = 0
var _j = 0
var scope = null
var v = null
var sym = null
var decl = null
if (tok == null || tok.kind != "name" || tok.value == null) {
@@ -107,32 +105,18 @@ var definition = function(doc, line, col, token_at) {
name = tok.value
if (ast == null) {
return null
}
// Search through scopes for the variable declaration
if (ast.scopes != null) {
// Use the semantic index if available
if (doc.index != null) {
_i = 0
while (_i < length(ast.scopes)) {
scope = ast.scopes[_i]
if (scope.vars != null) {
_j = 0
while (_j < length(scope.vars)) {
v = scope.vars[_j]
if (v.name == name) {
decl = find_declaration(ast.statements, name)
if (decl != null) {
return {
uri: doc.uri,
range: {
start: {line: decl.from_row, character: decl.from_column},
end: {line: decl.to_row, character: decl.to_column}
}
}
}
while (_i < length(doc.index.symbols)) {
sym = doc.index.symbols[_i]
if (sym.name == name && sym.decl_span != null) {
return {
uri: doc.uri,
range: {
start: {line: sym.decl_span.from_row, character: sym.decl_span.from_col},
end: {line: sym.decl_span.to_row, character: sym.decl_span.to_col}
}
_j = _j + 1
}
}
_i = _i + 1
@@ -140,13 +124,15 @@ var definition = function(doc, line, col, token_at) {
}
// Fallback: walk statements for var/def with this name
decl = find_declaration(ast.statements, name)
if (decl != null) {
return {
uri: doc.uri,
range: {
start: {line: decl.from_row, character: decl.from_column},
end: {line: decl.to_row, character: decl.to_column}
if (doc.ast != null) {
decl = find_declaration(doc.ast.statements, name)
if (decl != null) {
return {
uri: doc.uri,
range: {
start: {line: decl.from_row, character: decl.from_column},
end: {line: decl.to_row, character: decl.to_column}
}
}
}
}

127
explain.ce Normal file
View File

@@ -0,0 +1,127 @@
// cell explain — Query the semantic index for a source file.
//
// Usage:
//   cell explain --span file.ce:10:5         Find symbol at position
//   cell explain --symbol add_node file.ce   Find symbol by name in a file
//   cell explain --help                      Show this help
//
// Note: --symbol always requires a file argument (the symbol branch below
// errors out without one), even though the --help text prints it as [file].
var fd = use('fd')
var json = use('json')
var tokenize_mod = use('tokenize')
var parse_mod = use('parse')
var fold_mod = use('fold')
var index_mod = use('index')
var explain_mod = use('explain')
// Parsed command-line state.
var mode = null
var span_arg = null
var symbol_name = null
var file_arg = null
var i = 0
var parts = null
var filename = null
var line = null
var col = null
var src = null
var idx = null
var explain = null
var result = null
// index_file drives tokenize/parse/fold through this record.
var pipeline = {tokenize: tokenize_mod, parse: parse_mod, fold: fold_mod}
// Argument parsing: options consume their value by advancing i; the first
// bare (non-dash) argument becomes the file.
for (i = 0; i < length(args); i++) {
  if (args[i] == '--span') {
    mode = "span"
    if (i + 1 < length(args)) {
      span_arg = args[i + 1]
      i = i + 1
    } else {
      log.error('--span requires file:line:col')
      $stop()
    }
  } else if (args[i] == '--symbol') {
    mode = "symbol"
    if (i + 1 < length(args)) {
      symbol_name = args[i + 1]
      i = i + 1
    } else {
      log.error('--symbol requires a name')
      $stop()
    }
  } else if (args[i] == '--help' || args[i] == '-h') {
    log.console("Usage: cell explain [options]")
    log.console("")
    log.console("Query the semantic index for a source file.")
    log.console("")
    log.console("Options:")
    log.console(" --span file:line:col Find symbol at position")
    log.console(" --symbol name [file] Find symbol by name")
    $stop()
  } else if (!starts_with(args[i], '-')) {
    if (file_arg == null) {
      file_arg = args[i]
    }
  }
}
if (mode == null) {
  log.error('Specify --span or --symbol. Use --help for usage.')
  $stop()
}
if (mode == "span") {
  // Split "file:line:col" — presumably array(text, sep) splits on sep;
  // TODO confirm. Paths containing ':' would break this format.
  parts = array(span_arg, ":")
  if (length(parts) < 3) {
    log.error('--span requires file:line:col format')
    $stop()
  }
  filename = parts[0]
  line = number(parts[1])
  col = number(parts[2])
  if (!fd.is_file(filename)) {
    log.error('File not found: ' + filename)
    $stop()
  }
  // Index the file, then ask what sits at (line, col).
  src = text(fd.slurp(filename))
  idx = index_mod.index_file(src, filename, pipeline)
  explain = explain_mod.make(idx)
  result = explain.at_span(line, col)
  if (result == null) {
    log.console("Nothing found at " + filename + ":" + text(line) + ":" + text(col))
  } else {
    print(json.encode(result, true))
    print("\n")
  }
}
if (mode == "symbol") {
  filename = file_arg
  if (filename == null) {
    log.error('--symbol requires a file argument')
    $stop()
  }
  if (!fd.is_file(filename)) {
    log.error('File not found: ' + filename)
    $stop()
  }
  // Index the file, then look the symbol up by name.
  src = text(fd.slurp(filename))
  idx = index_mod.index_file(src, filename, pipeline)
  explain = explain_mod.make(idx)
  result = explain.by_symbol(symbol_name)
  if (result == null || length(result.symbols) == 0) {
    log.console("Symbol '" + symbol_name + "' not found in " + filename)
  } else {
    print(json.encode(result, true))
    print("\n")
  }
}
$stop()

235
explain.cm Normal file
View File

@@ -0,0 +1,235 @@
// explain.cm — Query module over a semantic index.
//
// Usage:
// var explain = use('explain').make(index)
// explain.at_span(line, col)
// explain.by_symbol(name)
// explain.call_chain(symbol_id, depth)
// Report whether position (line, col) lies inside `span`, with both
// endpoints inclusive. A span carries from_row/from_col and to_row/to_col.
var span_contains = function(span, line, col) {
  var before_start = line < span.from_row || (line == span.from_row && col < span.from_col)
  var after_end = line > span.to_row || (line == span.to_row && col > span.to_col)
  return !before_start && !after_end
}
// Create an explain interface bound to a single file index.
// Returns {at_span, by_symbol, call_chain}, each a read-only query over
// the captured `index` record (symbols, references, call_sites,
// reverse_refs, imports).
var make = function(index) {
  // Find symbol or reference at a given line/col position.
  // Resolution order: a declaration span containing the position wins;
  // otherwise the first reference span containing it, traced back to its
  // declaration via symbol_id when present. Returns null when neither
  // matches; otherwise a record with the symbol, the reference under the
  // cursor (if any), all references to the symbol, its call sites, and
  // the file's imports for context.
  var at_span = function(line, col) {
    var _i = 0
    var sym = null
    var ref = null
    var found_sym = null
    var found_ref = null
    var result_refs = []
    var result_calls = []
    // Search symbols for one whose decl_span contains (line, col).
    _i = 0
    while (_i < length(index.symbols)) {
      sym = index.symbols[_i]
      if (sym.decl_span != null && span_contains(sym.decl_span, line, col)) {
        found_sym = sym
        break
      }
      _i = _i + 1
    }
    // If no symbol found, search references.
    if (found_sym == null) {
      _i = 0
      while (_i < length(index.references)) {
        ref = index.references[_i]
        if (ref.span != null && span_contains(ref.span, line, col)) {
          found_ref = ref
          // Look up the symbol this reference points to.
          // (_i is deliberately reused for the inner scan; both loops
          // break immediately afterwards, so no state is corrupted.)
          if (ref.symbol_id != null) {
            _i = 0
            while (_i < length(index.symbols)) {
              if (index.symbols[_i].symbol_id == ref.symbol_id) {
                found_sym = index.symbols[_i]
                break
              }
              _i = _i + 1
            }
          }
          break
        }
        _i = _i + 1
      }
    }
    if (found_sym == null && found_ref == null) return null
    // Gather all references to this symbol, keyed by name via the
    // reverse index (aliases the stored array, does not copy it).
    if (found_sym != null && index.reverse_refs[found_sym.name] != null) {
      result_refs = index.reverse_refs[found_sym.name]
    }
    // Gather call sites whose resolved callee is this symbol.
    _i = 0
    while (_i < length(index.call_sites)) {
      if (found_sym != null && index.call_sites[_i].callee_symbol_id == found_sym.symbol_id) {
        result_calls[] = index.call_sites[_i]
      }
      _i = _i + 1
    }
    return {
      symbol: found_sym,
      reference: found_ref,
      references: result_refs,
      call_sites: result_calls,
      imports: index.imports
    }
  }
  // Find all symbols matching a name, plus every reference to that name
  // (reverse index) and every call site where the name is the callee.
  var by_symbol = function(name) {
    var _i = 0
    var matches = []
    var result_refs = []
    var result_calls = []
    // Find matching symbols.
    _i = 0
    while (_i < length(index.symbols)) {
      if (index.symbols[_i].name == name) {
        matches[] = index.symbols[_i]
      }
      _i = _i + 1
    }
    // Gather all references to this name.
    if (index.reverse_refs[name] != null) {
      result_refs = index.reverse_refs[name]
    }
    // Gather call sites where this name is the callee.
    _i = 0
    while (_i < length(index.call_sites)) {
      if (index.call_sites[_i].callee == name) {
        result_calls[] = index.call_sites[_i]
      }
      _i = _i + 1
    }
    return {
      symbols: matches,
      references: result_refs,
      call_sites: result_calls
    }
  }
  // Summarize direct call relationships of a symbol:
  // callees = calls made FROM inside it, callers = calls made TO it.
  // NOTE(review): `depth` is echoed back in the result but no transitive
  // traversal happens — only one level is collected. Confirm intent.
  var call_chain = function(symbol_id, depth) {
    var max_depth = (depth != null) ? depth : 2
    var callers = []
    var callees = []
    var _i = 0
    var cs = null
    // Callees: calls made FROM this symbol.
    _i = 0
    while (_i < length(index.call_sites)) {
      cs = index.call_sites[_i]
      if (cs.enclosing == symbol_id) {
        callees[] = {
          callee: cs.callee,
          callee_symbol_id: cs.callee_symbol_id,
          span: cs.span,
          args_count: cs.args_count
        }
      }
      _i = _i + 1
    }
    // Callers: calls TO this symbol.
    _i = 0
    while (_i < length(index.call_sites)) {
      cs = index.call_sites[_i]
      if (cs.callee_symbol_id == symbol_id) {
        callers[] = {
          from: cs.enclosing,
          span: cs.span,
          args_count: cs.args_count
        }
      }
      _i = _i + 1
    }
    return {
      symbol_id: symbol_id,
      callers: callers,
      callees: callees,
      depth: max_depth
    }
  }
  return {
    at_span: at_span,
    by_symbol: by_symbol,
    call_chain: call_chain
  }
}
// Search across multiple file indexes.
var explain_across = function(indexes, name) {
var _i = 0
var _j = 0
var all_symbols = []
var all_refs = []
var all_calls = []
var idx = null
var refs = null
_i = 0
while (_i < length(indexes)) {
idx = indexes[_i]
// Gather symbols.
_j = 0
while (_j < length(idx.symbols)) {
if (idx.symbols[_j].name == name) {
all_symbols[] = idx.symbols[_j]
}
_j = _j + 1
}
// Gather references.
refs = idx.reverse_refs[name]
if (refs != null) {
_j = 0
while (_j < length(refs)) {
all_refs[] = refs[_j]
_j = _j + 1
}
}
// Gather call sites.
_j = 0
while (_j < length(idx.call_sites)) {
if (idx.call_sites[_j].callee == name) {
all_calls[] = idx.call_sites[_j]
}
_j = _j + 1
}
_i = _i + 1
}
return {
symbols: all_symbols,
references: all_refs,
call_sites: all_calls
}
}
// Module exports: the per-file explain factory, the cross-file search,
// and the span containment helper.
return {
  make: make,
  explain_across: explain_across,
  span_contains: span_contains
}
64
index.ce Normal file
View File

@@ -0,0 +1,64 @@
// cell index <file> — Build semantic index for a source file.
//
// Usage:
//   cell index <file.ce|file.cm>         Index one file, output JSON to stdout
//   cell index <file> -o <output.json>   Index one file, write to file
//   cell index --help                    Show this help
var fd = use('fd')
var json = use('json')
var tokenize_mod = use('tokenize')
var parse_mod = use('parse')
var fold_mod = use('fold')
var index_mod = use('index')
var filename = null
var output_path = null
var i = 0
// Argument parsing: -o/--output consumes the next value; a bare (non-dash)
// argument becomes the input file — note the LAST bare argument wins,
// since filename is overwritten unconditionally.
for (i = 0; i < length(args); i++) {
  if (args[i] == '-o' || args[i] == '--output') {
    if (i + 1 < length(args)) {
      output_path = args[i + 1]
      i = i + 1
    } else {
      log.error('-o requires a file path')
      $stop()
    }
  } else if (args[i] == '--help' || args[i] == '-h') {
    log.console("Usage: cell index <file.ce|file.cm> [options]")
    log.console("")
    log.console("Build a semantic index for a source file.")
    log.console("")
    log.console("Options:")
    log.console(" -o <path> Write output to file instead of stdout")
    $stop()
  } else if (!starts_with(args[i], '-')) {
    filename = args[i]
  }
}
if (filename == null) {
  log.error('No file specified. Usage: cell index <file>')
  $stop()
}
if (!fd.is_file(filename)) {
  log.error('File not found: ' + filename)
  $stop()
}
// Read, index, and serialize: index_file runs the full pipeline
// (tokenize → parse → fold → index) using the pipeline record.
var src = text(fd.slurp(filename))
var pipeline = {tokenize: tokenize_mod, parse: parse_mod, fold: fold_mod}
var idx = index_mod.index_file(src, filename, pipeline)
var out = json.encode(idx, true)
if (output_path != null) {
  fd.slurpwrite(output_path, out)
  log.console('Wrote index to ' + output_path)
} else {
  print(out)
  print("\n")
}
$stop()

619
index.cm Normal file
View File

@@ -0,0 +1,619 @@
// index.cm — Core semantic indexing module.
// Walks AST output from parse (+ optional fold) to build a semantic index.
//
// Two entry points:
//   index_file(src, filename, pipeline) — full pipeline; pipeline is a
//     record {tokenize: tokenize_mod, parse: parse_mod, fold: fold_mod}
//     (this matches every caller: the index/explain CLIs and the docs)
//   index_ast(ast, tokens, filename) — index a pre-parsed AST
// Convert a parser node's position fields into a span record. Note the
// field rename: nodes use from_column/to_column, spans use from_col/to_col
// (the shape consumed by the LSP handlers and the explain module).
var make_span = function(node) {
  return {
    from_row: node.from_row,
    from_col: node.from_column,
    to_row: node.to_row,
    to_col: node.to_column
  }
}
// Index an already-parsed AST. Tokens are optional (used for doc comments).
var index_ast = function(ast, tokens, filename) {
var is_actor = ends_with(filename, ".ce")
var imports = []
var symbols = []
var references = []
var call_sites = []
var exports_list = []
var node_counter = 0
var fn_map = {}
var _i = 0
var _j = 0
var fn = null
var sym_id = null
var params_list = null
var scope = null
var keys = null
var key = null
var entry = null
var reverse = {}
// Build function_nr -> {name, outer, from_row} map from ast.functions.
if (ast.functions != null) {
_i = 0
while (_i < length(ast.functions)) {
fn = ast.functions[_i]
fn_map[text(fn.function_nr)] = {
name: fn.name,
outer: fn.outer,
from_row: fn.from_row
}
_i = _i + 1
}
}
// Walk scope chain upward by `lvl` levels from func_nr.
var resolve_scope_nr = function(func_nr, lvl) {
var current = func_nr
var remaining = lvl
var info = null
if (remaining == null || remaining < 0) return null
while (remaining > 0 && current != null) {
info = fn_map[text(current)]
if (info != null) {
current = info.outer
} else {
return null
}
remaining = remaining - 1
}
return current
}
// Resolve a name node to its symbol_id using scope chain.
var resolve_symbol_id = function(name_node) {
var decl_fn_nr = resolve_scope_nr(name_node.function_nr, name_node.level)
var _si = 0
var s = null
var e = null
var kind_str = null
if (decl_fn_nr == null) return null
if (ast.scopes == null) return null
_si = 0
while (_si < length(ast.scopes)) {
s = ast.scopes[_si]
if (s.function_nr == decl_fn_nr) {
e = s[name_node.name]
if (e != null) {
kind_str = e.make
if (kind_str == "function") kind_str = "fn"
if (kind_str == "input") kind_str = "param"
return filename + ":" + name_node.name + ":" + kind_str
}
}
_si = _si + 1
}
return null
}
// Get enclosing symbol id for a function_nr.
var get_enclosing = function(func_nr) {
var info = fn_map[text(func_nr)]
if (info == null || func_nr == 0) return null
if (info.name != null) return filename + ":" + info.name + ":fn"
return null
}
// Find doc comment in tokens immediately before target_row.
var find_doc_comment = function(target_row) {
var _ti = 0
var tok = null
var lines = []
var line_nr = null
if (tokens == null) return null
_ti = 0
while (_ti < length(tokens)) {
tok = tokens[_ti]
if (tok.kind == "comment" && tok.from_row >= target_row - 10 && tok.from_row < target_row) {
lines[] = tok.value
}
if (tok.from_row >= target_row) break
_ti = _ti + 1
}
if (length(lines) > 0) return text(lines, "\n")
return null
}
// Allocate a monotonic node id.
var next_id = function() {
node_counter = node_counter + 1
return node_counter
}
// Forward declarations for mutual recursion.
var walk_expr = null
var walk_stmts = null
var walk_stmt = null
// Walk an expression node, collecting references and call sites.
walk_expr = function(node, enclosing, is_lhs) {
var nid = 0
var ref_kind = null
var callee_name = null
var callee_sym = null
var arg_count = 0
var _ai = 0
var enc = null
var param_name = null
if (node == null) return
nid = next_id()
// Name reference — has function_nr when it's a true variable reference.
if (node.kind == "name" && node.name != null && node.function_nr != null) {
if (node.intrinsic != true) {
ref_kind = is_lhs ? "write" : "read"
references[] = {
node_id: nid,
name: node.name,
symbol_id: resolve_symbol_id(node),
span: make_span(node),
enclosing: enclosing,
ref_kind: ref_kind
}
}
}
// Call expression.
if (node.kind == "(") {
callee_name = null
callee_sym = null
arg_count = (node.list != null) ? length(node.list) : 0
if (node.expression != null) {
if (node.expression.kind == "name") {
callee_name = node.expression.name
if (node.expression.intrinsic != true && node.expression.function_nr != null) {
callee_sym = resolve_symbol_id(node.expression)
}
} else if (node.expression.kind == ".") {
if (node.expression.left != null && node.expression.left.kind == "name") {
callee_name = node.expression.left.name
}
if (node.expression.right != null && node.expression.right.name != null) {
callee_name = (callee_name != null ? callee_name + "." : "") + node.expression.right.name
}
}
}
if (callee_name != "use") {
call_sites[] = {
node_id: nid,
callee: callee_name,
callee_symbol_id: callee_sym,
span: make_span(node),
enclosing: enclosing,
args_count: arg_count
}
}
// Also record the callee name as a "call" reference.
if (node.expression != null && node.expression.kind == "name" &&
node.expression.function_nr != null && node.expression.intrinsic != true) {
references[] = {
node_id: nid,
name: node.expression.name,
symbol_id: resolve_symbol_id(node.expression),
span: make_span(node.expression),
enclosing: enclosing,
ref_kind: "call"
}
}
// Walk callee expression (skip name — already recorded above).
if (node.expression != null && node.expression.kind != "name") {
walk_expr(node.expression, enclosing, false)
}
// Walk arguments.
if (node.list != null) {
_ai = 0
while (_ai < length(node.list)) {
walk_expr(node.list[_ai], enclosing, false)
_ai = _ai + 1
}
}
return
}
// Function / arrow function expression — walk body.
if (node.kind == "function" || node.kind == "arrow function") {
enc = enclosing
if (node.name != null && node.function_nr != null) {
enc = filename + ":" + node.name + ":fn"
}
// Record params as symbols.
if (node.list != null) {
_ai = 0
while (_ai < length(node.list)) {
param_name = node.list[_ai].name
if (param_name != null) {
symbols[] = {
symbol_id: filename + ":" + param_name + ":param",
name: param_name,
kind: "param",
decl_span: make_span(node.list[_ai]),
doc_comment: null,
scope_fn_nr: node.function_nr,
params: null
}
}
_ai = _ai + 1
}
}
walk_stmts(node.statements, enc)
walk_stmts(node.disruption, enc)
return
}
// Assignment operators — left side is a write.
if (node.kind == "=" || node.kind == "+=" || node.kind == "-=" ||
node.kind == "*=" || node.kind == "/=" || node.kind == "%=") {
walk_expr(node.left, enclosing, true)
walk_expr(node.right, enclosing, false)
return
}
// Property access — only walk left (right is property name, not a ref).
if (node.kind == ".") {
walk_expr(node.left, enclosing, false)
return
}
// Index access.
if (node.kind == "[") {
walk_expr(node.left, enclosing, false)
walk_expr(node.right, enclosing, false)
return
}
// Array literal.
if (node.kind == "array" && node.list != null) {
_ai = 0
while (_ai < length(node.list)) {
walk_expr(node.list[_ai], enclosing, false)
_ai = _ai + 1
}
return
}
// Record literal — only walk values, not keys.
if (node.kind == "record" && node.list != null) {
_ai = 0
while (_ai < length(node.list)) {
if (node.list[_ai] != null) {
walk_expr(node.list[_ai].right, enclosing, false)
}
_ai = _ai + 1
}
return
}
// Template literal.
if (node.kind == "template" && node.list != null) {
_ai = 0
while (_ai < length(node.list)) {
walk_expr(node.list[_ai], enclosing, false)
_ai = _ai + 1
}
return
}
// Prefix/postfix increment/decrement — treat as write.
if (node.kind == "++" || node.kind == "--") {
walk_expr(node.expression, enclosing, true)
return
}
// Ternary.
if (node.kind == "?" || node.kind == "then") {
walk_expr(node.expression, enclosing, false)
walk_expr(node.then, enclosing, false)
walk_expr(node.else, enclosing, false)
return
}
// Generic fallthrough: walk left, right, expression.
if (node.left != null) walk_expr(node.left, enclosing, is_lhs)
if (node.right != null) walk_expr(node.right, enclosing, false)
if (node.expression != null) walk_expr(node.expression, enclosing, false)
}
// Walk an array of statements.
walk_stmts = function(stmts, enclosing) {
var _wi = 0
if (stmts == null) return
_wi = 0
while (_wi < length(stmts)) {
walk_stmt(stmts[_wi], enclosing)
_wi = _wi + 1
}
}
    // Walk a single statement. Declarations ("var"/"def") become symbol
    // records, use() calls become import records, and every nested
    // expression/statement list is recursed into. `enclosing` is the
    // symbol id of the containing function, or null at top level.
    walk_stmt = function(stmt, enclosing) {
        var sym_kind = null
        var s_id = null
        var p_list = null
        var _di = 0
        var local_name = null // NOTE(review): declared but never used below
        if (stmt == null) return
        // Variable/constant declaration.
        if (stmt.kind == "var" || stmt.kind == "def") {
            if (stmt.left != null && stmt.left.name != null) {
                sym_kind = stmt.kind
                p_list = null
                // A function-valued RHS upgrades the symbol kind to "fn"
                // and captures the parameter name list.
                if (stmt.right != null && (stmt.right.kind == "function" || stmt.right.kind == "arrow function")) {
                    sym_kind = "fn"
                    p_list = []
                    if (stmt.right.list != null) {
                        _di = 0
                        while (_di < length(stmt.right.list)) {
                            if (stmt.right.list[_di].name != null) {
                                p_list[] = stmt.right.list[_di].name
                            }
                            _di = _di + 1
                        }
                    }
                }
                // Symbol ids take the form "<file>:<name>:<kind>".
                s_id = filename + ":" + stmt.left.name + ":" + sym_kind
                symbols[] = {
                    symbol_id: s_id,
                    name: stmt.left.name,
                    kind: sym_kind,
                    decl_span: make_span(stmt),
                    doc_comment: find_doc_comment(stmt.from_row),
                    scope_fn_nr: 0,
                    params: p_list
                }
                // Check for import: var x = use('path') — the first
                // argument must be a text literal.
                if (stmt.right != null && stmt.right.kind == "(" &&
                    stmt.right.expression != null && stmt.right.expression.name == "use" &&
                    stmt.right.list != null && length(stmt.right.list) > 0 &&
                    stmt.right.list[0].kind == "text") {
                    imports[] = {
                        local_name: stmt.left.name,
                        module_path: stmt.right.list[0].value,
                        span: make_span(stmt)
                    }
                }
            }
            // The initializer itself may contain references.
            walk_expr(stmt.right, enclosing, false)
            return
        }
        // Multiple declarations (var_list) — index each one individually.
        if (stmt.kind == "var_list" && stmt.list != null) {
            _di = 0
            while (_di < length(stmt.list)) {
                walk_stmt(stmt.list[_di], enclosing)
                _di = _di + 1
            }
            return
        }
        // Expression statement (kind "call").
        if (stmt.kind == "call") {
            // Check for bare use() as expression statement; recorded as an
            // import with no local name.
            if (stmt.expression != null && stmt.expression.kind == "(" &&
                stmt.expression.expression != null && stmt.expression.expression.name == "use" &&
                stmt.expression.list != null && length(stmt.expression.list) > 0 &&
                stmt.expression.list[0].kind == "text") {
                imports[] = {
                    local_name: null,
                    module_path: stmt.expression.list[0].value,
                    span: make_span(stmt)
                }
            }
            walk_expr(stmt.expression, enclosing, false)
            return
        }
        // If statement: condition, then-branch, optional else-branch.
        if (stmt.kind == "if") {
            walk_expr(stmt.expression, enclosing, false)
            walk_stmts(stmt.then, enclosing)
            if (stmt.else != null) {
                walk_stmts(stmt.else, enclosing)
            }
            // else-if chain (stored in stmt.list when present).
            if (stmt.list != null) {
                walk_stmts(stmt.list, enclosing)
            }
            return
        }
        // While loop.
        if (stmt.kind == "while") {
            walk_expr(stmt.expression, enclosing, false)
            walk_stmts(stmt.statements, enclosing)
            return
        }
        // For loop: init/test/update expressions plus the body.
        if (stmt.kind == "for") {
            walk_expr(stmt.init, enclosing, false)
            walk_expr(stmt.test, enclosing, false)
            walk_expr(stmt.update, enclosing, false)
            walk_stmts(stmt.statements, enclosing)
            return
        }
        // Do-while loop (condition evaluated after the body).
        if (stmt.kind == "do") {
            walk_stmts(stmt.statements, enclosing)
            walk_expr(stmt.expression, enclosing, false)
            return
        }
        // Return statement.
        if (stmt.kind == "return") {
            walk_expr(stmt.expression, enclosing, false)
            return
        }
        // Disrupt statement.
        if (stmt.kind == "disrupt") {
            walk_expr(stmt.expression, enclosing, false)
            return
        }
        // Block.
        if (stmt.kind == "block") {
            walk_stmts(stmt.statements, enclosing)
            return
        }
        // Fallthrough for any other statement kind: walk whichever
        // sub-nodes happen to be present.
        walk_expr(stmt.expression, enclosing, false)
        walk_expr(stmt.left, enclosing, false)
        walk_expr(stmt.right, enclosing, false)
        walk_stmts(stmt.statements, enclosing)
    }
    // --- 1. Process named functions from ast.functions ---
    // Each parsed function gets a "fn" symbol. Anonymous functions are
    // keyed as "anon_<function_nr>"; parameters become "param" symbols
    // scoped to the function's number, and the body is walked with the
    // function's symbol id as the enclosing context.
    if (ast.functions != null) {
        _i = 0
        while (_i < length(ast.functions)) {
            fn = ast.functions[_i]
            sym_id = filename + ":" + (fn.name != null ? fn.name : "anon_" + text(fn.function_nr)) + ":fn"
            params_list = []
            if (fn.list != null) {
                _j = 0
                while (_j < length(fn.list)) {
                    if (fn.list[_j].name != null) {
                        params_list[] = fn.list[_j].name
                    }
                    _j = _j + 1
                }
            }
            symbols[] = {
                symbol_id: sym_id,
                name: fn.name,
                kind: "fn",
                decl_span: make_span(fn),
                doc_comment: find_doc_comment(fn.from_row),
                scope_fn_nr: fn.outer != null ? fn.outer : 0,
                params: params_list
            }
            // Record params as symbols.
            // NOTE(review): param ids are "<file>:<name>:param" with no
            // function qualifier, so same-named params in different
            // functions share one symbol id — confirm this is intended.
            if (fn.list != null) {
                _j = 0
                while (_j < length(fn.list)) {
                    if (fn.list[_j].name != null) {
                        symbols[] = {
                            symbol_id: filename + ":" + fn.list[_j].name + ":param",
                            name: fn.list[_j].name,
                            kind: "param",
                            decl_span: make_span(fn.list[_j]),
                            doc_comment: null,
                            scope_fn_nr: fn.function_nr,
                            params: null
                        }
                    }
                    _j = _j + 1
                }
            }
            // Walk function body (regular statements plus the disruption
            // handler list, if any).
            walk_stmts(fn.statements, sym_id)
            walk_stmts(fn.disruption, sym_id)
            _i = _i + 1
        }
    }
    // --- 2. Walk top-level statements ---
    // Top-level references carry a null enclosing context.
    walk_stmts(ast.statements, null)
    // --- 3. Detect exports for .cm modules ---
    // Scan backward for the last top-level return; if it returns a record
    // literal, each key becomes an export, linked to a symbol when the
    // value is a resolvable name reference. The backward scan breaks at
    // the first return found, whatever its expression's shape.
    if (!is_actor && ast.statements != null) {
        _i = length(ast.statements) - 1
        while (_i >= 0) {
            if (ast.statements[_i].kind == "return" && ast.statements[_i].expression != null) {
                // Check if the return expression is a record literal with key-value pairs.
                if (ast.statements[_i].expression.list != null) {
                    _j = 0
                    while (_j < length(ast.statements[_i].expression.list)) {
                        entry = ast.statements[_i].expression.list[_j]
                        if (entry != null && entry.left != null && entry.left.name != null) {
                            // Link the export to a symbol if the value is a name reference.
                            sym_id = null
                            if (entry.right != null && entry.right.kind == "name" && entry.right.function_nr != null) {
                                sym_id = resolve_symbol_id(entry.right)
                            }
                            exports_list[] = {
                                name: entry.left.name,
                                symbol_id: sym_id
                            }
                        }
                        _j = _j + 1
                    }
                }
                break
            }
            _i = _i - 1
        }
    }
    // --- 4. Build reverse refs ---
    // Group every recorded reference under its name.
    // NOTE(review): keyed by name, not symbol_id, so shadowed names from
    // different scopes end up in the same bucket — confirm intended.
    _i = 0
    while (_i < length(references)) {
        key = references[_i].name
        if (reverse[key] == null) {
            reverse[key] = []
        }
        reverse[key][] = {
            node_id: references[_i].node_id,
            span: references[_i].span,
            enclosing: references[_i].enclosing,
            ref_kind: references[_i].ref_kind
        }
        _i = _i + 1
    }
    // Assemble the version-1 index payload (imports, symbols, references,
    // call sites, exports, reverse refs).
    return {
        version: 1,
        path: filename,
        is_actor: is_actor,
        imports: imports,
        symbols: symbols,
        references: references,
        call_sites: call_sites,
        exports: exports_list,
        reverse_refs: reverse
    }
}
// Convenience entry point: push src through the supplied pipeline
// (tokenize -> parse -> optional fold), then index the resulting AST.
// pipeline is {tokenize, parse, fold}; pass fold as null to skip folding.
var index_file = function(src, filename, pipeline) {
    var tokenized = pipeline.tokenize(src, filename)
    var tree = pipeline.parse(tokenized.tokens, src, filename, pipeline.tokenize)
    if (pipeline.fold != null) {
        tree = pipeline.fold(tree)
    }
    return index_ast(tree, tokenized.tokens, filename)
}
// Module exports: index_file drives the whole pipeline from source text;
// index_ast indexes an already-parsed AST directly.
return {
    index_file: index_file,
    index_ast: index_ast
}