From 4ff9332d38f3735b8d239450e9fd0acbc35f5838 Mon Sep 17 00:00:00 2001 From: John Alanbrook Date: Mon, 9 Feb 2026 18:53:13 -0600 Subject: [PATCH 1/2] lsp --- editors/ai/pit-context.md | 251 +++++++++++ editors/vscode/language-configuration.json | 30 ++ editors/vscode/lsp/analysis.cm | 113 +++++ editors/vscode/lsp/completions.cm | 133 ++++++ editors/vscode/lsp/hover.cm | 461 ++++++++++++++++++++ editors/vscode/lsp/lsp.ce | 209 +++++++++ editors/vscode/lsp/protocol.cm | 102 +++++ editors/vscode/lsp/symbols.cm | 238 ++++++++++ editors/vscode/package.json | 62 +++ editors/vscode/src/extension.ts | 44 ++ editors/vscode/syntaxes/pit.tmLanguage.json | 160 +++++++ editors/vscode/tsconfig.json | 13 + 12 files changed, 1816 insertions(+) create mode 100644 editors/ai/pit-context.md create mode 100644 editors/vscode/language-configuration.json create mode 100644 editors/vscode/lsp/analysis.cm create mode 100644 editors/vscode/lsp/completions.cm create mode 100644 editors/vscode/lsp/hover.cm create mode 100644 editors/vscode/lsp/lsp.ce create mode 100644 editors/vscode/lsp/protocol.cm create mode 100644 editors/vscode/lsp/symbols.cm create mode 100644 editors/vscode/package.json create mode 100644 editors/vscode/src/extension.ts create mode 100644 editors/vscode/syntaxes/pit.tmLanguage.json create mode 100644 editors/vscode/tsconfig.json diff --git a/editors/ai/pit-context.md b/editors/ai/pit-context.md new file mode 100644 index 00000000..c1833b52 --- /dev/null +++ b/editors/ai/pit-context.md @@ -0,0 +1,251 @@ +# ƿit Language — AI Context + +ƿit (pronounced "pit") is a safe, actor-based programming language. Its syntax resembles JavaScript but with significant differences. Scripts use `.ce` (actors) and `.cm` (modules) file extensions. + +## Key Differences from JavaScript + +- **`var` / `def`** — `var` is mutable, `def` is constant. No `let` or `const`. +- **`==` is strict** — No `===` or `!==`. `==` and `!=` are always strict comparison. +- **No `undefined`** — Only `null`. 
Division by zero produces `null`, not `Infinity`. +- **No classes** — Use `meme()`, `proto()`, `isa()` for prototype chains. +- **No `for...in`, `for...of`, spread, rest, or default params.** +- **Variables declared at function body level only** — Not inside `if`/`while`/`for` blocks. +- **All variables must be initialized** — `var x` alone is an error; use `var x = null`. +- **`disrupt` / `disruption`** — No `try`/`catch`/`throw`. Error handling uses: + ```javascript + var fn = function() { + disrupt // raise an error (bare keyword, no value) + } disruption { + // handle the error + } + ``` +- **No arraybuffers** — Use `blob` (works with bits; `stone(blob)` before reading). +- **Identifiers can contain `?` and `!`** — e.g., `nil?`, `set!`, `is?valid`. +- **4-parameter limit** — Functions take at most 4 named parameters. +- **Everything lowercase** — Convention is all-lowercase identifiers with underscores. + +## Variable Declaration + +```javascript +var count = 0 // mutable +def MAX = 100 // constant (cannot be reassigned) +var x = null // must initialize (var x alone is an error) +``` + +## Functions + +```javascript +var greet = function(name) { + print(`hello ${name}`) +} + +// Arrow functions +var double = x => x * 2 +var add = (a, b) => a + b +``` + +## Push / Pop Syntax + +```javascript +var a = [1, 2] +a[] = 3 // push: a is now [1, 2, 3] +var v = a[] // pop: v is 3, a is [1, 2] +``` + +## Control Flow + +```javascript +if (x > 0) { + print("positive") +} else { + print("non-positive") +} + +while (i < 10) { + i = i + 1 +} + +for (var i = 0; i < 10; i = i + 1) { + print(i) +} + +// do-while +do { + i = i + 1 +} while (i < 10) +``` + +## Error Handling + +```javascript +var safe_divide = function(a, b) { + if (b == 0) { + disrupt + } + return a / b +} disruption { + return null +} +``` + +## Creator Functions (Polymorphic) + +These examine argument types to decide behavior: + +### array() +- `array(5)` — `[null, null, null, null, null]` +- `array(3, 0)` — 
`[0, 0, 0]` +- `array(5, i => i * 2)` — `[0, 2, 4, 6, 8]` +- `array([1,2])` — copy +- `array([1,2,3], x => x * 10)` — map: `[10, 20, 30]` +- `array([1,2], [3,4])` — concat: `[1, 2, 3, 4]` +- `array([1,2,3,4,5], 1, 4)` — slice: `[2, 3, 4]` +- `array({a: 1, b: 2})` — keys: `["a", "b"]` +- `array("hello")` — characters: `["h", "e", "l", "l", "o"]` +- `array("a,b,c", ",")` — split: `["a", "b", "c"]` + +### text() +- `text([1, 2, 3], ", ")` — join: `"1, 2, 3"` +- `text(255, 16)` — radix: `"ff"` +- `text("hello", 0, 3)` — substring: `"hel"` + +### number() +- `number("42")` — parse: `42` +- `number("ff", 16)` — radix: `255` +- `number(true)` — `1` + +### record() +- `record({a: 1})` — copy +- `record({a: 1}, {b: 2})` — merge: `{a: 1, b: 2}` +- `record(["x", "y"])` — from keys: `{x: true, y: true}` + +## All Intrinsic Functions + +**Constants:** `false`, `true`, `null`, `pi` + +**Type checks:** `is_array`, `is_blob`, `is_character`, `is_data`, `is_digit`, `is_false`, `is_fit`, `is_function`, `is_integer`, `is_letter`, `is_logical`, `is_lower`, `is_null`, `is_number`, `is_object`, `is_pattern`, `is_stone`, `is_text`, `is_true`, `is_upper`, `is_whitespace` + +**Creators:** `array`, `logical`, `number`, `record`, `text` + +**Math:** `abs`, `ceiling`, `floor`, `fraction`, `max`, `min`, `modulo`, `neg`, `remainder`, `round`, `sign`, `trunc`, `whole` + +**Text:** `character`, `codepoint`, `ends_with`, `extract`, `format`, `lower`, `normalize`, `replace`, `search`, `starts_with`, `trim`, `upper` + +**Array:** `every`, `filter`, `find`, `for`, `length`, `reduce`, `reverse`, `some`, `sort` + +**Objects:** `meme`, `proto`, `isa`, `stone` + +**Functions:** `apply`, `splat` + +**I/O:** `print` + +**Async:** `fallback`, `parallel`, `race`, `sequence` + +**Misc:** `logical`, `not`, `use` + +## Variable Scoping + +Variables are scoped to the function body in which they are declared. There is no block scoping. 
All declarations must be at the top level of a function body (not nested inside `if`/`while`/`for`). + +```javascript +var outer = function() { + var x = 10 + var inner = function() { + // x is visible here via closure + print(x) + } + inner() +} +``` + +## Modules (.cm files) + +Modules return a value (typically a record of exports). They are loaded with `use()`, cached, and frozen. + +```javascript +// math_utils.cm +var square = x => x * x +var cube = x => x * x * x +return {square: square, cube: cube} + +// main.ce +var utils = use('math_utils') +print(utils.square(5)) // 25 +``` + +## Standard Library (loaded with use()) + +- `blob` — binary data (works with bits, not bytes) +- `time` — time constants and conversions +- `math` — trig, logarithms, roots (sub-modules: `math/radians`, `math/turns`) +- `json` — JSON encoding/decoding (`json.encode`, `json.decode`) +- `random` — random number generation +- `fd` — file descriptor operations (`fd.read`, `fd.write`, `fd.slurp`, `fd.stat`) + +## Actor Model (.ce files) + +Actors are independent execution units that never share memory. They communicate via message passing. 
+
+```javascript
+// greeter.ce
+$receiver(function(msg) {
+  $send(msg.from, {greeting: `hello ${msg.name}`})
+})
+```
+
+### Actor Intrinsics ($ prefix)
+
+- `$me` — this actor's address
+- `$send(address, message)` — send a message
+- `$start(script, env)` — start a new actor
+- `$stop()` — stop this actor
+- `$delay(ms)` — delay processing
+- `$receiver(fn)` — set message handler
+- `$clock(interval, message)` — periodic self-message
+- `$portal(name)` — create named portal
+- `$contact(name)` — connect to portal
+- `$couple(address)` — lifecycle coupling
+- `$unneeded(fn)` — cleanup callback
+- `$connection(address)` — establish connection
+- `$time_limit(ms)` — execution time limit
+
+## Common Patterns
+
+### Iteration
+```javascript
+// Preferred: use for() intrinsic
+for([1, 2, 3], function(item, index) {
+  print(`${text(index)}: ${text(item)}`)
+})
+
+// C-style for loop
+for (var i = 0; i < length(items); i = i + 1) {
+  print(items[i])
+}
+```
+
+### String Building
+```javascript
+// Use backtick interpolation
+var msg = `hello ${name}, you are ${text(age)} years old`
+
+// Join array
+var csv = text(values, ",")
+```
+
+### Record Manipulation
+```javascript
+var obj = {name: "alice", age: 30}
+var keys = array(obj) // ["name", "age"]
+var copy = record(obj) // mutable copy
+var merged = record(obj, {role: "admin"})
+```
+
+### Error-Safe Operations
+```javascript
+var safe_parse = function(input) {
+  return number(input)
+} disruption {
+  return null
+}
+```
diff --git a/editors/vscode/language-configuration.json b/editors/vscode/language-configuration.json
new file mode 100644
index 00000000..fb984644
--- /dev/null
+++ b/editors/vscode/language-configuration.json
@@ -0,0 +1,30 @@
+{
+  "comments": {
+    "lineComment": "//",
+    "blockComment": ["/*", "*/"]
+  },
+  "brackets": [
+    ["{", "}"],
+    ["[", "]"],
+    ["(", ")"]
+  ],
+  "autoClosingPairs": [
+    { "open": "{", "close": "}" },
+    { "open": "[", "close": "]" },
+    { "open": "(", "close": ")" },
+    { "open": "\"", "close": "\"", "notIn": ["string"] },
+    { "open": "`", "close": "`", "notIn": ["string"] }
+  ],
+  "surroundingPairs": [
+    ["{", "}"],
+    ["[", "]"],
+    ["(", ")"],
+    ["\"", "\""],
+    ["`", "`"]
+  ],
+  "indentationRules": {
+    "increaseIndentPattern": "^.*\\{[^}\"'`]*$",
+    "decreaseIndentPattern": "^\\s*\\}"
+  },
+  "wordPattern": "[a-zA-Z_$][a-zA-Z0-9_$?!]*"
+}
diff --git a/editors/vscode/lsp/analysis.cm b/editors/vscode/lsp/analysis.cm
new file mode 100644
index 00000000..9f3d7b4b
--- /dev/null
+++ b/editors/vscode/lsp/analysis.cm
@@ -0,0 +1,113 @@
+// Document analysis module.
+// Call make(tokenize_mod, parse_mod) to get an analysis object.
+
+var json = use('json')
+
+// Create an analysis module bound to the tokenize and parse functions.
+//   tokenize_mod(src, uri) is expected to return a record with a `tokens` array.
+//   parse_mod(tokens, src, uri, tokenize_mod) is expected to return an ast that
+//   may carry an `errors` array.
+// Returns a record {update, remove, diagnostics, token_at}.
+var make = function(tokenize_mod, parse_mod) {
+
+  // Tokenize and parse a document, storing the results.
+  //   docs   - document store (record keyed by URI); docs[uri] is overwritten
+  //   uri    - document URI
+  //   params - {src, version}
+  // Returns the stored document {uri, text, version, tokens, ast, errors}.
+  var update = function(docs, uri, params) {
+    var src = params.src
+    var version = params.version
+    var tok_result = null
+    var ast = null
+    var errors = []
+    var doc = null
+
+    var do_tokenize = function() {
+      tok_result = tokenize_mod(src, uri)
+    } disruption {
+      errors = [{message: "Tokenize failed", line: 1, column: 1}]
+    }
+    var do_parse = function() {
+      ast = parse_mod(tok_result.tokens, src, uri, tokenize_mod)
+    } disruption {
+      // parse_mod disrupted before returning, so no ast (and no ast.errors)
+      // is available. Report the failure instead of silently publishing an
+      // empty diagnostics list for a document that could not be parsed.
+      errors = [{message: "Parse failed", line: 1, column: 1}]
+    }
+
+    do_tokenize()
+
+    // Only parse when tokenizing produced a result.
+    if (tok_result != null) {
+      do_parse()
+
+      // Errors collected by the parser take precedence when an ast exists.
+      if (ast != null && ast.errors != null) {
+        errors = ast.errors
+      }
+    }
+
+    doc = {
+      uri: uri,
+      text: src,
+      version: version,
+      tokens: (tok_result != null) ? tok_result.tokens : [],
+      ast: ast,
+      errors: errors
+    }
+    docs[uri] = doc
+    return doc
+  }
+
+  // Remove a document from the store.
+  var remove = function(docs, uri) {
+    delete docs[uri]
+  }
+
+  // Convert parse errors to LSP diagnostics.
+  // Error positions are treated as 1-based and shifted to the 0-based
+  // positions LSP uses; a missing line/column falls back to 0.
+  var diagnostics = function(doc) {
+    var result = []
+    var _i = 0
+    var e = null
+    var line = null
+    var col = null
+    while (_i < length(doc.errors)) {
+      e = doc.errors[_i]
+      line = (e.line != null) ? e.line - 1 : 0
+      col = (e.column != null) ? e.column - 1 : 0
+      result[] = {
+        range: {
+          // One-character range starting at the reported position.
+          start: {line: line, character: col},
+          end: {line: line, character: col + 1}
+        },
+        severity: 1,
+        source: "pit",
+        message: e.message
+      }
+      _i = _i + 1
+    }
+    return result
+  }
+
+  // Find the token at a given line/column (0-based).
+  // Returns the first token whose span contains the position, or null.
+  var token_at = function(doc, line, col) {
+    var tokens = doc.tokens
+    var _i = 0
+    var tok = null
+    while (_i < length(tokens)) {
+      tok = tokens[_i]
+      // Token starts on this line and the column lies within its columns.
+      if (tok.from_row == line && tok.from_column <= col && tok.to_column >= col) {
+        return tok
+      }
+      // Multi-line token that strictly surrounds this line.
+      if (tok.from_row < line && tok.to_row > line) {
+        return tok
+      }
+      // Multi-line token that ends on this line at or after the column.
+      if (tok.from_row < line && tok.to_row == line && tok.to_column >= col) {
+        return tok
+      }
+      // Multi-line token that starts on this line at or before the column.
+      if (tok.from_row == line && tok.to_row > line && tok.from_column <= col) {
+        return tok
+      }
+      _i = _i + 1
+    }
+    return null
+  }
+
+  return {
+    update: update,
+    remove: remove,
+    diagnostics: diagnostics,
+    token_at: token_at
+  }
+}
+
+return make
diff --git a/editors/vscode/lsp/completions.cm b/editors/vscode/lsp/completions.cm
new file mode 100644
index 00000000..475cb73f
--- /dev/null
+++ b/editors/vscode/lsp/completions.cm
@@ -0,0 +1,133 @@
+// Completion provider for the ƿit LSP.
+
+// CompletionItemKind constants (LSP spec)
+def KIND_FUNCTION = 3
+def KIND_VARIABLE = 6
+def KIND_KEYWORD = 14
+def KIND_CONSTANT = 21
+
+// All intrinsic function names
+def intrinsic_functions = [
+  "abs", "apply", "array", "ceiling", "character", "codepoint",
+  "ends_with", "every", "extract", "fallback", "filter", "find",
+  "floor", "format", "fraction",
+  "is_array", "is_blob", "is_character", "is_data", "is_digit",
+  "is_false", "is_fit", "is_function", "is_integer", "is_letter",
+  "is_logical", "is_lower", "is_null", "is_number", "is_object",
+  "is_pattern", "is_stone", "is_text", "is_true", "is_upper",
+  "is_whitespace",
+  "length", "logical", "lower", "max", "min", "modulo",
+  "neg", "normalize", "not", "number",
+  "parallel", "print", "race", "record", "reduce", "remainder",
+  "replace", "reverse", "round",
+  "search", "sequence", "sign", "some", "sort", "starts_with",
+  "stone", "text", "trim", "trunc", "upper", "whole",
+  "meme", "proto", "isa", "splat", "use"
+]
+
+// Intrinsic constants that are not keywords (null/true/false are listed
+// under keywords below).
+def intrinsic_constants = ["pi"]
+
+// Keywords that can be completed
+def keywords = [
+  "var", "def", "if", "else", "for", "while", "do",
+  "function", "return", "go", "break", "continue",
+  "disrupt", "disruption", "delete", "in", "this",
+  "null", "true", "false"
+]
+
+// Actor intrinsics (only in .ce files)
+def actor_intrinsics = [
+  "$me", "$send", "$start", "$stop", "$delay",
+  "$receiver", "$clock", "$portal", "$contact",
+  "$couple", "$unneeded", "$connection", "$time_limit"
+]
+
+// Walk AST scopes to find variables visible at a position.
+// Simplified: every declared variable in every scope is returned; line and
+// col are accepted but not yet used to narrow the result.
+// Each name is reported once (first declaration wins), so a name declared in
+// several functions does not produce duplicate completion entries.
+// Returns an array of completion items {label, kind, detail}.
+var collect_scope_vars = function(doc, line, col) {
+  var vars = []
+  var seen = {}
+  var ast = doc.ast
+  var _i = 0
+  var _j = 0
+  var scope = null
+  var v = null
+
+  if (ast == null || ast.scopes == null) {
+    return vars
+  }
+
+  while (_i < length(ast.scopes)) {
+    scope = ast.scopes[_i]
+    if (scope.vars != null) {
+      _j = 0
+      while (_j < length(scope.vars)) {
+        v = scope.vars[_j]
+        if (v.name != null && seen[v.name] != true) {
+          seen[v.name] = true
+          vars[] = {
+            label: v.name,
+            kind: (v.is_const == true) ? KIND_CONSTANT : KIND_VARIABLE,
+            detail: (v.is_const == true) ? "def" : "var"
+          }
+        }
+        _j = _j + 1
+      }
+    }
+    _i = _i + 1
+  }
+
+  return vars
+}
+
+// Append one completion item per label to items, all sharing kind and detail.
+var append_labels = function(items, labels, kind, detail) {
+  var _i = 0
+  while (_i < length(labels)) {
+    items[] = {
+      label: labels[_i],
+      kind: kind,
+      detail: detail
+    }
+    _i = _i + 1
+  }
+}
+
+// Provide completions for a document at a position.
+// Returns an array of completion items {label, kind, detail}: intrinsic
+// functions, intrinsic constants, keywords, actor intrinsics (only when the
+// document URI ends with ".ce"), then the document's declared variables.
+var complete = function(doc, line, col) {
+  var items = []
+  var _i = 0
+  var is_actor = ends_with(doc.uri, ".ce")
+  var scope_vars = null
+
+  append_labels(items, intrinsic_functions, KIND_FUNCTION, "intrinsic")
+  append_labels(items, intrinsic_constants, KIND_CONSTANT, "intrinsic")
+  append_labels(items, keywords, KIND_KEYWORD, "keyword")
+
+  // Actor intrinsics (only for .ce files)
+  if (is_actor) {
+    append_labels(items, actor_intrinsics, KIND_FUNCTION, "actor intrinsic")
+  }
+
+  // Variables from scope analysis
+  scope_vars = collect_scope_vars(doc, line, col)
+  _i = 0
+  while (_i < length(scope_vars)) {
+    items[] = scope_vars[_i]
+    _i = _i + 1
+  }
+
+  return items
+}
+
+return {
+  complete: complete
+}
diff --git a/editors/vscode/lsp/hover.cm b/editors/vscode/lsp/hover.cm
new file mode 100644
index 00000000..9f77472c
--- /dev/null
+++ b/editors/vscode/lsp/hover.cm
@@ -0,0 +1,461 @@
+// Hover provider for the ƿit LSP.
+// Shows documentation for intrinsic functions and variable info.
+ +// Intrinsic function documentation database. +// Each entry: {signature, description} +def intrinsic_docs = { + abs: { + signature: "abs(number)", + description: "Absolute value. Returns null for non-numbers." + }, + apply: { + signature: "apply(function, array)", + description: "Execute the function, passing array elements as input values." + }, + array: { + signature: "array(value, ...)", + description: "Create arrays. Polymorphic: array(number) creates sized array, array(array) copies, array(array, fn) maps, array(text) splits into characters, array(text, sep) splits by separator." + }, + ceiling: { + signature: "ceiling(number, place)", + description: "Round up. If place is 0 or null, round to smallest integer >= number." + }, + character: { + signature: "character(value)", + description: "If text, returns the first character. If a non-negative integer, returns the character from that codepoint." + }, + codepoint: { + signature: "codepoint(text)", + description: "Returns the codepoint number of the first character." + }, + ends_with: { + signature: "ends_with(text, suffix)", + description: "Returns true if the text ends with the given suffix." + }, + every: { + signature: "every(array, function)", + description: "Returns true if every element satisfies the predicate." + }, + extract: { + signature: "extract(text, pattern, from, to)", + description: "Match text to pattern. Returns a record of saved fields, or null if no match." + }, + fallback: { + signature: "fallback(requestor_array)", + description: "Returns a requestor that tries each requestor in order until one succeeds." + }, + filter: { + signature: "filter(array, function)", + description: "Returns a new array containing elements for which function returns true." + }, + find: { + signature: "find(array, function, reverse, from)", + description: "Returns the element number where function returns true, or null if not found. If second arg is not a function, compares directly." 
+ }, + floor: { + signature: "floor(number, place)", + description: "Round down. If place is 0 or null, round to greatest integer <= number." + }, + format: { + signature: "format(text, collection, transformer)", + description: "Substitute {key} placeholders in text with values from a collection (array or record)." + }, + fraction: { + signature: "fraction(number)", + description: "Returns the fractional part of a number." + }, + is_array: { + signature: "is_array(value)", + description: "Returns true if the value is an array." + }, + is_blob: { + signature: "is_blob(value)", + description: "Returns true if the value is a blob." + }, + is_character: { + signature: "is_character(value)", + description: "Returns true if the value is a single character." + }, + is_data: { + signature: "is_data(value)", + description: "Returns true if the value is data (not a function)." + }, + is_digit: { + signature: "is_digit(value)", + description: "Returns true if the value is a digit character." + }, + is_false: { + signature: "is_false(value)", + description: "Returns true if the value is false." + }, + is_fit: { + signature: "is_fit(value)", + description: "Returns true if the value is a fit integer." + }, + is_function: { + signature: "is_function(value)", + description: "Returns true if the value is a function." + }, + is_integer: { + signature: "is_integer(value)", + description: "Returns true if the value is an integer." + }, + is_letter: { + signature: "is_letter(value)", + description: "Returns true if the value is a letter character." + }, + is_logical: { + signature: "is_logical(value)", + description: "Returns true if the value is a logical (boolean)." + }, + is_lower: { + signature: "is_lower(value)", + description: "Returns true if the value is a lowercase character." + }, + is_null: { + signature: "is_null(value)", + description: "Returns true if the value is null." 
+ }, + is_number: { + signature: "is_number(value)", + description: "Returns true if the value is a number." + }, + is_object: { + signature: "is_object(value)", + description: "Returns true if the value is an object (record)." + }, + is_pattern: { + signature: "is_pattern(value)", + description: "Returns true if the value is a pattern (regex)." + }, + is_stone: { + signature: "is_stone(value)", + description: "Returns true if the value is frozen (stoned)." + }, + is_text: { + signature: "is_text(value)", + description: "Returns true if the value is text." + }, + is_true: { + signature: "is_true(value)", + description: "Returns true if the value is true." + }, + is_upper: { + signature: "is_upper(value)", + description: "Returns true if the value is an uppercase character." + }, + is_whitespace: { + signature: "is_whitespace(value)", + description: "Returns true if the value is a whitespace character." + }, + length: { + signature: "length(value)", + description: "Array: number of elements. Text: number of codepoints. Function: arity. Blob: number of bits. Record: record.length()." + }, + logical: { + signature: "logical(value)", + description: "Convert to logical. 0/false/null/\"false\" produce false; 1/true/\"true\" produce true." + }, + lower: { + signature: "lower(text)", + description: "Returns text with all uppercase characters converted to lowercase." + }, + max: { + signature: "max(number, number)", + description: "Returns the larger of two numbers." + }, + min: { + signature: "min(number, number)", + description: "Returns the smaller of two numbers." + }, + modulo: { + signature: "modulo(dividend, divisor)", + description: "Result has the sign of the divisor." + }, + neg: { + signature: "neg(number)", + description: "Negate. Reverse the sign of a number." + }, + normalize: { + signature: "normalize(text)", + description: "Unicode normalize." + }, + not: { + signature: "not(logical)", + description: "Returns the opposite logical. 
Returns null for non-logicals." + }, + number: { + signature: "number(value, radix_or_format)", + description: "Convert to number. Polymorphic: number(logical), number(text), number(text, radix), number(text, format)." + }, + parallel: { + signature: "parallel(requestor_array, throttle, need)", + description: "Start all requestors concurrently. Optional throttle limits concurrency; optional need specifies minimum successes." + }, + print: { + signature: "print(value)", + description: "Print a value to standard output." + }, + race: { + signature: "race(requestor_array, throttle, need)", + description: "Like parallel but returns as soon as needed results are obtained. Default need is 1." + }, + record: { + signature: "record(value, ...)", + description: "Create records. Polymorphic: record(record) copies, record(record, record) merges, record(array) creates from keys." + }, + reduce: { + signature: "reduce(array, function, initial, reverse)", + description: "Reduce an array to a single value by applying a function to pairs of elements." + }, + remainder: { + signature: "remainder(dividend, divisor)", + description: "For fit integers: dividend - ((dividend // divisor) * divisor)." + }, + replace: { + signature: "replace(text, target, replacement, limit)", + description: "Return text with target replaced. Target can be text or pattern. Replacement can be text or function." + }, + reverse: { + signature: "reverse(array)", + description: "Returns a new array with elements in the opposite order." + }, + round: { + signature: "round(number, place)", + description: "Round to nearest." + }, + search: { + signature: "search(text, target, from)", + description: "Search text for target. Returns character position or null." + }, + sequence: { + signature: "sequence(requestor_array)", + description: "Process requestors in order. Each result becomes input to the next." + }, + sign: { + signature: "sign(number)", + description: "Returns -1, 0, or 1." 
+ }, + some: { + signature: "some(array, function)", + description: "Returns true if any element satisfies the predicate." + }, + sort: { + signature: "sort(array, select)", + description: "Returns a new sorted array. Sort keys must be all numbers or all texts. Ascending and stable." + }, + starts_with: { + signature: "starts_with(text, prefix)", + description: "Returns true if the text starts with the given prefix." + }, + stone: { + signature: "stone(value)", + description: "Petrify the value, making it permanently immutable. Deep freeze." + }, + text: { + signature: "text(value, ...)", + description: "Convert to text. Polymorphic: text(array, sep) joins, text(number, radix/format) formats, text(text, from, to) substrings." + }, + trim: { + signature: "trim(text, reject)", + description: "Remove characters from both ends. Default removes whitespace." + }, + trunc: { + signature: "trunc(number, place)", + description: "Truncate toward zero." + }, + upper: { + signature: "upper(text)", + description: "Returns text with all lowercase characters converted to uppercase." + }, + whole: { + signature: "whole(number)", + description: "Returns the whole part of a number." + }, + meme: { + signature: "meme()", + description: "Create a new meme (prototype chain marker)." + }, + proto: { + signature: "proto(object, meme)", + description: "Set the prototype meme of an object." + }, + isa: { + signature: "isa(object, meme)", + description: "Returns true if the object has the given meme in its prototype chain." + }, + splat: { + signature: "splat(function, array)", + description: "Call function with array elements as separate arguments." + }, + use: { + signature: "use(path)", + description: "Load a module. Returns the module's exported value. Modules are cached and frozen." + }, + pi: { + signature: "pi", + description: "An approximation of circumference / diameter: 3.1415926535897932." 
+ } +} + +// Actor intrinsic documentation +def actor_docs = { + "$me": { + signature: "$me", + description: "The address of this actor." + }, + "$send": { + signature: "$send(address, message)", + description: "Send a message to another actor." + }, + "$start": { + signature: "$start(script, env)", + description: "Start a new actor from a script path." + }, + "$stop": { + signature: "$stop()", + description: "Stop this actor." + }, + "$delay": { + signature: "$delay(milliseconds)", + description: "Delay processing for a number of milliseconds." + }, + "$receiver": { + signature: "$receiver(function)", + description: "Set the message receiver function for this actor." + }, + "$clock": { + signature: "$clock(interval, message)", + description: "Send a message to self at regular intervals." + }, + "$portal": { + signature: "$portal(name)", + description: "Create a named portal for inter-actor communication." + }, + "$contact": { + signature: "$contact(portal_name)", + description: "Connect to a named portal." + }, + "$couple": { + signature: "$couple(address)", + description: "Couple with another actor for lifecycle management." + }, + "$unneeded": { + signature: "$unneeded(function)", + description: "Set a function to be called when this actor is no longer needed." + }, + "$connection": { + signature: "$connection(address)", + description: "Establish a connection with another actor." + }, + "$time_limit": { + signature: "$time_limit(milliseconds)", + description: "Set a time limit for this actor's execution." + } +} + +// Provide hover info for a token. 
+var hover = function(doc, line, col, token_at) {
+  // doc      - analysed document (reads doc.ast.scopes)
+  // line/col - 0-based position, forwarded to token_at
+  // token_at - lookup function (doc, line, col) -> token or null
+  // Returns an LSP Hover record {contents: {kind, value}}, or null when there
+  // is nothing to show.
+  var tok = token_at(doc, line, col)
+  var info = null
+  var _i = 0
+  var _j = 0
+  var scope = null
+  var v = null
+
+  // Wrap markdown text in the hover result shape.
+  var markdown = function(value) {
+    return {
+      contents: {
+        kind: "markdown",
+        value: value
+      }
+    }
+  }
+
+  // Render a {signature, description} documentation entry.
+  var describe = function(entry) {
+    return markdown(`**${entry.signature}**\n\n${entry.description}`)
+  }
+
+  if (tok == null) {
+    return null
+  }
+
+  // Check intrinsic functions
+  if (tok.kind == "name" && tok.value != null) {
+    info = intrinsic_docs[tok.value]
+    if (info != null) {
+      return describe(info)
+    }
+  }
+
+  // Check actor intrinsics ($name).
+  // Guard with is_text so starts_with is only ever handed text; token values
+  // of other types simply skip this branch.
+  if (is_text(tok.value) && starts_with(tok.value, "$")) {
+    info = actor_docs[tok.value]
+    if (info != null) {
+      return describe(info)
+    }
+  }
+
+  // Check keywords
+  if (tok.kind == "var") {
+    return markdown("**var** — Declare a mutable variable.")
+  }
+
+  if (tok.kind == "def") {
+    return markdown("**def** — Declare a constant.")
+  }
+
+  if (tok.kind == "disrupt") {
+    return markdown("**disrupt** — Raise an error. Use with **disruption** block to handle errors.")
+  }
+
+  if (tok.kind == "disruption") {
+    return markdown("**disruption** — Error handling block. Catches errors raised by **disrupt**.")
+  }
+
+  // User variable: show declaration info from scope.
+  // The first declaration with a matching name in any scope is used.
+  if (tok.kind == "name" && tok.value != null && doc.ast != null && doc.ast.scopes != null) {
+    _i = 0
+    while (_i < length(doc.ast.scopes)) {
+      scope = doc.ast.scopes[_i]
+      if (scope.vars != null) {
+        _j = 0
+        while (_j < length(scope.vars)) {
+          v = scope.vars[_j]
+          if (v.name == tok.value) {
+            return markdown((v.is_const == true)
+              ? `**def** ${v.name}`
+              : `**var** ${v.name}`)
+          }
+          _j = _j + 1
+        }
+      }
+      _i = _i + 1
+    }
+  }
+
+  return null
+}
+
+return {
+  hover: hover,
+  intrinsic_docs: intrinsic_docs,
+  actor_docs: actor_docs
+}
diff --git a/editors/vscode/lsp/lsp.ce b/editors/vscode/lsp/lsp.ce
new file mode 100644
index 00000000..f0233091
--- /dev/null
+++ b/editors/vscode/lsp/lsp.ce
@@ -0,0 +1,209 @@
+// ƿit Language Server Protocol (LSP) main loop.
+// Communicates via JSON-RPC over stdin/stdout.
+
+var fd = use('fd')
+var json_mod = use('json')
+var protocol = use('protocol')
+var analysis_make = use('analysis')
+var completions = use('completions')
+var hover_mod = use('hover')
+var symbols = use('symbols')
+
+// Get tokenize_mod and parse_mod from the environment.
+// These are the same functions the compiler uses internally.
+var tokenize_mod = use('tokenize')
+var parse_mod = use('parse')
+
+// Create analysis module bound to tokenize/parse
+var analysis = analysis_make(tokenize_mod, parse_mod)
+
+// Document store: URI -> {text, version, ast, tokens, errors}
+var docs = {}
+
+// Log to stderr for debugging (does not interfere with protocol).
+var log = function(msg) {
+  fd.write(2, `[pit-lsp] ${msg}\n`)
+}
+
+// Publish diagnostics for a document.
+var publish_diagnostics = function(uri, doc) {
+  var diags = analysis.diagnostics(doc)
+  protocol.notify("textDocument/publishDiagnostics", {
+    uri: uri,
+    diagnostics: diags
+  })
+}
+
+// Parse a document and publish diagnostics.
+// Stores the analysed document in docs[uri] via analysis.update.
+var parse_and_notify = function(uri, src, version) {
+  var doc = analysis.update(docs, uri, {src: src, version: version})
+  publish_diagnostics(uri, doc)
+}
+
+// Handle initialize request.
+var handle_initialize = function(id, params) {
+  // Advertise the server's capabilities. `change: 1` selects full-text
+  // document sync; `save.includeText` asks the client to send the document
+  // text with didSave.
+  protocol.respond(id, {
+    capabilities: {
+      textDocumentSync: {
+        openClose: true,
+        change: 1,
+        save: {includeText: true}
+      },
+      completionProvider: {
+        triggerCharacters: [".", "$"]
+      },
+      hoverProvider: true,
+      definitionProvider: true,
+      documentSymbolProvider: true
+    },
+    serverInfo: {
+      name: "pit-lsp",
+      version: "0.1.0"
+    }
+  })
+}
+
+// Handle textDocument/didOpen notification.
+var handle_did_open = function(params) {
+  var td = params.textDocument
+  parse_and_notify(td.uri, td.text, td.version)
+}
+
+// Handle textDocument/didChange notification (full text sync).
+// With full sync every content change carries the whole document, and changes
+// are ordered, so the last entry is the document's final state.
+var handle_did_change = function(params) {
+  var td = params.textDocument
+  var changes = params.contentChanges
+  var count = length(changes)
+  if (count > 0) {
+    parse_and_notify(td.uri, changes[count - 1].text, td.version)
+  }
+}
+
+// Handle textDocument/didClose notification.
+var handle_did_close = function(params) {
+  var uri = params.textDocument.uri
+  analysis.remove(docs, uri)
+  // Clear diagnostics
+  protocol.notify("textDocument/publishDiagnostics", {
+    uri: uri,
+    diagnostics: []
+  })
+}
+
+// Handle textDocument/didSave notification.
+// didSave identifies the document by URI only (no version), so keep the
+// version already stored for the document instead of overwriting it with null.
+// Nothing is re-parsed when the client did not include the saved text.
+var handle_did_save = function(params) {
+  var td = params.textDocument
+  var previous = docs[td.uri]
+  var version = td.version
+  if (version == null && previous != null) {
+    version = previous.version
+  }
+  if (params.text != null) {
+    parse_and_notify(td.uri, params.text, version)
+  }
+}
+
+// Handle textDocument/completion request.
+// Responds with an empty list when the document is not in the store.
+var handle_completion = function(id, params) {
+  var uri = params.textDocument.uri
+  var pos = params.position
+  var doc = docs[uri]
+  var items = []
+  if (doc != null) {
+    items = completions.complete(doc, pos.line, pos.character)
+  }
+  protocol.respond(id, items)
+}
+
+// Handle textDocument/hover request.
+var handle_hover = function(id, params) {
+  var doc = docs[params.textDocument.uri]
+  var at = params.position
+  // Unknown document: answer null so the request does not stay pending.
+  if (doc == null) {
+    protocol.respond(id, null)
+    return null
+  }
+  protocol.respond(id, hover_mod.hover(doc, at.line, at.character, analysis.token_at))
+}
+
+// Handle textDocument/definition request.
+// Same shape as hover: null for an unknown document, otherwise whatever the
+// symbols module resolves for the position.
+var handle_definition = function(id, params) {
+  var doc = docs[params.textDocument.uri]
+  var at = params.position
+  if (doc == null) {
+    protocol.respond(id, null)
+    return null
+  }
+  protocol.respond(id, symbols.definition(doc, at.line, at.character, analysis.token_at))
+}
+
+// Handle textDocument/documentSymbol request.
+// An unknown document yields an empty symbol list.
+var handle_document_symbol = function(id, params) {
+  var doc = docs[params.textDocument.uri]
+  if (doc == null) {
+    protocol.respond(id, [])
+    return null
+  }
+  protocol.respond(id, symbols.document_symbols(doc))
+}
+
+// Route one decoded message to its handler. Returns "shutdown" or "exit" for
+// the two lifecycle messages and null for everything else. Kept as a function
+// so the disruption clause can report a failure without ending the main loop.
+var dispatch_message = function(msg) {
+  var method = msg.method
+  var id = msg.id
+  var params = msg.params
+
+  // Lifecycle messages: their return value is what the main loop looks at.
+  if (method == "shutdown") {
+    protocol.respond(id, null)
+    return "shutdown"
+  }
+  if (method == "exit") {
+    return "exit"
+  }
+
+  if (method == "initialize") {
+    handle_initialize(id, params)
+  } else if (method == "textDocument/didOpen") {
+    handle_did_open(params)
+  } else if (method == "textDocument/didChange") {
+    handle_did_change(params)
+  } else if (method == "textDocument/didClose") {
+    handle_did_close(params)
+  } else if (method == "textDocument/didSave") {
+    handle_did_save(params)
+  } else if (method == "textDocument/completion") {
+    handle_completion(id, params)
+  } else if (method == "textDocument/hover") {
+    handle_hover(id, params)
+  } else if (method == "textDocument/definition") {
+    handle_definition(id, params)
+  } else if (method == "textDocument/documentSymbol") {
+    handle_document_symbol(id, params)
+  } else if (method != "initialized" && id != null) {
+    // "initialized" is a deliberate no-op. Any other unknown message gets an
+    // error reply only when it carries an id, i.e. when it is a request.
+    protocol.respond_error(id, -32601, `Method not found: ${method}`)
+  }
+  return null
+} disruption {
+  log(`error handling ${msg.method}`)
+  if (msg.id != null) {
+    protocol.respond_error(msg.id, -32603, `Internal error handling ${msg.method}`)
+  }
+  return null
+}
+
+// Main loop: read framed messages until the input ends or "exit" arrives.
+log("starting")
+
+var running = true
+var msg = null
+var result = null
+
+while (running) {
+  msg = protocol.read_message()
+  if (msg == null) {
+    // read_message gives null when it cannot produce a message; stop serving.
+    running = false
+  } else {
+    result = dispatch_message(msg)
+    if (result == "exit") {
+      running = false
+    }
+  }
+}
+
+log("stopped")
diff --git a/editors/vscode/lsp/protocol.cm b/editors/vscode/lsp/protocol.cm
new file mode 100644
index 00000000..1206afde
--- /dev/null
+++ b/editors/vscode/lsp/protocol.cm
@@ -0,0 +1,102 @@
+// JSON-RPC protocol helpers for LSP communication over stdin/stdout.
+// Reads Content-Length framed messages from stdin, writes to stdout.
+
+var fd = use('fd')
+var json = use('json')
+
+// Read a single JSON-RPC message from stdin.
+// Protocol: "Content-Length: N\r\n\r\n" followed by N bytes of JSON.
+// Returns the decoded message, or null when the input ends or the header
+// block carries no usable Content-Length.
+var read_message = function() {
+  var header = ""
+  var ch = null
+  var content_length = null
+  var body = null
+  var total = 0
+  var chunk = null
+  var field = "Content-Length:"
+
+  // Read header byte by byte until we hit \r\n\r\n
+  while (true) {
+    ch = fd.read(0, 1)
+    if (ch == null) {
+      return null
+    }
+    header = header + text(ch)
+    if (ends_with(header, "\r\n\r\n")) {
+      break
+    }
+  }
+
+  // Parse Content-Length from header.
+  // Slice right after the colon and let trim() drop any padding: the space
+  // after the colon is optional, so skipping a fixed extra character would
+  // swallow the first digit of "Content-Length:123".
+  var lines = array(header, "\r\n")
+  var _i = 0
+  while (_i < length(lines)) {
+    if (starts_with(lines[_i], field)) {
+      content_length = number(trim(text(lines[_i], length(field))))
+    }
+    _i = _i + 1
+  }
+
+  if (content_length == null) {
+    return null
+  }
+
+  // Read until content_length has been consumed; fd.read may return less
+  // than was asked for, so keep requesting the remainder.
+  // NOTE(review): Content-Length is a byte count, but `total` advances by
+  // length(text(chunk)). Presumably the two only agree while the payload is
+  // single-byte characters - confirm against fd.read/text/length semantics.
+  body = ""
+  total = 0
+  while (total < content_length) {
+    chunk = fd.read(0, content_length - total)
+    if (chunk == null) {
+      return null
+    }
+    chunk = text(chunk)
+    body = body + chunk
+    total = total + length(chunk)
+  }
+
+  return json.decode(body)
+}
+
+// Send a JSON-RPC message to stdout.
+// Encodes `msg` as JSON and writes it behind a Content-Length header.
+// NOTE(review): the header value is length(body); presumably that counts
+// characters rather than bytes - confirm for non-ASCII payloads.
+var send_message = function(msg) {
+  var body = json.encode(msg)
+  var header = `Content-Length: ${text(length(body))}\r\n\r\n`
+  fd.write(1, header + body)
+}
+
+// Send a JSON-RPC response for a request.
+// `id` is echoed back so the client can match the reply to its request.
+var respond = function(id, result) {
+  send_message({
+    jsonrpc: "2.0",
+    id: id,
+    result: result
+  })
+}
+
+// Send a JSON-RPC error response.
+// `code` is the numeric JSON-RPC error code, `message` the readable text.
+var respond_error = function(id, code, message) {
+  send_message({
+    jsonrpc: "2.0",
+    id: id,
+    error: {
+      code: code,
+      message: message
+    }
+  })
+}
+
+// Send a JSON-RPC notification (no id).
+var notify = function(method, params) {
+  send_message({
+    jsonrpc: "2.0",
+    method: method,
+    params: params
+  })
+}
+
+return {
+  read_message: read_message,
+  send_message: send_message,
+  respond: respond,
+  respond_error: respond_error,
+  notify: notify
+}
diff --git a/editors/vscode/lsp/symbols.cm b/editors/vscode/lsp/symbols.cm
new file mode 100644
index 00000000..86b95044
--- /dev/null
+++ b/editors/vscode/lsp/symbols.cm
@@ -0,0 +1,238 @@
+// Document symbols and go-to-definition provider for the ƿit LSP.
+
+// SymbolKind constants (LSP spec)
+def KIND_FUNCTION = 12
+def KIND_VARIABLE = 13
+def KIND_CONSTANT = 14
+
+// Build one DocumentSymbol record. `node` supplies the full range and
+// `name_node` the selection range (the part an editor highlights).
+var make_symbol = function(name, kind, node, name_node) {
+  return {
+    name: name,
+    kind: kind,
+    range: {
+      start: {line: node.from_row, character: node.from_column},
+      end: {line: node.to_row, character: node.to_column}
+    },
+    selectionRange: {
+      start: {line: name_node.from_row, character: name_node.from_column},
+      end: {line: name_node.to_row, character: name_node.to_column}
+    }
+  }
+}
+
+// Turn a single var/def declaration node into a symbol, or null when the
+// declaration has no plain name on its left side. A function-valued
+// initializer wins over the var/def distinction.
+var declaration_symbol = function(decl) {
+  var kind = KIND_VARIABLE
+  if (decl.left == null || decl.left.name == null) {
+    return null
+  }
+  if (decl.kind == "def") {
+    kind = KIND_CONSTANT
+  }
+  if (decl.right != null && (decl.right.kind == "function" || decl.right.kind == "arrow function")) {
+    kind = KIND_FUNCTION
+  }
+  return make_symbol(decl.left.name, kind, decl, decl.left)
+}
+
+// Walk AST to extract document symbols (top-level vars/defs and functions).
+// Returns an empty list when the document has no parsed statements.
+var document_symbols = function(doc) {
+  var symbols = []
+  var ast = doc.ast
+  var _i = 0
+  var _j = 0
+  var stmt = null
+  var sym = null
+
+  if (ast == null || ast.statements == null) {
+    return symbols
+  }
+
+  while (_i < length(ast.statements)) {
+    stmt = ast.statements[_i]
+
+    if (stmt.kind == "var" || stmt.kind == "def") {
+      sym = declaration_symbol(stmt)
+      if (sym != null) {
+        symbols[] = sym
+      }
+    } else if (stmt.kind == "var_list" && stmt.list != null) {
+      _j = 0
+      while (_j < length(stmt.list)) {
+        sym = declaration_symbol(stmt.list[_j])
+        if (sym != null) {
+          symbols[] = sym
+        }
+        _j = _j + 1
+      }
+    } else if (stmt.kind == "function" && stmt.name != null) {
+      // Named function statement. Only the statement's own location is
+      // available here, so it serves as both range and selection range.
+      symbols[] = make_symbol(stmt.name, KIND_FUNCTION, stmt, stmt)
+    }
+
+    _i = _i + 1
+  }
+
+  return symbols
+}
+
+// Find the declaration location of a name at a given position.
+// `token_at(doc, line, col)` resolves the token under the cursor; only
+// "name" tokens are looked up. Returns {uri, range} or null.
+var definition = function(doc, line, col, token_at) {
+  var tok = token_at(doc, line, col)
+  var ast = doc.ast
+  var decl = null
+
+  if (tok == null || tok.kind != "name" || tok.value == null) {
+    return null
+  }
+
+  if (ast == null) {
+    return null
+  }
+
+  // One search over the statement tree is enough: the previous scope walk
+  // ran this exact lookup once per matching scope entry and could never
+  // produce a different answer.
+  decl = find_declaration(ast.statements, tok.value)
+  if (decl == null) {
+    return null
+  }
+
+  return {
+    uri: doc.uri,
+    range: {
+      start: {line: decl.from_row, character: decl.from_column},
+      end: {line: decl.to_row, character: decl.to_column}
+    }
+  }
+}
+
+// Recursively search statements for a var/def declaration of a given name.
+// Also matches named function statements, and descends into if / else-if /
+// else branches and into the bodies of function-valued initializers.
+// Returns the first matching declaration node in document order, or null.
+var find_declaration = function(statements, name) {
+  var _i = 0
+  var _j = 0
+  var stmt = null
+  var item = null
+  var result = null
+
+  // An if-branch is either a bare statement array or a node that wraps one
+  // in `.statements`; normalize both to the array (or null).
+  var branch_statements = function(branch) {
+    if (branch == null) {
+      return null
+    }
+    if (is_array(branch)) {
+      return branch
+    }
+    return branch.statements
+  }
+
+  // Search the body of a function-valued initializer (`var f = function ...`).
+  var search_initializer = function(decl) {
+    var fn = decl.right
+    if (fn == null || fn.statements == null) {
+      return null
+    }
+    if (fn.kind == "function" || fn.kind == "arrow function") {
+      return find_declaration(fn.statements, name)
+    }
+    return null
+  }
+
+  if (statements == null) {
+    return null
+  }
+
+  while (_i < length(statements)) {
+    stmt = statements[_i]
+
+    // Direct var/def
+    if ((stmt.kind == "var" || stmt.kind == "def")
+        && stmt.left != null && stmt.left.name == name) {
+      return stmt
+    }
+
+    // Named function statement
+    if (stmt.kind == "function" && stmt.name != null && stmt.name == name) {
+      return stmt
+    }
+
+    // var_list: each item is its own declaration and may hold a function
+    if (stmt.kind == "var_list" && stmt.list != null) {
+      _j = 0
+      while (_j < length(stmt.list)) {
+        item = stmt.list[_j]
+        if (item.left != null && item.left.name == name) {
+          return item
+        }
+        result = search_initializer(item)
+        if (result != null) {
+          return result
+        }
+        _j = _j + 1
+      }
+    }
+
+    // Recurse into anything that carries nested statements (blocks, loops,
+    // function statements)
+    if (stmt.statements != null) {
+      result = find_declaration(stmt.statements, name)
+      if (result != null) {
+        return result
+      }
+    }
+
+    // if / else-if chain / else
+    if (stmt.kind == "if") {
+      result = find_declaration(branch_statements(stmt.then), name)
+      if (result != null) {
+        return result
+      }
+      result = find_declaration(stmt.list, name)
+      if (result != null) {
+        return result
+      }
+      result = find_declaration(branch_statements(stmt.else), name)
+      if (result != null) {
+        return result
+      }
+    }
+
+    // var/def with function right side
+    if (stmt.kind == "var" || stmt.kind == "def") {
+      result = search_initializer(stmt)
+      if (result != null) {
+        return result
+      }
+    }
+
+    _i = _i + 1
+  }
+  return null
+}
+
+return {
+  document_symbols: document_symbols,
+  definition: definition
+}
diff --git a/editors/vscode/package.json b/editors/vscode/package.json
new file mode 100644
index 00000000..4b3e875b
--- /dev/null
+++ b/editors/vscode/package.json
@@ -0,0 +1,62 @@
+{
+  "name":
"pit-language",
+  "displayName": "ƿit Language",
+  "description": "Language support for ƿit (.ce/.cm) — syntax highlighting, diagnostics, completions, hover, and go-to-definition",
+  "version": "0.1.0",
+  "publisher": "pit-lang",
+  "engines": {
+    "vscode": "^1.75.0"
+  },
+  "categories": [
+    "Programming Languages"
+  ],
+  "activationEvents": [
+    "onLanguage:pit"
+  ],
+  "main": "./out/extension.js",
+  "contributes": {
+    "languages": [
+      {
+        "id": "pit",
+        "aliases": [
+          "ƿit",
+          "pit"
+        ],
+        "extensions": [
+          ".ce",
+          ".cm"
+        ],
+        "configuration": "./language-configuration.json"
+      }
+    ],
+    "grammars": [
+      {
+        "language": "pit",
+        "scopeName": "source.pit",
+        "path": "./syntaxes/pit.tmLanguage.json"
+      }
+    ],
+    "configuration": {
+      "title": "ƿit",
+      "properties": {
+        "pit.cellPath": {
+          "type": "string",
+          "default": "cell",
+          "description": "Path to the cell executable"
+        }
+      }
+    }
+  },
+  "scripts": {
+    "compile": "tsc -p ./",
+    "watch": "tsc -watch -p ./"
+  },
+  "dependencies": {
+    "vscode-languageclient": "^9.0.0",
+    "vscode-languageserver-protocol": "^3.17.0"
+  },
+  "devDependencies": {
+    "@types/vscode": "^1.75.0",
+    "typescript": "^5.0.0"
+  }
+}
diff --git a/editors/vscode/src/extension.ts b/editors/vscode/src/extension.ts
new file mode 100644
index 00000000..6a1fa2c2
--- /dev/null
+++ b/editors/vscode/src/extension.ts
@@ -0,0 +1,53 @@
+import * as path from "path";
+import { workspace, ExtensionContext } from "vscode";
+import {
+  LanguageClient,
+  LanguageClientOptions,
+  ServerOptions,
+} from "vscode-languageclient/node";
+
+// Shared by activate() and deactivate(); stays unset until activate() runs.
+let client: LanguageClient;
+
+/**
+ * Builds a language client whose server command is the configured
+ * `pit.cellPath` executable (run from the bundled `lsp` directory) and starts it.
+ */
+export function activate(context: ExtensionContext) {
+  const config = workspace.getConfiguration("pit");
+  const cellPath = config.get<string>("cellPath", "cell");
+  const lspDir = path.join(context.extensionPath, "lsp");
+
+  const serverOptions: ServerOptions = {
+    command: cellPath,
+    args: ["lsp/lsp"],
+    options: { cwd: lspDir },
+  };
+
+  const clientOptions: LanguageClientOptions = {
+    documentSelector: [{ scheme: "file", language: "pit" }],
+    synchronize: {
+      fileEvents: workspace.createFileSystemWatcher("**/*.{ce,cm}"),
+    },
+  };
+
+  client = new LanguageClient(
+    "pitLanguageServer",
+    "ƿit Language Server",
+    serverOptions,
+    clientOptions
+  );
+
+  client.start();
+}
+
+/**
+ * Stops the language client if one was created.
+ * Returns undefined when activate() never assigned a client.
+ */
+export function deactivate(): Thenable<void> | undefined {
+  if (!client) {
+    return undefined;
+  }
+  return client.stop();
+}
diff --git a/editors/vscode/syntaxes/pit.tmLanguage.json b/editors/vscode/syntaxes/pit.tmLanguage.json
new file mode 100644
index 00000000..205d14a6
--- /dev/null
+++ b/editors/vscode/syntaxes/pit.tmLanguage.json
@@ -0,0 +1,160 @@
+{
+  "$schema": "https://raw.githubusercontent.com/martinring/tmlanguage/master/tmlanguage.json",
+  "name": "pit",
+  "scopeName": "source.pit",
+  "patterns": [
+    { "include": "#comment-line" },
+    { "include": "#comment-block" },
+    { "include": "#string-template" },
+    { "include": "#string-double" },
+    { "include": "#regexp" },
+    { "include": "#keyword-control" },
+    { "include": "#keyword-error" },
+    { "include": "#storage-type" },
+    { "include": "#constant-language" },
+    { "include": "#variable-language" },
+    { "include": "#actor-intrinsic" },
+    { "include": "#keyword-operator" },
+    { "include": "#arrow-function" },
+    { "include": "#support-function" },
+    { "include": "#constant-numeric-hex" },
+    { "include": "#constant-numeric-binary" },
+    { "include": "#constant-numeric-octal" },
+    { "include": "#constant-numeric" },
+    { "include": "#punctuation" }
+  ],
+  "repository": {
+    "comment-line": {
+      "name": "comment.line.double-slash.pit",
+      "match": "//.*$"
+    },
+    "comment-block": {
+      "name": "comment.block.pit",
+      "begin": "/\\*",
+      "end": "\\*/",
+      "beginCaptures": { "0": { "name": "punctuation.definition.comment.begin.pit" } },
+      "endCaptures": { "0": { "name": "punctuation.definition.comment.end.pit" } }
+    },
+    "string-double": {
+      "name": "string.quoted.double.pit",
+      "begin": "\"",
+      "end": "\"",
+      "beginCaptures": { "0": { "name":
"punctuation.definition.string.begin.pit" } }, + "endCaptures": { "0": { "name": "punctuation.definition.string.end.pit" } }, + "patterns": [ + { + "name": "constant.character.escape.pit", + "match": "\\\\(?:[\"\\\\bfnrt/]|u[0-9a-fA-F]{4})" + } + ] + }, + "string-template": { + "name": "string.template.pit", + "begin": "`", + "end": "`", + "beginCaptures": { "0": { "name": "punctuation.definition.string.template.begin.pit" } }, + "endCaptures": { "0": { "name": "punctuation.definition.string.template.end.pit" } }, + "patterns": [ + { + "name": "constant.character.escape.pit", + "match": "\\\\(?:[`\\\\bfnrt/$]|u[0-9a-fA-F]{4})" + }, + { + "name": "meta.template.expression.pit", + "begin": "\\$\\{", + "end": "\\}", + "beginCaptures": { "0": { "name": "punctuation.definition.template-expression.begin.pit" } }, + "endCaptures": { "0": { "name": "punctuation.definition.template-expression.end.pit" } }, + "patterns": [ + { "include": "source.pit" } + ] + } + ] + }, + "regexp": { + "name": "string.regexp.pit", + "begin": "(?<=[=(:,;!&|?~^>]|^|return|disrupt)\\s*(/(?![/*]))", + "end": "/([gimsuvy]*)", + "beginCaptures": { "1": { "name": "punctuation.definition.string.begin.pit" } }, + "endCaptures": { "1": { "name": "keyword.other.pit" } }, + "patterns": [ + { + "name": "constant.character.escape.pit", + "match": "\\\\." 
+ } + ] + }, + "keyword-control": { + "name": "keyword.control.pit", + "match": "\\b(if|else|for|while|do|break|continue|return|go)\\b" + }, + "keyword-error": { + "name": "keyword.control.error.pit", + "match": "\\b(disrupt|disruption)\\b" + }, + "storage-type": { + "patterns": [ + { + "name": "storage.type.pit", + "match": "\\b(var|def)\\b" + }, + { + "name": "storage.type.function.pit", + "match": "\\bfunction\\b" + } + ] + }, + "constant-language": { + "name": "constant.language.pit", + "match": "\\b(null|true|false)\\b" + }, + "variable-language": { + "name": "variable.language.this.pit", + "match": "\\bthis\\b" + }, + "actor-intrinsic": { + "name": "variable.language.actor.pit", + "match": "\\$[a-zA-Z_][a-zA-Z0-9_]*" + }, + "keyword-operator": { + "name": "keyword.operator.pit", + "match": "\\b(delete|in|typeof)\\b" + }, + "arrow-function": { + "name": "storage.type.function.arrow.pit", + "match": "=>" + }, + "support-function": { + "name": "support.function.pit", + "match": "\\b(abs|apply|array|ceiling|character|codepoint|ends_with|every|extract|fallback|filter|find|floor|for|format|fraction|is_array|is_blob|is_character|is_data|is_digit|is_false|is_fit|is_function|is_integer|is_letter|is_logical|is_lower|is_null|is_number|is_object|is_pattern|is_stone|is_text|is_true|is_upper|is_whitespace|length|logical|lower|max|min|modulo|neg|normalize|not|number|parallel|print|race|record|reduce|remainder|replace|reverse|round|search|sequence|sign|some|sort|starts_with|stone|text|trim|trunc|upper|whole|meme|proto|isa|splat|use)(?=\\s*\\()" + }, + "constant-numeric-hex": { + "name": "constant.numeric.hex.pit", + "match": "\\b0[xX][0-9a-fA-F]+\\b" + }, + "constant-numeric-binary": { + "name": "constant.numeric.binary.pit", + "match": "\\b0[bB][01]+\\b" + }, + "constant-numeric-octal": { + "name": "constant.numeric.octal.pit", + "match": "\\b0[oO][0-7]+\\b" + }, + "constant-numeric": { + "name": "constant.numeric.pit", + "match": 
"\\b[0-9]+(\\.[0-9]+)?([eE][+-]?[0-9]+)?\\b" + }, + "punctuation": { + "patterns": [ + { + "name": "punctuation.separator.comma.pit", + "match": "," + }, + { + "name": "punctuation.terminator.statement.pit", + "match": ";" + } + ] + } + } +} diff --git a/editors/vscode/tsconfig.json b/editors/vscode/tsconfig.json new file mode 100644 index 00000000..6dc4da45 --- /dev/null +++ b/editors/vscode/tsconfig.json @@ -0,0 +1,13 @@ +{ + "compilerOptions": { + "module": "commonjs", + "target": "ES2020", + "outDir": "out", + "lib": ["ES2020"], + "sourceMap": true, + "rootDir": "src", + "strict": true + }, + "include": ["src"], + "exclude": ["node_modules", "out"] +} From 4b76728230aa6291266d9063d6d4a3e044f3e11b Mon Sep 17 00:00:00 2001 From: John Alanbrook Date: Mon, 9 Feb 2026 20:04:40 -0600 Subject: [PATCH 2/2] ast folding --- fold.ce | 13 + fold.cm | 968 ++++++++++++++++++++++++++++++++++++++++++ internal/bootstrap.cm | 3 + parse.cm | 77 +--- 4 files changed, 1002 insertions(+), 59 deletions(-) create mode 100644 fold.ce create mode 100644 fold.cm diff --git a/fold.ce b/fold.ce new file mode 100644 index 00000000..0881f8b8 --- /dev/null +++ b/fold.ce @@ -0,0 +1,13 @@ +var fd = use("fd") +var json = use("json") + +var filename = args[0] +var src = text(fd.slurp(filename)) +var tokenize = use("tokenize") +var parse = use("parse") +var fold = use("fold") + +var tok_result = tokenize(src, filename) +var ast = parse(tok_result.tokens, src, filename, tokenize) +var folded = fold(ast) +print(json.encode(folded)) diff --git a/fold.cm b/fold.cm new file mode 100644 index 00000000..bc3d114e --- /dev/null +++ b/fold.cm @@ -0,0 +1,968 @@ +// fold.cm — AST optimization pass +// Constant folding, constant propagation, dead code elimination + +var fold = function(ast) { + var scopes = ast.scopes + var nr_scopes = length(scopes) + + // ============================================================ + // Helpers + // ============================================================ + + var 
is_literal = function(expr) { + if (expr == null) return false + var k = expr.kind + return k == "number" || k == "text" || k == "true" || k == "false" || k == "null" + } + + var is_pure = function(expr) { + if (expr == null) return true + var k = expr.kind + var i = 0 + if (k == "number" || k == "text" || k == "true" || k == "false" || + k == "null" || k == "name" || k == "this") return true + if (k == "function") return true + if (k == "!" || k == "~" || k == "-unary" || k == "+unary") { + return is_pure(expr.expression) + } + if (k == "array") { + i = 0 + while (i < length(expr.list)) { + if (!is_pure(expr.list[i])) return false + i = i + 1 + } + return true + } + if (k == "record") { + i = 0 + while (i < length(expr.list)) { + if (!is_pure(expr.list[i].right)) return false + i = i + 1 + } + return true + } + if (k == "then") { + return is_pure(expr.expression) && is_pure(expr.then) && is_pure(expr.else) + } + if (k == "==" || k == "!=" || k == "&&" || k == "||") { + return is_pure(expr.left) && is_pure(expr.right) + } + return false + } + + var copy_loc = function(from, to) { + to.at = from.at + to.from_row = from.from_row + to.from_column = from.from_column + to.to_row = from.to_row + to.to_column = from.to_column + return to + } + + var make_number = function(val, src) { + return copy_loc(src, {kind: "number", value: text(val), number: val}) + } + + var make_text = function(val, src) { + return copy_loc(src, {kind: "text", value: val}) + } + + var make_bool = function(val, src) { + if (val) return copy_loc(src, {kind: "true"}) + return copy_loc(src, {kind: "false"}) + } + + var make_null = function(src) { + return copy_loc(src, {kind: "null"}) + } + + var is_truthy_literal = function(expr) { + if (expr == null) return null + var k = expr.kind + var nv = null + if (k == "true") return true + if (k == "false" || k == "null") return false + if (k == "number") { + nv = expr.number + if (nv == null) nv = number(expr.value) + return nv != 0 + } + if (k == "text") 
return length(expr.value) > 0 + return null + } + + // ============================================================ + // Scope helpers + // ============================================================ + + var find_scope = function(fn_nr) { + var i = 0 + while (i < nr_scopes) { + if (scopes[i].function_nr == fn_nr) return scopes[i] + i = i + 1 + } + return null + } + + var scope_var = function(fn_nr, name) { + var sc = find_scope(fn_nr) + if (sc == null) return null + return sc[name] + } + + var remove_scope_var = function(fn_nr, name) { + var sc = find_scope(fn_nr) + if (sc == null) return null + delete sc[name] + } + + // ============================================================ + // Pass 1: pre-scan for constants and function arities + // ============================================================ + + var const_defs = {} + var fn_arities = {} + + var register_const = function(fn_nr, name, lit_node) { + var key = text(fn_nr) + if (const_defs[key] == null) const_defs[key] = {} + const_defs[key][name] = lit_node + } + + var get_const = function(fn_nr, name) { + var key = text(fn_nr) + if (const_defs[key] == null) return null + return const_defs[key][name] + } + + var register_arity = function(fn_nr, name, count) { + var key = text(fn_nr) + if (fn_arities[key] == null) fn_arities[key] = {} + fn_arities[key][name] = count + } + + var pre_scan_stmts = null + var pre_scan_fn = null + + pre_scan_fn = function(node) { + if (node == null) return null + if (node.statements != null) pre_scan_stmts(node.statements, node.function_nr) + if (node.disruption != null) pre_scan_stmts(node.disruption, node.function_nr) + } + + pre_scan_stmts = function(stmts, fn_nr) { + var i = 0 + var j = 0 + var stmt = null + var kind = null + var name = null + var sv = null + var item = null + while (i < length(stmts)) { + stmt = stmts[i] + kind = stmt.kind + if (kind == "def") { + name = stmt.left.name + if (name != null && is_literal(stmt.right)) { + sv = scope_var(fn_nr, name) + if (sv != 
null && !sv.closure) { + register_const(fn_nr, name, stmt.right) + } + } + } else if (kind == "function") { + name = stmt.name + if (name != null && stmt.arity != null) { + register_arity(fn_nr, name, stmt.arity) + } + pre_scan_fn(stmt) + } else if (kind == "var") { + if (stmt.right != null && stmt.right.kind == "function" && stmt.right.arity != null) { + name = stmt.left.name + if (name != null) { + sv = scope_var(fn_nr, name) + if (sv != null && sv.make == "var") { + register_arity(fn_nr, name, stmt.right.arity) + } + } + } + } else if (kind == "var_list") { + j = 0 + while (j < length(stmt.list)) { + item = stmt.list[j] + if (item.kind == "var" && item.right != null && item.right.kind == "function" && item.right.arity != null) { + name = item.left.name + if (name != null) { + sv = scope_var(fn_nr, name) + if (sv != null && sv.make == "var") { + register_arity(fn_nr, name, item.right.arity) + } + } + } + j = j + 1 + } + } + i = i + 1 + } + } + + var pre_scan_expr_fns = null + pre_scan_expr_fns = function(expr) { + if (expr == null) return null + var k = expr.kind + var i = 0 + if (k == "function") { + pre_scan_fn(expr) + } + if (expr.left != null) pre_scan_expr_fns(expr.left) + if (expr.right != null) pre_scan_expr_fns(expr.right) + if (expr.expression != null) pre_scan_expr_fns(expr.expression) + if (expr.then != null) pre_scan_expr_fns(expr.then) + if (expr.else != null) pre_scan_expr_fns(expr.else) + if (k == "(" || k == "array") { + i = 0 + while (i < length(expr.list)) { + pre_scan_expr_fns(expr.list[i]) + i = i + 1 + } + } + if (k == "record") { + i = 0 + while (i < length(expr.list)) { + pre_scan_expr_fns(expr.list[i].right) + i = i + 1 + } + } + } + + var pre_scan_stmt_exprs = null + pre_scan_stmt_exprs = function(stmts, fn_nr) { + var i = 0 + var j = 0 + var stmt = null + var kind = null + while (i < length(stmts)) { + stmt = stmts[i] + kind = stmt.kind + if (kind == "var" || kind == "def") { + pre_scan_expr_fns(stmt.right) + } else if (kind == 
"var_list") { + j = 0 + while (j < length(stmt.list)) { + pre_scan_expr_fns(stmt.list[j].right) + j = j + 1 + } + } else if (kind == "call") { + pre_scan_expr_fns(stmt.expression) + } else if (kind == "if") { + pre_scan_expr_fns(stmt.expression) + pre_scan_stmt_exprs(stmt.then, fn_nr) + pre_scan_stmt_exprs(stmt.list, fn_nr) + if (stmt.else != null) pre_scan_stmt_exprs(stmt.else, fn_nr) + } else if (kind == "while" || kind == "do") { + pre_scan_expr_fns(stmt.expression) + pre_scan_stmt_exprs(stmt.statements, fn_nr) + } else if (kind == "for") { + if (stmt.init != null) { + if (stmt.init.kind == "var" || stmt.init.kind == "def") { + pre_scan_expr_fns(stmt.init.right) + } else { + pre_scan_expr_fns(stmt.init) + } + } + pre_scan_expr_fns(stmt.test) + pre_scan_expr_fns(stmt.update) + pre_scan_stmt_exprs(stmt.statements, fn_nr) + } else if (kind == "return" || kind == "go") { + pre_scan_expr_fns(stmt.expression) + } else if (kind == "block") { + pre_scan_stmt_exprs(stmt.statements, fn_nr) + } else if (kind == "label") { + if (stmt.statement != null) { + pre_scan_stmt_exprs([stmt.statement], fn_nr) + } + } else if (kind == "function") { + // already handled in pre_scan_stmts + null + } + i = i + 1 + } + } + + var pre_scan = function() { + pre_scan_stmts(ast.statements, 0) + pre_scan_stmts(ast.functions, 0) + pre_scan_stmt_exprs(ast.statements, 0) + pre_scan_stmt_exprs(ast.functions, 0) + } + + // ============================================================ + // Pass 2: fold expressions and statements + // ============================================================ + + var fold_expr = null + var fold_stmt = null + var fold_stmts = null + + fold_expr = function(expr, fn_nr) { + if (expr == null) return null + var k = expr.kind + var left = null + var right = null + var lv = null + var rv = null + var result = null + var i = 0 + var sv = null + var lit = null + var cond_k = null + var ek = null + var target = null + var ar = null + var akey = null + var tv = null + + // 
Recurse into children first (bottom-up) + if (k == "+" || k == "-" || k == "*" || k == "/" || k == "%" || + k == "**" || k == "==" || k == "!=" || k == "<" || k == ">" || + k == "<=" || k == ">=" || k == "&" || k == "|" || k == "^" || + k == "<<" || k == ">>" || k == ">>>" || k == "&&" || k == "||" || + k == "," || k == "in") { + expr.left = fold_expr(expr.left, fn_nr) + expr.right = fold_expr(expr.right, fn_nr) + } else if (k == "." || k == "[") { + expr.left = fold_expr(expr.left, fn_nr) + if (k == "[" && expr.right != null) expr.right = fold_expr(expr.right, fn_nr) + } else if (k == "!" || k == "~" || k == "-unary" || k == "+unary" || k == "delete") { + expr.expression = fold_expr(expr.expression, fn_nr) + } else if (k == "++" || k == "--") { + return expr + } else if (k == "then") { + expr.expression = fold_expr(expr.expression, fn_nr) + expr.then = fold_expr(expr.then, fn_nr) + expr.else = fold_expr(expr.else, fn_nr) + } else if (k == "(") { + expr.expression = fold_expr(expr.expression, fn_nr) + i = 0 + while (i < length(expr.list)) { + expr.list[i] = fold_expr(expr.list[i], fn_nr) + i = i + 1 + } + } else if (k == "array") { + i = 0 + while (i < length(expr.list)) { + expr.list[i] = fold_expr(expr.list[i], fn_nr) + i = i + 1 + } + } else if (k == "record") { + i = 0 + while (i < length(expr.list)) { + expr.list[i].right = fold_expr(expr.list[i].right, fn_nr) + i = i + 1 + } + } else if (k == "text literal") { + i = 0 + while (i < length(expr.list)) { + expr.list[i] = fold_expr(expr.list[i], fn_nr) + i = i + 1 + } + } else if (k == "function") { + fold_fn(expr) + return expr + } else if (k == "assign" || k == "+=" || k == "-=" || k == "*=" || + k == "/=" || k == "%=" || k == "<<=" || k == ">>=" || + k == ">>>=" || k == "&=" || k == "^=" || k == "|=" || + k == "**=" || k == "&&=" || k == "||=") { + expr.right = fold_expr(expr.right, fn_nr) + return expr + } + + // Constant propagation: name → literal + if (k == "name" && expr.level == 0) { + lit = 
get_const(fn_nr, expr.name) + if (lit != null) { + sv = scope_var(fn_nr, expr.name) + if (sv != null && !sv.closure) { + return copy_loc(expr, {kind: lit.kind, value: lit.value, number: lit.number}) + } + } + return expr + } + + // Binary constant folding + if (k == "+" || k == "-" || k == "*" || k == "/" || k == "%" || k == "**") { + left = expr.left + right = expr.right + if (left != null && right != null && left.kind == "number" && right.kind == "number") { + lv = left.number + rv = right.number + if (lv == null) lv = number(left.value) + if (rv == null) rv = number(right.value) + if (k == "/") { + if (rv == 0) return make_null(expr) + } + if (k == "%") { + if (rv == 0) return make_null(expr) + } + result = null + if (k == "+") result = lv + rv + else if (k == "-") result = lv - rv + else if (k == "*") result = lv * rv + else if (k == "/") result = lv / rv + else if (k == "%") result = lv % rv + else if (k == "**") result = lv ** rv + if (result == null) return make_null(expr) + return make_number(result, expr) + } + // text + text + if (k == "+" && left != null && right != null && left.kind == "text" && right.kind == "text") { + return make_text(left.value + right.value, expr) + } + return expr + } + + // Comparison folding + if (k == "==" || k == "!=" || k == "<" || k == ">" || k == "<=" || k == ">=") { + left = expr.left + right = expr.right + if (left != null && right != null) { + if (left.kind == "number" && right.kind == "number") { + lv = left.number + rv = right.number + if (lv == null) lv = number(left.value) + if (rv == null) rv = number(right.value) + if (k == "==") return make_bool(lv == rv, expr) + if (k == "!=") return make_bool(lv != rv, expr) + if (k == "<") return make_bool(lv < rv, expr) + if (k == ">") return make_bool(lv > rv, expr) + if (k == "<=") return make_bool(lv <= rv, expr) + if (k == ">=") return make_bool(lv >= rv, expr) + } + if (left.kind == "text" && right.kind == "text") { + if (k == "==") return make_bool(left.value == 
right.value, expr) + if (k == "!=") return make_bool(left.value != right.value, expr) + } + } + return expr + } + + // Bitwise folding + if (k == "&" || k == "|" || k == "^" || k == "<<" || k == ">>") { + left = expr.left + right = expr.right + if (left != null && right != null && left.kind == "number" && right.kind == "number") { + lv = left.number + rv = right.number + if (lv == null) lv = number(left.value) + if (rv == null) rv = number(right.value) + if (k == "&") return make_number(lv & rv, expr) + if (k == "|") return make_number(lv | rv, expr) + if (k == "^") return make_number(lv ^ rv, expr) + if (k == "<<") return make_number(lv << rv, expr) + if (k == ">>") return make_number(lv >> rv, expr) + } + return expr + } + + // Unary folding + if (k == "!") { + if (expr.expression != null) { + ek = expr.expression.kind + if (ek == "true") return make_bool(false, expr) + if (ek == "false") return make_bool(true, expr) + } + return expr + } + if (k == "~") { + if (expr.expression != null && expr.expression.kind == "number") { + lv = expr.expression.number + if (lv == null) lv = number(expr.expression.value) + return make_number(~lv, expr) + } + return expr + } + if (k == "-unary") { + if (expr.expression != null && expr.expression.kind == "number") { + lv = expr.expression.number + if (lv == null) lv = number(expr.expression.value) + return make_number(0 - lv, expr) + } + return expr + } + + // Ternary with literal condition + if (k == "then") { + tv = is_truthy_literal(expr.expression) + if (tv == true) return expr.then + if (tv == false) return expr.else + return expr + } + + // Call: stamp arity + if (k == "(") { + target = expr.expression + if (target != null && target.kind == "name" && target.level == 0) { + ar = null + akey = text(fn_nr) + if (fn_arities[akey] != null) ar = fn_arities[akey][target.name] + if (ar != null) expr.arity = ar + } + return expr + } + + return expr + } + + var fold_fn = null + + fold_stmt = function(stmt, fn_nr) { + if (stmt == null) 
return null + var k = stmt.kind + var i = 0 + var sv = null + var cond_k = null + var ik = null + var tv = null + + if (k == "var" || k == "def") { + stmt.right = fold_expr(stmt.right, fn_nr) + return stmt + } + if (k == "var_list") { + i = 0 + while (i < length(stmt.list)) { + stmt.list[i] = fold_stmt(stmt.list[i], fn_nr) + i = i + 1 + } + return stmt + } + if (k == "call") { + stmt.expression = fold_expr(stmt.expression, fn_nr) + return stmt + } + if (k == "if") { + stmt.expression = fold_expr(stmt.expression, fn_nr) + tv = is_truthy_literal(stmt.expression) + if (tv == true) { + stmt.then = fold_stmts(stmt.then, fn_nr) + return {kind: "block", statements: stmt.then, + at: stmt.at, from_row: stmt.from_row, from_column: stmt.from_column, + to_row: stmt.to_row, to_column: stmt.to_column} + } + if (tv == false) { + if (stmt.else != null && length(stmt.else) > 0) { + stmt.else = fold_stmts(stmt.else, fn_nr) + return {kind: "block", statements: stmt.else, + at: stmt.at, from_row: stmt.from_row, from_column: stmt.from_column, + to_row: stmt.to_row, to_column: stmt.to_column} + } + if (stmt.list != null && length(stmt.list) > 0) { + return fold_stmt(stmt.list[0], fn_nr) + } + return null + } + stmt.then = fold_stmts(stmt.then, fn_nr) + stmt.list = fold_stmts(stmt.list, fn_nr) + if (stmt.else != null) stmt.else = fold_stmts(stmt.else, fn_nr) + return stmt + } + if (k == "while") { + stmt.expression = fold_expr(stmt.expression, fn_nr) + if (stmt.expression.kind == "false" || stmt.expression.kind == "null") return null + stmt.statements = fold_stmts(stmt.statements, fn_nr) + return stmt + } + if (k == "do") { + stmt.statements = fold_stmts(stmt.statements, fn_nr) + stmt.expression = fold_expr(stmt.expression, fn_nr) + return stmt + } + if (k == "for") { + if (stmt.init != null) { + ik = stmt.init.kind + if (ik == "var" || ik == "def") { + stmt.init = fold_stmt(stmt.init, fn_nr) + } else { + stmt.init = fold_expr(stmt.init, fn_nr) + } + } + if (stmt.test != null) stmt.test 
= fold_expr(stmt.test, fn_nr) + if (stmt.update != null) stmt.update = fold_expr(stmt.update, fn_nr) + stmt.statements = fold_stmts(stmt.statements, fn_nr) + return stmt + } + if (k == "return" || k == "go") { + stmt.expression = fold_expr(stmt.expression, fn_nr) + return stmt + } + if (k == "block") { + stmt.statements = fold_stmts(stmt.statements, fn_nr) + return stmt + } + if (k == "label") { + stmt.statement = fold_stmt(stmt.statement, fn_nr) + return stmt + } + if (k == "function") { + fold_fn(stmt) + return stmt + } + return stmt + } + + fold_stmts = function(stmts, fn_nr) { + var i = 0 + var stmt = null + var out = [] + var sv = null + var name = null + while (i < length(stmts)) { + stmt = fold_stmt(stmts[i], fn_nr) + if (stmt == null) { + i = i + 1 + continue + } + // Dead code elimination: unused pure var/def + if (stmt.kind == "var" || stmt.kind == "def") { + name = stmt.left.name + if (name != null) { + sv = scope_var(fn_nr, name) + if (sv != null && sv.nr_uses == 0 && is_pure(stmt.right)) { + stmt.dead = true + } + } + } + // Dead function elimination + if (stmt.kind == "function" && stmt.name != null) { + sv = scope_var(fn_nr, stmt.name) + if (sv != null && sv.nr_uses == 0) { + stmt.dead = true + } + } + if (stmt.dead != true) push(out, stmt) + i = i + 1 + } + return out + } + + fold_fn = function(node) { + if (node == null) return null + var fn_nr = node.function_nr + if (fn_nr == null) return null + // Fold param defaults + var i = 0 + while (i < length(node.list)) { + if (node.list[i].expression != null) { + node.list[i].expression = fold_expr(node.list[i].expression, fn_nr) + } + i = i + 1 + } + if (node.statements != null) node.statements = fold_stmts(node.statements, fn_nr) + if (node.disruption != null) node.disruption = fold_stmts(node.disruption, fn_nr) + } + + // ============================================================ + // Pass 3: cleanup scopes + // ============================================================ + + var cleanup = 
function() { + var i = 0 + var sc = null + var keys = null + var j = 0 + var key = null + var entry = null + var slots = 0 + var close_slots = 0 + + // Remove dead vars from scope records and recalculate slot counts + while (i < nr_scopes) { + sc = scopes[i] + keys = array(sc) + slots = 0 + close_slots = 0 + j = 0 + while (j < length(keys)) { + key = keys[j] + if (key != "function_nr") { + entry = sc[key] + if (entry != null && entry.nr_uses == 0 && entry.make != "input") { + delete sc[key] + } else if (entry != null) { + slots = slots + 1 + if (entry.closure) close_slots = close_slots + 1 + } + } + j = j + 1 + } + i = i + 1 + } + + // Update nr_slots and nr_close_slots on function nodes + var update_fn_slots = null + update_fn_slots = function(node) { + if (node == null) return null + var fn_nr = node.function_nr + if (fn_nr == null) return null + var sc = find_scope(fn_nr) + if (sc == null) return null + var keys = array(sc) + var s = 0 + var cs = 0 + var ki = 0 + var ent = null + while (ki < length(keys)) { + if (keys[ki] != "function_nr") { + ent = sc[keys[ki]] + if (ent != null) { + s = s + 1 + if (ent.closure) cs = cs + 1 + } + } + ki = ki + 1 + } + node.nr_slots = s + node.nr_close_slots = cs + } + + var walk_stmts_for_fns = null + var walk_expr_for_fns = null + + walk_expr_for_fns = function(expr) { + if (expr == null) return null + var k = expr.kind + var i = 0 + if (k == "function") { + update_fn_slots(expr) + walk_stmts_for_fns(expr.statements) + walk_stmts_for_fns(expr.disruption) + return null + } + if (expr.left != null) walk_expr_for_fns(expr.left) + if (expr.right != null) walk_expr_for_fns(expr.right) + if (expr.expression != null) walk_expr_for_fns(expr.expression) + if (expr.then != null) walk_expr_for_fns(expr.then) + if (expr.else != null) walk_expr_for_fns(expr.else) + if (k == "(" || k == "array" || k == "text literal") { + i = 0 + while (i < length(expr.list)) { + walk_expr_for_fns(expr.list[i]) + i = i + 1 + } + } + if (k == "record") { + i 
= 0 + while (i < length(expr.list)) { + walk_expr_for_fns(expr.list[i].right) + i = i + 1 + } + } + } + + walk_stmts_for_fns = function(stmts) { + if (stmts == null) return null + var i = 0 + var j = 0 + var stmt = null + var k = null + while (i < length(stmts)) { + stmt = stmts[i] + k = stmt.kind + if (k == "function") { + update_fn_slots(stmt) + walk_stmts_for_fns(stmt.statements) + walk_stmts_for_fns(stmt.disruption) + } else if (k == "var" || k == "def") { + walk_expr_for_fns(stmt.right) + } else if (k == "var_list") { + j = 0 + while (j < length(stmt.list)) { + walk_expr_for_fns(stmt.list[j].right) + j = j + 1 + } + } else if (k == "call") { + walk_expr_for_fns(stmt.expression) + } else if (k == "if") { + walk_expr_for_fns(stmt.expression) + walk_stmts_for_fns(stmt.then) + walk_stmts_for_fns(stmt.list) + if (stmt.else != null) walk_stmts_for_fns(stmt.else) + } else if (k == "while" || k == "do") { + walk_expr_for_fns(stmt.expression) + walk_stmts_for_fns(stmt.statements) + } else if (k == "for") { + if (stmt.init != null) { + if (stmt.init.kind == "var" || stmt.init.kind == "def") { + walk_expr_for_fns(stmt.init.right) + } else { + walk_expr_for_fns(stmt.init) + } + } + walk_expr_for_fns(stmt.test) + walk_expr_for_fns(stmt.update) + walk_stmts_for_fns(stmt.statements) + } else if (k == "return" || k == "go") { + walk_expr_for_fns(stmt.expression) + } else if (k == "block") { + walk_stmts_for_fns(stmt.statements) + } else if (k == "label") { + if (stmt.statement != null) walk_stmts_for_fns([stmt.statement]) + } + i = i + 1 + } + } + + walk_stmts_for_fns(ast.statements) + walk_stmts_for_fns(ast.functions) + + // Update intrinsics: collect what's still referenced + var used_intrinsics = {} + var collect_intrinsics = null + var collect_expr_intrinsics = null + + collect_expr_intrinsics = function(expr) { + if (expr == null) return null + var k = expr.kind + var i = 0 + if (k == "name" && expr.level == -1 && expr.name != null && expr.make != "functino") { + 
used_intrinsics[expr.name] = true + } + if (expr.left != null) collect_expr_intrinsics(expr.left) + if (expr.right != null) collect_expr_intrinsics(expr.right) + if (expr.expression != null) collect_expr_intrinsics(expr.expression) + if (expr.then != null) collect_expr_intrinsics(expr.then) + if (expr.else != null) collect_expr_intrinsics(expr.else) + if (k == "(" || k == "array" || k == "text literal") { + i = 0 + while (i < length(expr.list)) { + collect_expr_intrinsics(expr.list[i]) + i = i + 1 + } + } + if (k == "record") { + i = 0 + while (i < length(expr.list)) { + collect_expr_intrinsics(expr.list[i].right) + i = i + 1 + } + } + if (k == "function") { + collect_intrinsics(expr.statements) + collect_intrinsics(expr.disruption) + i = 0 + while (i < length(expr.list)) { + if (expr.list[i].expression != null) { + collect_expr_intrinsics(expr.list[i].expression) + } + i = i + 1 + } + } + } + + collect_intrinsics = function(stmts) { + if (stmts == null) return null + var i = 0 + var j = 0 + var pi = 0 + var stmt = null + var k = null + while (i < length(stmts)) { + stmt = stmts[i] + k = stmt.kind + if (k == "var" || k == "def") { + collect_expr_intrinsics(stmt.right) + } else if (k == "var_list") { + j = 0 + while (j < length(stmt.list)) { + collect_expr_intrinsics(stmt.list[j].right) + j = j + 1 + } + } else if (k == "call") { + collect_expr_intrinsics(stmt.expression) + } else if (k == "if") { + collect_expr_intrinsics(stmt.expression) + collect_intrinsics(stmt.then) + collect_intrinsics(stmt.list) + if (stmt.else != null) collect_intrinsics(stmt.else) + } else if (k == "while" || k == "do") { + collect_expr_intrinsics(stmt.expression) + collect_intrinsics(stmt.statements) + } else if (k == "for") { + if (stmt.init != null) { + if (stmt.init.kind == "var" || stmt.init.kind == "def") { + collect_expr_intrinsics(stmt.init.right) + } else { + collect_expr_intrinsics(stmt.init) + } + } + collect_expr_intrinsics(stmt.test) + collect_expr_intrinsics(stmt.update) + 
collect_intrinsics(stmt.statements) + } else if (k == "return" || k == "go") { + collect_expr_intrinsics(stmt.expression) + } else if (k == "function") { + collect_intrinsics(stmt.statements) + collect_intrinsics(stmt.disruption) + pi = 0 + while (pi < length(stmt.list)) { + if (stmt.list[pi].expression != null) { + collect_expr_intrinsics(stmt.list[pi].expression) + } + pi = pi + 1 + } + } else if (k == "block") { + collect_intrinsics(stmt.statements) + } else if (k == "label") { + if (stmt.statement != null) collect_intrinsics([stmt.statement]) + } + i = i + 1 + } + } + + collect_intrinsics(ast.statements) + collect_intrinsics(ast.functions) + + var new_intrinsics = [] + i = 0 + while (i < length(ast.intrinsics)) { + if (used_intrinsics[ast.intrinsics[i]] == true) { + push(new_intrinsics, ast.intrinsics[i]) + } + i = i + 1 + } + ast.intrinsics = new_intrinsics + } + + // ============================================================ + // Main + // ============================================================ + + pre_scan() + + // Pass 2: fold all statements and functions + ast.statements = fold_stmts(ast.statements, 0) + var fi = 0 + while (fi < length(ast.functions)) { + fold_fn(ast.functions[fi]) + fi = fi + 1 + } + + // Remove dead top-level functions + var live_fns = [] + var fn = null + fi = 0 + while (fi < length(ast.functions)) { + fn = ast.functions[fi] + if (fn.dead != true) { + push(live_fns, fn) + } + fi = fi + 1 + } + ast.functions = live_fns + + // Pass 3: cleanup + cleanup() + + return ast +} + +return fold diff --git a/internal/bootstrap.cm b/internal/bootstrap.cm index 6118967a..3fe1aa97 100644 --- a/internal/bootstrap.cm +++ b/internal/bootstrap.cm @@ -24,8 +24,10 @@ function use_basic(path) { var tok_path = core_path + "/tokenize.cm" var par_path = core_path + "/parse.cm" +var fold_path = core_path + "/fold.cm" var tokenize_mod = mach_eval("tokenize", text(fd.slurp(tok_path)), {use: use_basic}) var parse_mod = mach_eval("parse", 
text(fd.slurp(par_path)), {use: use_basic}) +var fold_mod = mach_eval("fold", text(fd.slurp(fold_path)), {use: use_basic}) // Optionally load mcode compiler module var mcode_mod = null @@ -66,6 +68,7 @@ function analyze(src, filename) { } disrupt } + ast = fold_mod(ast) return ast } diff --git a/parse.cm b/parse.cm index 36a6f107..1f9dd688 100644 --- a/parse.cm +++ b/parse.cm @@ -403,6 +403,7 @@ var parse = function(tokens, src, filename, tokenizer) { if (tok.kind == ")") advance() else if (tok.kind == "eof") parse_error(tok, "unterminated method parameter list") if (length(params) > 4) parse_error(tok, "functions cannot have more than 4 parameters") + fn.arity = length(params) if (tok.kind == "{") { advance() fn.statements = parse_block_statements() @@ -843,6 +844,7 @@ var parse = function(tokens, src, filename, tokenizer) { } if (length(params) > 4) parse_error(tok, "functions cannot have more than 4 parameters") + node.arity = length(params) if (tok.kind == "{") { advance() @@ -935,6 +937,7 @@ var parse = function(tokens, src, filename, tokenizer) { } if (length(params) > 4) parse_error(tok, "functions cannot have more than 4 parameters") + node.arity = length(params) if (tok.kind != "=>") { parse_error(tok, "expected '=>' in arrow function") @@ -1274,7 +1277,6 @@ var parse = function(tokens, src, filename, tokenizer) { var sem_errors = [] var scopes_array = [] var intrinsics = [] - var block_var_counter = 0 var sem_error = function(node, msg) { var err = {message: msg} @@ -1289,15 +1291,13 @@ var parse = function(tokens, src, filename, tokenizer) { vars: [], in_loop: opts.in_loop == true, function_nr: fn_nr, - is_function_scope: opts.is_func == true, - block_depth: opts.bdepth != null ? 
opts.bdepth : 0 + is_function_scope: opts.is_func == true } } var sem_add_var = function(scope, name, make_opts) { push(scope.vars, { name: name, - scope_name: null, is_const: make_opts.is_const == true, make: make_opts.make, function_nr: make_opts.fn_nr, @@ -1364,23 +1364,10 @@ var parse = function(tokens, src, filename, tokenizer) { return functino_names[name] == true } - var sem_propagate_block_vars = function(parent, block) { + var sem_propagate_vars = function(parent, child) { var i = 0 - var v = null - var sn = null - while (i < length(block.vars)) { - v = block.vars[i] - sn = v.scope_name - if (sn == null) sn = v.name - push(parent.vars, { - name: sn, - scope_name: null, - is_const: v.is_const, - make: v.make, - function_nr: v.function_nr, - nr_uses: v.nr_uses, - closure: v.closure - }) + while (i < length(child.vars)) { + push(parent.vars, child.vars[i]) i = i + 1 } } @@ -1471,7 +1458,6 @@ var parse = function(tokens, src, filename, tokenizer) { if (r.v != null) { left_node.level = r.level left_node.function_nr = r.def_function_nr - if (r.v.scope_name != null) left_node.scope_name = r.v.scope_name } else { left_node.level = -1 } @@ -1525,7 +1511,6 @@ var parse = function(tokens, src, filename, tokenizer) { if (r.v != null) { operand.level = r.level operand.function_nr = r.def_function_nr - if (r.v.scope_name != null) operand.scope_name = r.v.scope_name } else { operand.level = -1 } @@ -1647,7 +1632,6 @@ var parse = function(tokens, src, filename, tokenizer) { expr.function_nr = r.def_function_nr r.v.nr_uses = r.v.nr_uses + 1 if (r.level > 0) r.v.closure = 1 - if (r.v.scope_name != null) expr.scope_name = r.v.scope_name } else { expr.level = -1 sem_add_intrinsic(name) @@ -1664,15 +1648,10 @@ var parse = function(tokens, src, filename, tokenizer) { var name = null var existing = null var i = 0 - var sn = null - var then_scope = null - var list_scope = null - var else_scope = null var loop_scope = null var do_scope = null var for_scope = null var init_kind = 
null - var blk_scope = null var fn_nr_val = null var fn_scope = null var pname = null @@ -1695,15 +1674,9 @@ var parse = function(tokens, src, filename, tokenizer) { if (existing != null && existing.is_const) { sem_error(stmt.left, "cannot redeclare constant '" + name + "'") } - if (existing == null || existing.function_nr != scope.function_nr || scope.block_depth > 0) { + if (existing == null || existing.function_nr != scope.function_nr) { sem_add_var(scope, name, {make: "var", fn_nr: scope.function_nr}) } - if (scope.block_depth > 0) { - sn = "_" + name + "_" + text(block_var_counter) - block_var_counter = block_var_counter + 1 - scope.vars[length(scope.vars) - 1].scope_name = sn - stmt.left.scope_name = sn - } } sem_check_expr(scope, stmt.right) return null @@ -1720,12 +1693,6 @@ var parse = function(tokens, src, filename, tokenizer) { existing.make = "def" } else { sem_add_var(scope, name, {is_const: true, make: "def", fn_nr: scope.function_nr}) - if (scope.block_depth > 0) { - sn = "_" + name + "_" + text(block_var_counter) - block_var_counter = block_var_counter + 1 - scope.vars[length(scope.vars) - 1].scope_name = sn - stmt.left.scope_name = sn - } } } sem_check_expr(scope, stmt.right) @@ -1739,58 +1706,52 @@ var parse = function(tokens, src, filename, tokenizer) { if (kind == "if") { sem_check_expr(scope, stmt.expression) - then_scope = make_scope(scope, scope.function_nr, {bdepth: scope.block_depth + 1}) i = 0 while (i < length(stmt.then)) { - sem_check_stmt(then_scope, stmt.then[i]) + sem_check_stmt(scope, stmt.then[i]) i = i + 1 } - sem_propagate_block_vars(scope, then_scope) - list_scope = make_scope(scope, scope.function_nr, {bdepth: scope.block_depth + 1}) i = 0 while (i < length(stmt.list)) { - sem_check_stmt(list_scope, stmt.list[i]) + sem_check_stmt(scope, stmt.list[i]) i = i + 1 } - sem_propagate_block_vars(scope, list_scope) if (stmt.else != null) { - else_scope = make_scope(scope, scope.function_nr, {bdepth: scope.block_depth + 1}) i = 0 while 
(i < length(stmt.else)) { - sem_check_stmt(else_scope, stmt.else[i]) + sem_check_stmt(scope, stmt.else[i]) i = i + 1 } - sem_propagate_block_vars(scope, else_scope) } return null } if (kind == "while") { sem_check_expr(scope, stmt.expression) - loop_scope = make_scope(scope, scope.function_nr, {in_loop: true, bdepth: scope.block_depth + 1}) + loop_scope = make_scope(scope, scope.function_nr, {in_loop: true}) i = 0 while (i < length(stmt.statements)) { sem_check_stmt(loop_scope, stmt.statements[i]) i = i + 1 } - sem_propagate_block_vars(scope, loop_scope) + sem_propagate_vars(scope, loop_scope) return null } if (kind == "do") { - do_scope = make_scope(scope, scope.function_nr, {in_loop: true, bdepth: scope.block_depth + 1}) + do_scope = make_scope(scope, scope.function_nr, {in_loop: true}) i = 0 while (i < length(stmt.statements)) { sem_check_stmt(do_scope, stmt.statements[i]) i = i + 1 } - sem_propagate_block_vars(scope, do_scope) + sem_propagate_vars(scope, do_scope) sem_check_expr(scope, stmt.expression) return null } if (kind == "for") { - for_scope = make_scope(scope, scope.function_nr, {in_loop: true, bdepth: scope.block_depth + 1}) + for_scope = make_scope(scope, scope.function_nr, {in_loop: true}) if (stmt.init != null) { init_kind = stmt.init.kind if (init_kind == "var" || init_kind == "def") { @@ -1806,7 +1767,7 @@ var parse = function(tokens, src, filename, tokenizer) { sem_check_stmt(for_scope, stmt.statements[i]) i = i + 1 } - sem_propagate_block_vars(scope, for_scope) + sem_propagate_vars(scope, for_scope) return null } @@ -1834,13 +1795,11 @@ var parse = function(tokens, src, filename, tokenizer) { } if (kind == "block") { - blk_scope = make_scope(scope, scope.function_nr, {bdepth: scope.block_depth + 1}) i = 0 while (i < length(stmt.statements)) { - sem_check_stmt(blk_scope, stmt.statements[i]) + sem_check_stmt(scope, stmt.statements[i]) i = i + 1 } - sem_propagate_block_vars(scope, blk_scope) return null }