parse.ce and tokenize.ce

This commit is contained in:
2026-02-09 11:56:09 -06:00
parent 45556c344d
commit 368511f666
6 changed files with 3002 additions and 19 deletions

View File

@@ -1,14 +1,17 @@
// Hidden vars (os, program) come from env
// Hidden vars (os, args) come from env
// args[0] = script filename, args[1..] = user args
var load_internal = os.load_internal
// Resolve an internal module: build the loader symbol "js_<name>_use" and
// hand it to load_internal, returning whatever that call returns.
function use_embed(name) {
  var symbol = "js_" + name + "_use"
  return load_internal(symbol)
}
var fd = use_embed('fd')
var json = use_embed('json')
var use_cache = {}
use_cache['fd'] = fd
use_cache['os'] = os
use_cache['json'] = json
function use(path) {
if (use_cache[path])
@@ -34,7 +37,15 @@ function use(path) {
}
// Load and run the user's program
var program = args[0]
var user_args = []
var _i = 1
while (_i < length(args)) {
push(user_args, args[_i])
_i = _i + 1
}
var blob = fd.slurp(program)
stone(blob)
var script = text(blob)
mach_eval(program, script, {use: use})
mach_eval(program, script, {use: use, args: user_args, json: json})

2373
parse.ce Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -727,7 +727,6 @@ int cell_init(int argc, char **argv)
/* Check for --mach-run flag to compile and run through MACH VM */
if (argc >= 3 && strcmp(argv[1], "--mach-run") == 0) {
const char *filename = argv[2];
if (!find_cell_shop()) return 1;
size_t boot_size;
@@ -755,7 +754,7 @@ int cell_init(int argc, char **argv)
cJSON_Delete(boot_ast);
return 1;
}
JSContext *ctx = JS_NewContextWithHeapSize(rt, 256 * 1024);
JSContext *ctx = JS_NewContextWithHeapSize(rt, 16 * 1024 * 1024);
if (!ctx) {
printf("Failed to create JS context\n");
cJSON_Delete(boot_ast); JS_FreeRuntime(rt);
@@ -766,7 +765,12 @@ int cell_init(int argc, char **argv)
JSValue hidden_env = JS_NewObject(ctx);
JS_SetPropertyStr(ctx, hidden_env, "os", js_os_use(ctx));
JS_SetPropertyStr(ctx, hidden_env, "program", JS_NewString(ctx, filename));
JSValue args_arr = JS_NewArray(ctx);
for (int i = 2; i < argc; i++) {
JSValue str = JS_NewString(ctx, argv[i]);
JS_ArrayPush(ctx, &args_arr, str);
}
JS_SetPropertyStr(ctx, hidden_env, "args", args_arr);
hidden_env = JS_Stone(ctx, hidden_env);
JSValue result = JS_RunMachTree(ctx, boot_ast, hidden_env);
@@ -775,7 +779,9 @@ int cell_init(int argc, char **argv)
int exit_code = 0;
if (JS_IsException(result)) {
JSValue exc = JS_GetException(ctx);
const char *err_str = JS_ToCString(ctx, exc);
const char *err_str = NULL;
JSValue msg = JS_GetPropertyStr(ctx, exc, "message");
err_str = JS_ToCString(ctx, msg);
if (err_str) {
printf("Error: %s\n", err_str);
JS_FreeCString(ctx, err_str);
@@ -921,4 +927,4 @@ int uncaught_exception(JSContext *js, JSValue v)
JS_FreeValue(js, exp);
JS_FreeValue(js, v);
return 0;
}
}

View File

@@ -697,6 +697,9 @@ JSValue JS_GetProperty (JSContext *ctx, JSValue this_obj, JSValue prop);
// For records
JSValue JS_GetPropertyStr (JSContext *ctx, JSValue this_obj, const char *prop);
int JS_SetPropertyStr (JSContext *ctx, JSValue this_obj, const char *prop, JSValue val);
// Set property on the global object
int JS_SetGlobalStr (JSContext *ctx, const char *prop, JSValue val);
int JS_SetProperty (JSContext *ctx, JSValue this_obj, JSValue prop, JSValue val);
JSValue JS_GetPrototype (JSContext *ctx, JSValue val);

View File

@@ -6204,6 +6204,13 @@ static int js_json_to_str (JSContext *ctx, JSONStringifyContext *jsc, JSValue ho
goto exception;
}
/* Heap strings are JS_TAG_PTR but must be quoted, not iterated as objects */
if (JS_IsText (val_ref.val) && !MIST_IsImmediateASCII (val_ref.val)) {
val_ref.val = JS_ToQuotedString (ctx, val_ref.val);
if (JS_IsException (val_ref.val)) goto exception;
goto concat_value;
}
if (JS_IsObject (
val_ref.val)) { /* includes arrays (OBJ_ARRAY) since they have JS_TAG_PTR */
v = js_array_includes (ctx, jsc->stack, 1, &val_ref.val);
@@ -9013,17 +9020,28 @@ static JSValue js_cell_array (JSContext *ctx, JSValue this_val, int argc, JSValu
if (argc < 2 || JS_IsNull (argv[1])) {
/* Split into characters */
JSValue result = JS_NewArrayLen (ctx, len);
if (JS_IsException (result)) { return result; }
JSArray *out = JS_VALUE_GET_ARRAY (result);
JSGCRef arr_ref, str_ref;
JS_PushGCRef (ctx, &arr_ref);
JS_PushGCRef (ctx, &str_ref);
str_ref.val = arg;
arr_ref.val = JS_NewArray (ctx);
if (JS_IsException (arr_ref.val)) {
JS_PopGCRef (ctx, &str_ref);
JS_PopGCRef (ctx, &arr_ref);
return JS_EXCEPTION;
}
for (int i = 0; i < len; i++) {
JSValue ch = js_sub_string_val (ctx, arg, i, i + 1);
JSValue ch = js_sub_string_val (ctx, str_ref.val, i, i + 1);
if (JS_IsException (ch)) {
JS_PopGCRef (ctx, &str_ref);
JS_PopGCRef (ctx, &arr_ref);
return JS_EXCEPTION;
}
out->values[i] = ch;
JS_ArrayPush (ctx, &arr_ref.val, ch);
}
out->len = len;
JSValue result = arr_ref.val;
JS_PopGCRef (ctx, &str_ref);
JS_PopGCRef (ctx, &arr_ref);
return result;
}
@@ -11404,11 +11422,11 @@ static JSValue js_cell_length (JSContext *ctx, JSValue this_val, int argc, JSVal
int tag = JS_VALUE_GET_TAG (val);
/* Strings return codepoint count */
if (tag == JS_TAG_STRING_IMM) {
if (MIST_IsImmediateASCII (val)) {
return JS_NewInt32 (ctx, MIST_GetImmediateASCIILen (val));
}
if (tag == JS_TAG_STRING) {
JSText *p = JS_VALUE_GET_STRING (val);
if (JS_IsPtr (val) && objhdr_type (*chase (val)) == OBJ_TEXT) {
JSText *p = (JSText *)chase (val);
return JS_NewInt32 (ctx, (int)JSText_len (p));
}
@@ -11582,8 +11600,7 @@ static JSValue js_cell_is_stone (JSContext *ctx, JSValue this_val, int argc, JSV
/* is_text(val) */
static JSValue js_cell_is_text (JSContext *ctx, JSValue this_val, int argc, JSValue *argv) {
if (argc < 1) return JS_FALSE;
int tag = JS_VALUE_GET_TAG (argv[0]);
return JS_NewBool (ctx, tag == JS_TAG_STRING || tag == JS_TAG_STRING_IMM);
return JS_NewBool (ctx, JS_IsText (argv[0]));
}
/* is_proto(val, master) - check if val has master in prototype chain */
@@ -11737,6 +11754,10 @@ static JSValue js_cell_some(JSContext *ctx, JSValue this_val, int argc, JSValue
/* Set property `prop` to `val` on the context's global object
(ctx->global_obj). Returns the result of JS_SetPropertyStr unchanged. */
int JS_SetGlobalStr (JSContext *ctx, const char *prop, JSValue val) {
return JS_SetPropertyStr(ctx, ctx->global_obj, prop, val);
}
/* GC-SAFE: Helper to set a global function. Creates function first, then reads
ctx->global_obj to ensure it's not stale if GC ran during function creation. */
static void js_set_global_cfunc(JSContext *ctx, const char *name, JSCFunction *func, int length) {
JSGCRef ref;
JS_PushGCRef(ctx, &ref);
@@ -11799,7 +11820,7 @@ static void JS_AddIntrinsicBaseObjects (JSContext *ctx) {
/* Core functions - using GC-safe helper */
js_set_global_cfunc(ctx, "eval", js_cell_eval, 2);
js_set_global_cfunc(ctx, "mach_eval", js_mach_eval, 2);
js_set_global_cfunc(ctx, "mach_eval", js_mach_eval, 3);
js_set_global_cfunc(ctx, "stone", js_cell_stone, 1);
js_set_global_cfunc(ctx, "length", js_cell_length, 1);
js_set_global_cfunc(ctx, "call", js_cell_call, 3);

569
tokenize.ce Normal file
View File

@@ -0,0 +1,569 @@
// Script inputs: args[0] is the source text to tokenize; args[1], when
// present, is the name reported in the output (otherwise "<stdin>").
var src = args[0]
var filename = length(args) > 1 ? args[1] : "<stdin>"
// Convert to codepoint array - integers are GC-safe immediate values
var len = length(src)
var cp = []
var _i = 0
while (_i < len) {
push(cp, codepoint(src[_i]))
_i = _i + 1
}
// Scanner state shared by the helpers in this file: pos indexes into cp,
// row/col start at zero and are kept up to date by adv().
var pos = 0
var row = 0
var col = 0
// Output list; the read_*/emit_* helpers push one token record each onto it.
var tokens = []
// Codepoint constants: the ASCII values of the characters the scanner
// compares against when it inspects entries of cp.
// Line breaks and blanks
def CP_LF = 10
def CP_CR = 13
def CP_TAB = 9
def CP_SPACE = 32
// Punctuation '!' .. '/'
def CP_BANG = 33
def CP_DQUOTE = 34
def CP_HASH = 35
def CP_DOLLAR = 36
def CP_PERCENT = 37
def CP_AMP = 38
def CP_SQUOTE = 39
def CP_LPAREN = 40
def CP_RPAREN = 41
def CP_STAR = 42
def CP_PLUS = 43
def CP_COMMA = 44
def CP_MINUS = 45
def CP_DOT = 46
def CP_SLASH = 47
// Digits: '0', '1', '7', '9' (range ends for decimal, binary and octal)
def CP_0 = 48
def CP_1 = 49
def CP_7 = 55
def CP_9 = 57
// Punctuation ':' .. '@'
def CP_COLON = 58
def CP_SEMI = 59
def CP_LT = 60
def CP_EQ = 61
def CP_GT = 62
def CP_QMARK = 63
def CP_AT = 64
// Uppercase letters used for range checks, radix prefixes and exponents
def CP_A = 65
def CP_B = 66
def CP_E = 69
def CP_F = 70
def CP_O = 79
def CP_X = 88
def CP_Z = 90
// Punctuation '[' .. '`'
def CP_LBRACKET = 91
def CP_BSLASH = 92
def CP_RBRACKET = 93
def CP_CARET = 94
def CP_UNDERSCORE = 95
def CP_BACKTICK = 96
// Lowercase letters used for range checks, radix prefixes, exponents and
// the escape letters n, r, t
def CP_a = 97
def CP_b = 98
def CP_e = 101
def CP_f = 102
def CP_n = 110
def CP_o = 111
def CP_r = 114
def CP_t = 116
def CP_x = 120
def CP_z = 122
// Punctuation '{' .. '~'
def CP_LBRACE = 123
def CP_PIPE = 124
def CP_RBRACE = 125
def CP_TILDE = 126
// Keywords lookup: read_name() indexes this record with the scanned word.
// A hit supplies the token kind; every entry maps the keyword to itself, so
// a keyword token's kind is the keyword text. A miss yields a "name" token.
var keywords = {
if: "if", in: "in", do: "do", go: "go",
var: "var", def: "def", for: "for",
else: "else", this: "this", null: "null", true: "true",
false: "false", while: "while", break: "break",
return: "return", delete: "delete",
disrupt: "disrupt", function: "function", continue: "continue",
disruption: "disruption"
}
// Peek at the codepoint under the cursor without consuming it.
// Yields -1 once the cursor has reached the end of the input.
function pk() {
  return pos < len ? cp[pos] : -1
}
// Peek n codepoints ahead of the cursor without consuming anything.
// Yields -1 when that position lies at or beyond the end of the input.
function pk_at(n) {
  var target = pos + n
  return target < len ? cp[target] : -1
}
// Consume the codepoint under the cursor and return it. A line feed moves
// the position to column 0 of the next row; anything else moves one column
// to the right. No bounds check is done here; callers test pos < len.
function adv() {
  var current = cp[pos]
  pos = pos + 1
  if (current != CP_LF) {
    col = col + 1
  } else {
    row = row + 1
    col = 0
  }
  return current
}
// True when c is the codepoint of a decimal digit '0'..'9'.
function is_digit(c) {
  if (c < CP_0) return false
  return c <= CP_9
}
// True when c is a hexadecimal digit: '0'..'9', 'a'..'f' or 'A'..'F'.
function is_hex(c) {
  if (is_digit(c)) return true
  if (c >= CP_a && c <= CP_f) return true
  return c >= CP_A && c <= CP_F
}
// True when c is an ASCII letter, either case.
function is_alpha(c) {
  var lower = c >= CP_a && c <= CP_z
  var upper = c >= CP_A && c <= CP_Z
  return lower || upper
}
// True when c is an ASCII letter or a decimal digit.
function is_alnum(c) {
  if (is_alpha(c)) return true
  return is_digit(c)
}
// True when c may begin an identifier: an ASCII letter, '_' or '$'.
function is_ident_start(c) {
  if (c == CP_UNDERSCORE || c == CP_DOLLAR) return true
  return is_alpha(c)
}
// True when c may continue an identifier: anything that may start one,
// a decimal digit, or one of the suffix characters '?' and '!'.
function is_ident_char(c) {
  if (is_ident_start(c) || is_digit(c)) return true
  return c == CP_QMARK || c == CP_BANG
}
// Rebuild the text covered by cp[start] .. cp[end - 1] by converting each
// codepoint back to a character. Returns "" when start >= end.
function substr(start, end) {
  var out = ""
  var idx = start
  while (idx < end) {
    out = out + character(cp[idx])
    idx = idx + 1
  }
  return out
}
// Scan a quoted text literal and push one "text" token.
//
// quote_cp is the codepoint of the opening quote (the cursor must be on it);
// scanning stops at the next unescaped occurrence of the same codepoint or
// at the end of input, whichever comes first. The token's value holds the
// decoded contents (escapes resolved, quotes excluded); at/from_* mark the
// opening quote and to_* the position just after the closing quote.
function read_string(quote_cp) {
  var start = pos
  var start_row = row
  var start_col = col
  var value = ""
  var esc = 0
  adv() // skip opening quote
  while (pos < len && pk() != quote_cp) {
    // Treat a backslash as an escape only when a codepoint follows it, the
    // same guard read_template() uses. Without it a backslash at the very
    // end of the input made the second adv() read past cp and left pos at
    // len + 1, corrupting the position of every later token (the EOF token).
    if (pk() == CP_BSLASH && pos + 1 < len) {
      adv()
      esc = adv()
      if (esc == CP_n) { value = value + "\n" }
      else if (esc == CP_t) { value = value + "\t" }
      else if (esc == CP_r) { value = value + "\r" }
      else if (esc == CP_BSLASH) { value = value + "\\" }
      else if (esc == CP_SQUOTE) { value = value + "'" }
      else if (esc == CP_DQUOTE) { value = value + "\"" }
      else if (esc == CP_0) { value = value + character(0) }
      else if (esc == CP_BACKTICK) { value = value + "`" }
      // Unknown escapes keep the escaped character itself.
      else { value = value + character(esc) }
    } else {
      // Ordinary character, or a dangling backslash at end of input.
      value = value + character(adv())
    }
  }
  if (pos < len) adv() // skip closing quote
  push(tokens, {
    kind: "text", at: start,
    from_row: start_row, from_column: start_col,
    to_row: row, to_column: col,
    value: value
  })
}
// Scan a backtick-delimited template literal and push one "text" token.
// The cursor must be on the opening backtick. The token's value collects
// only the literal characters (with escapes resolved); the contents of
// ${ ... } sections are skipped over and are not added to value.
function read_template() {
var start = pos
var start_row = row
var start_col = col
var value = ""
var esc = 0
// depth: current brace nesting inside a ${ ... } section
var depth = 0
// tc: codepoint under the cursor while skipping a ${ ... } section
var tc = 0
// q: the quote codepoint that opened a quoted run inside ${ ... }
var q = 0
adv() // skip opening backtick
while (pos < len && pk() != CP_BACKTICK) {
// A backslash counts as an escape only when another codepoint follows it.
if (pk() == CP_BSLASH && pos + 1 < len) {
adv()
esc = adv()
if (esc == CP_n) { value = value + "\n" }
else if (esc == CP_t) { value = value + "\t" }
else if (esc == CP_r) { value = value + "\r" }
else if (esc == CP_BSLASH) { value = value + "\\" }
else if (esc == CP_BACKTICK) { value = value + "`" }
else if (esc == CP_DOLLAR) { value = value + "$" }
else if (esc == CP_0) { value = value + character(0) }
else { value = value + character(esc) }
} else if (pk() == CP_DOLLAR && pos + 1 < len && pk_at(1) == CP_LBRACE) {
// Skip a ${ ... } section up to its matching closing brace.
adv() // $
adv() // {
depth = 1
while (pos < len && depth > 0) {
tc = pk()
if (tc == CP_LBRACE) { depth = depth + 1; adv() }
else if (tc == CP_RBRACE) { depth = depth - 1; adv() }
else if (tc == CP_SQUOTE || tc == CP_DQUOTE || tc == CP_BACKTICK) {
// Skip a quoted run whole so braces inside it do not change depth.
q = adv()
while (pos < len && pk() != q) {
// Step over the backslash so the escaped character is consumed below.
if (pk() == CP_BSLASH && pos + 1 < len) adv()
adv()
}
if (pos < len) adv()
} else { adv() }
}
} else {
value = value + character(adv())
}
}
if (pos < len) adv() // skip closing backtick
push(tokens, {
kind: "text", at: start,
from_row: start_row, from_column: start_col,
to_row: row, to_column: col,
value: value
})
}
// Scan a numeric literal and push one "number" token. Recognised shapes:
// 0x/0X hex and 0b/0B binary (both allowing '_' between digits), 0o/0O
// octal, and decimal with '_' separators, an optional fraction and an
// optional exponent with sign. The token carries the raw spelling in
// value and the result of number(raw) in number.
function read_number() {
  var first = pos
  var first_row = row
  var first_col = col
  var prefix = -1
  var literal = ""

  // A radix marker is only meaningful directly after a leading zero.
  if (pk() == CP_0) prefix = pk_at(1)

  if (prefix == CP_x || prefix == CP_X) {
    adv(); adv()
    while (pos < len && (is_hex(pk()) || pk() == CP_UNDERSCORE)) {
      adv()
    }
  } else if (prefix == CP_b || prefix == CP_B) {
    adv(); adv()
    while (pos < len && (pk() == CP_0 || pk() == CP_1 || pk() == CP_UNDERSCORE)) {
      adv()
    }
  } else if (prefix == CP_o || prefix == CP_O) {
    adv(); adv()
    while (pos < len && pk() >= CP_0 && pk() <= CP_7) {
      adv()
    }
  } else {
    // Integer part
    while (pos < len && (is_digit(pk()) || pk() == CP_UNDERSCORE)) {
      adv()
    }
    // Fraction
    if (pos < len && pk() == CP_DOT) {
      adv()
      while (pos < len && (is_digit(pk()) || pk() == CP_UNDERSCORE)) {
        adv()
      }
    }
    // Exponent
    if (pos < len && (pk() == CP_e || pk() == CP_E)) {
      adv()
      if (pos < len && (pk() == CP_PLUS || pk() == CP_MINUS)) {
        adv()
      }
      while (pos < len && is_digit(pk())) {
        adv()
      }
    }
  }

  literal = substr(first, pos)
  push(tokens, {
    kind: "number", at: first,
    from_row: first_row, from_column: first_col,
    to_row: row, to_column: col,
    value: literal, number: number(literal)
  })
}
// Scan an identifier and push one token for it. Words found in the
// keywords record become a token whose kind is the looked-up entry (with no
// value field); every other word becomes a "name" token carrying the word.
function read_name() {
  var first = pos
  var first_row = row
  var first_col = col
  var word = ""
  var kind = null

  while (pos < len && is_ident_char(pk())) {
    adv()
  }
  word = substr(first, pos)
  kind = keywords[word]

  if (kind == null) {
    push(tokens, {
      kind: "name", at: first,
      from_row: first_row, from_column: first_col,
      to_row: row, to_column: col,
      value: word
    })
  } else {
    push(tokens, {
      kind: kind, at: first,
      from_row: first_row, from_column: first_col,
      to_row: row, to_column: col
    })
  }
}
// Scan a comment and push one "comment" token whose value is the raw
// comment text including its delimiters. The cursor must be on the leading
// '/'. A line comment runs up to (not including) the next LF or CR; a
// block comment runs through the closing "*/" or to the end of input.
function read_comment() {
  var first = pos
  var first_row = row
  var first_col = col
  var body = ""

  if (pk_at(1) != CP_SLASH) {
    adv(); adv() // skip /*
    while (pos < len) {
      if (pk() == CP_STAR && pk_at(1) == CP_SLASH) {
        adv(); adv()
        break
      }
      adv()
    }
  } else {
    while (pos < len && pk() != CP_LF && pk() != CP_CR) {
      adv()
    }
  }

  body = substr(first, pos)
  push(tokens, {
    kind: "comment", at: first,
    from_row: first_row, from_column: first_col,
    to_row: row, to_column: col,
    value: body
  })
}
// Consume count codepoints and push a token of the given kind that spans
// them. Operator tokens carry no value field.
function emit_op(kind, count) {
  var first = pos
  var first_row = row
  var first_col = col
  var remaining = count
  while (remaining > 0) {
    adv()
    remaining = remaining - 1
  }
  push(tokens, {
    kind: kind, at: first,
    from_row: first_row, from_column: first_col,
    to_row: row, to_column: col
  })
}
// Consume count codepoints and push them as a single "name" token whose
// value is the consumed text. tokenize_one() uses this for operator
// spellings that end in '!'.
function emit_ident(count) {
  var first = pos
  var first_row = row
  var first_col = col
  var remaining = count
  while (remaining > 0) {
    adv()
    remaining = remaining - 1
  }
  push(tokens, {
    kind: "name", at: first,
    from_row: first_row, from_column: first_col,
    to_row: row, to_column: col,
    value: substr(first, pos)
  })
}
// Scan exactly one token starting at the cursor and push it onto tokens.
// Returns false only when the cursor is already at the end of input (nothing
// is pushed in that case) and true after a token has been emitted.
// Within each operator family the longer spellings are tested before the
// shorter ones, so the order of the checks below is significant.
function tokenize_one() {
var c = pk()
var start = 0
var start_row = 0
var start_col = 0
var raw = ""
if (c == -1) return false
// Newline
if (c == CP_LF) {
start = pos
start_row = row
start_col = col
adv()
push(tokens, {
kind: "newline", at: start,
from_row: start_row, from_column: start_col,
to_row: row, to_column: col,
value: "\n"
})
return true
}
// CR and CRLF each produce a single newline token whose value is "\n".
// NOTE(review): adv() only advances row on LF, so a CR that is not followed
// by LF leaves row unchanged - confirm this is intended.
if (c == CP_CR) {
start = pos
start_row = row
start_col = col
adv()
if (pos < len && pk() == CP_LF) adv()
push(tokens, {
kind: "newline", at: start,
from_row: start_row, from_column: start_col,
to_row: row, to_column: col,
value: "\n"
})
return true
}
// Whitespace
// A run of spaces and tabs becomes one "space" token holding the raw run.
if (c == CP_SPACE || c == CP_TAB) {
start = pos
start_row = row
start_col = col
while (pos < len && (pk() == CP_SPACE || pk() == CP_TAB)) adv()
raw = substr(start, pos)
push(tokens, {
kind: "space", at: start,
from_row: start_row, from_column: start_col,
to_row: row, to_column: col,
value: raw
})
return true
}
// Strings
if (c == CP_SQUOTE || c == CP_DQUOTE) {
read_string(c)
return true
}
// Template
if (c == CP_BACKTICK) {
read_template()
return true
}
// Numbers
if (is_digit(c)) {
read_number()
return true
}
// A '.' directly followed by a digit starts a number such as .5
if (c == CP_DOT && is_digit(pk_at(1))) {
read_number()
return true
}
// Identifiers and keywords
if (is_ident_start(c)) {
read_name()
return true
}
// Comments and /
if (c == CP_SLASH) {
if (pk_at(1) == CP_SLASH || pk_at(1) == CP_STAR) {
read_comment()
return true
}
if (pk_at(1) == CP_EQ) { emit_op("/=", 2); return true }
// Here and below: an operator spelling immediately followed by '!' is
// emitted through emit_ident() as one "name" token instead of an operator.
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
emit_op("/", 1)
return true
}
// Operators
if (c == CP_STAR) {
if (pk_at(1) == CP_STAR) {
if (pk_at(2) == CP_BANG) { emit_ident(3); return true }
if (pk_at(2) == CP_EQ) { emit_op("**=", 3); return true }
emit_op("**", 2); return true
}
if (pk_at(1) == CP_EQ) { emit_op("*=", 2); return true }
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
emit_op("*", 1); return true
}
if (c == CP_PERCENT) {
if (pk_at(1) == CP_EQ) { emit_op("%=", 2); return true }
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
emit_op("%", 1); return true
}
if (c == CP_PLUS) {
if (pk_at(1) == CP_EQ) { emit_op("+=", 2); return true }
if (pk_at(1) == CP_PLUS) { emit_op("++", 2); return true }
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
emit_op("+", 1); return true
}
if (c == CP_MINUS) {
if (pk_at(1) == CP_EQ) { emit_op("-=", 2); return true }
if (pk_at(1) == CP_MINUS) { emit_op("--", 2); return true }
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
emit_op("-", 1); return true
}
if (c == CP_LT) {
// "<=!" must be tested before "<=" so the three-character name wins.
if (pk_at(1) == CP_EQ && pk_at(2) == CP_BANG) { emit_ident(3); return true }
if (pk_at(1) == CP_EQ) { emit_op("<=", 2); return true }
if (pk_at(1) == CP_LT) {
if (pk_at(2) == CP_BANG) { emit_ident(3); return true }
if (pk_at(2) == CP_EQ) { emit_op("<<=", 3); return true }
emit_op("<<", 2); return true
}
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
emit_op("<", 1); return true
}
if (c == CP_GT) {
// ">=!" must be tested before ">=" so the three-character name wins.
if (pk_at(1) == CP_EQ && pk_at(2) == CP_BANG) { emit_ident(3); return true }
if (pk_at(1) == CP_EQ) { emit_op(">=", 2); return true }
if (pk_at(1) == CP_GT) {
if (pk_at(2) == CP_GT) {
if (pk_at(3) == CP_BANG) { emit_ident(4); return true }
if (pk_at(3) == CP_EQ) { emit_op(">>>=", 4); return true }
emit_op(">>>", 3); return true
}
if (pk_at(2) == CP_BANG) { emit_ident(3); return true }
if (pk_at(2) == CP_EQ) { emit_op(">>=", 3); return true }
emit_op(">>", 2); return true
}
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
emit_op(">", 1); return true
}
if (c == CP_EQ) {
if (pk_at(1) == CP_EQ) {
if (pk_at(2) == CP_EQ) { emit_op("===", 3); return true }
emit_op("==", 2); return true
}
if (pk_at(1) == CP_GT) { emit_op("=>", 2); return true }
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
emit_op("=", 1); return true
}
if (c == CP_BANG) {
if (pk_at(1) == CP_EQ) {
if (pk_at(2) == CP_BANG) { emit_ident(3); return true }
if (pk_at(2) == CP_EQ) { emit_op("!==", 3); return true }
emit_op("!=", 2); return true
}
emit_op("!", 1); return true
}
if (c == CP_AMP) {
if (pk_at(1) == CP_AMP) {
if (pk_at(2) == CP_BANG) { emit_ident(3); return true }
if (pk_at(2) == CP_EQ) { emit_op("&&=", 3); return true }
emit_op("&&", 2); return true
}
if (pk_at(1) == CP_EQ) { emit_op("&=", 2); return true }
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
emit_op("&", 1); return true
}
if (c == CP_PIPE) {
if (pk_at(1) == CP_PIPE) {
if (pk_at(2) == CP_BANG) { emit_ident(3); return true }
if (pk_at(2) == CP_EQ) { emit_op("||=", 3); return true }
emit_op("||", 2); return true
}
if (pk_at(1) == CP_EQ) { emit_op("|=", 2); return true }
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
emit_op("|", 1); return true
}
if (c == CP_CARET) {
if (pk_at(1) == CP_EQ) { emit_op("^=", 2); return true }
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
emit_op("^", 1); return true
}
if (c == CP_LBRACKET) {
// "[]!" is emitted as one three-character "name" token.
if (pk_at(1) == CP_RBRACKET && pk_at(2) == CP_BANG) { emit_ident(3); return true }
emit_op("[", 1); return true
}
if (c == CP_TILDE) {
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
emit_op("~", 1); return true
}
// Single character tokens
// Anything not matched above becomes a one-codepoint token whose kind is
// the character itself.
emit_op(character(c), 1)
return true
}
// Main loop: scan tokens until the input is exhausted.
while (pos < len) tokenize_one()

// Close the stream with a zero-width "eof" token at the final position.
var eof_token = {
  kind: "eof", at: pos,
  from_row: row, from_column: col,
  to_row: row, to_column: col
}
push(tokens, eof_token)

// Emit the result as one JSON document: the file name plus the token list.
var result = {filename: filename, tokens: tokens}
print(json.encode(result))