fix issues with parse.cm and tokenize.cm

This commit is contained in:
2026-02-09 17:43:44 -06:00
parent 68e2395b92
commit d5209e1d59
5 changed files with 201 additions and 57 deletions

View File

@@ -38,7 +38,7 @@ if (use_mcode) {
// analyze: tokenize + parse, check for errors
function analyze(src, filename) {
var tok_result = tokenize_mod(src, filename)
var ast = parse_mod(tok_result.tokens, src, filename)
var ast = parse_mod(tok_result.tokens, src, filename, tokenize_mod)
var _i = 0
var prev_line = -1
var prev_msg = null

111
parse.cm
View File

@@ -5,7 +5,7 @@ var is_alpha = function(c) {
return (c >= 65 && c <= 90) || (c >= 97 && c <= 122)
}
var parse = function(tokens, src, filename) {
var parse = function(tokens, src, filename, tokenizer) {
var _src_len = length(src)
var cp = []
var _i = 0
@@ -167,6 +167,23 @@ var parse = function(tokens, src, filename) {
var rpos = 0
var pattern_str = ""
var flags = ""
var tv = null
var has_interp = false
var ti = 0
var tpl_list = null
var fmt = null
var idx = 0
var tvi = 0
var tvlen = 0
var depth = 0
var expr_str = null
var tc = null
var tq = null
var esc_ch = null
var expr_tokens = null
var sub_ast = null
var sub_stmt = null
var sub_expr = null
if (k == "number") {
node = ast_node("number", start)
@@ -177,8 +194,96 @@ var parse = function(tokens, src, filename) {
return node
}
if (k == "text") {
node = ast_node("text", start)
node.value = tok.value
// Check for template interpolation: ${...}
tv = tok.value
has_interp = false
ti = 0
while (ti < length(tv) - 1) {
if (tv[ti] == "$" && tv[ti + 1] == "{") {
if (ti == 0 || tv[ti - 1] != "\\") {
has_interp = true
break
}
}
ti = ti + 1
}
if (!has_interp || tokenizer == null) {
node = ast_node("text", start)
node.value = tok.value
advance()
ast_node_end(node)
return node
}
// Template literal with interpolation
node = ast_node("text literal", start)
tpl_list = []
node.list = tpl_list
fmt = ""
idx = 0
tvi = 0
tvlen = length(tv)
while (tvi < tvlen) {
if (tv[tvi] == "\\" && tvi + 1 < tvlen) {
esc_ch = tv[tvi + 1]
if (esc_ch == "n") { fmt = fmt + "\n" }
else if (esc_ch == "t") { fmt = fmt + "\t" }
else if (esc_ch == "r") { fmt = fmt + "\r" }
else if (esc_ch == "\\") { fmt = fmt + "\\" }
else if (esc_ch == "`") { fmt = fmt + "`" }
else if (esc_ch == "$") { fmt = fmt + "$" }
else if (esc_ch == "0") { fmt = fmt + character(0) }
else { fmt = fmt + esc_ch }
tvi = tvi + 2
} else if (tv[tvi] == "$" && tvi + 1 < tvlen && tv[tvi + 1] == "{") {
tvi = tvi + 2
depth = 1
expr_str = ""
while (tvi < tvlen && depth > 0) {
tc = tv[tvi]
if (tc == "{") { depth = depth + 1; expr_str = expr_str + tc; tvi = tvi + 1 }
else if (tc == "}") {
depth = depth - 1
if (depth > 0) { expr_str = expr_str + tc }
tvi = tvi + 1
}
else if (tc == "'" || tc == "\"" || tc == "`") {
tq = tc
expr_str = expr_str + tc
tvi = tvi + 1
while (tvi < tvlen && tv[tvi] != tq) {
if (tv[tvi] == "\\" && tvi + 1 < tvlen) {
expr_str = expr_str + tv[tvi]
tvi = tvi + 1
}
expr_str = expr_str + tv[tvi]
tvi = tvi + 1
}
if (tvi < tvlen) { expr_str = expr_str + tv[tvi]; tvi = tvi + 1 }
} else {
expr_str = expr_str + tc
tvi = tvi + 1
}
}
expr_tokens = tokenizer(expr_str, "<template>").tokens
sub_ast = parse(expr_tokens, expr_str, "<template>", tokenizer)
if (sub_ast != null && sub_ast.statements != null && length(sub_ast.statements) > 0) {
sub_stmt = sub_ast.statements[0]
sub_expr = null
if (sub_stmt.kind == "call") {
sub_expr = sub_stmt.expression
} else {
sub_expr = sub_stmt
}
push(tpl_list, sub_expr)
}
fmt = fmt + "{" + text(idx) + "}"
idx = idx + 1
} else {
fmt = fmt + tv[tvi]
tvi = tvi + 1
}
}
node.value = fmt
advance()
ast_node_end(node)
return node

View File

@@ -552,59 +552,81 @@ int cell_init(int argc, char **argv)
/* Check for --mach-run flag to compile and run through MACH VM */
if (argc >= 3 && strcmp(argv[1], "--mach-run") == 0) {
if (!find_cell_shop()) return 1;
const char *script_name = argv[2];
char *script = NULL;
char *allocated_script = NULL;
const char *filename = script_name;
size_t boot_size;
char *boot_data = load_core_file("internal/bootstrap.cm", &boot_size);
if (!boot_data) {
printf("ERROR: Could not load internal/bootstrap.cm from %s\n", core_path);
struct stat st;
if (stat(script_name, &st) == 0 && S_ISREG(st.st_mode)) {
/* Exact name found */
} else {
/* Try .ce then .cm extension */
static char pathbuf[4096];
snprintf(pathbuf, sizeof(pathbuf), "%s.ce", script_name);
if (stat(pathbuf, &st) == 0 && S_ISREG(st.st_mode)) {
script_name = pathbuf;
filename = pathbuf;
} else {
snprintf(pathbuf, sizeof(pathbuf), "%s.cm", script_name);
if (stat(pathbuf, &st) == 0 && S_ISREG(st.st_mode)) {
script_name = pathbuf;
filename = pathbuf;
} else {
printf("Failed to find file: %s\n", argv[2]);
return 1;
}
}
}
FILE *f = fopen(script_name, "r");
if (!f) {
printf("Failed to open file: %s\n", script_name);
return 1;
}
allocated_script = malloc(st.st_size + 1);
if (!allocated_script) {
fclose(f);
printf("Failed to allocate memory for script\n");
return 1;
}
size_t read_size = fread(allocated_script, 1, st.st_size, f);
fclose(f);
allocated_script[read_size] = '\0';
script = allocated_script;
cJSON *ast = JS_ASTTree(script, read_size, filename);
free(allocated_script);
if (!ast) {
printf("Failed to parse %s\n", filename);
return 1;
}
cJSON *boot_ast = JS_ASTTree(boot_data, boot_size, "internal/bootstrap.cm");
free(boot_data);
if (!boot_ast) {
printf("Failed to parse internal/bootstrap.cm\n");
return 1;
}
if (print_tree_errors(boot_ast)) {
cJSON_Delete(boot_ast);
if (print_tree_errors(ast)) {
cJSON_Delete(ast);
return 1;
}
JSRuntime *rt = JS_NewRuntime();
if (!rt) {
printf("Failed to create JS runtime\n");
cJSON_Delete(boot_ast);
cJSON_Delete(ast);
return 1;
}
JSContext *ctx = JS_NewContextWithHeapSize(rt, 16 * 1024 * 1024);
if (!ctx) {
printf("Failed to create JS context\n");
cJSON_Delete(boot_ast); JS_FreeRuntime(rt);
cJSON_Delete(ast); JS_FreeRuntime(rt);
return 1;
}
JS_FreeValue(ctx, js_blob_use(ctx));
JSValue hidden_env = JS_NewObject(ctx);
JS_SetPropertyStr(ctx, hidden_env, "os", js_os_use(ctx));
JS_SetPropertyStr(ctx, hidden_env, "core_path", JS_NewString(ctx, core_path));
JSValue args_arr = JS_NewArray(ctx);
for (int i = 2; i < argc; i++) {
JSValue str = JS_NewString(ctx, argv[i]);
JS_ArrayPush(ctx, &args_arr, str);
}
JS_SetPropertyStr(ctx, hidden_env, "args", args_arr);
hidden_env = JS_Stone(ctx, hidden_env);
JSValue result = JS_RunMachTree(ctx, boot_ast, hidden_env);
cJSON_Delete(boot_ast);
JSValue result = JS_RunMachTree(ctx, ast, JS_NULL);
cJSON_Delete(ast);
int exit_code = 0;
if (JS_IsException(result)) {
/* Error already printed to stderr by JS_Throw* */
JS_GetException(ctx);
exit_code = 1;
} else if (!JS_IsNull(result)) {

View File

@@ -6283,8 +6283,8 @@ static JSValue js_cell_number (JSContext *ctx, JSValue this_val, int argc, JSVal
return val;
}
/* Handle string */
if (tag == JS_TAG_STRING || tag == JS_TAG_STRING_IMM) {
/* Handle string (immediate ASCII or heap JSText) */
if (JS_IsText (val)) {
const char *str = JS_ToCString (ctx, val);
if (!str) return JS_EXCEPTION;
@@ -6925,7 +6925,7 @@ JSValue js_cell_character (JSContext *ctx, JSValue this_val, int argc, JSValue *
int tag = JS_VALUE_GET_TAG (arg);
/* Handle string - return first character */
if (tag == JS_TAG_STRING || tag == JS_TAG_STRING_IMM) {
if (JS_IsText (arg)) {
if (js_string_value_len (arg) == 0) return JS_NewString (ctx, "");
return js_sub_string_val (ctx, arg, 0, 1);
}
@@ -6978,7 +6978,7 @@ static JSValue js_cell_text (JSContext *ctx, JSValue this_val, int argc, JSValue
int tag = JS_VALUE_GET_TAG (arg);
/* Handle string / rope */
if (tag == JS_TAG_STRING || tag == JS_TAG_STRING_IMM) {
if (JS_IsText (arg)) {
JSValue str = JS_ToString (ctx, arg); /* owned + flattens rope */
if (JS_IsException (str)) return JS_EXCEPTION;

View File

@@ -64,6 +64,7 @@ var tokenize = function(src, filename) {
def CP_o = 111
def CP_r = 114
def CP_t = 116
def CP_u = 117
def CP_x = 120
def CP_z = 122
def CP_LBRACE = 123
@@ -113,6 +114,23 @@ var tokenize = function(src, filename) {
return (c >= CP_0 && c <= CP_9) || (c >= CP_a && c <= CP_f) || (c >= CP_A && c <= CP_F)
}
// hex_val: map the code point of a hexadecimal digit to its numeric value.
//   c - code point to convert
// Returns 0..9 for CP_0..CP_9 and 10..15 for CP_a..CP_f / CP_A..CP_F.
// Any code point outside those three ranges yields 0.
var hex_val = function(c) {
  var digit = 0
  if (c >= CP_0 && c <= CP_9) {
    digit = c - CP_0
  } else if (c >= CP_a && c <= CP_f) {
    digit = c - CP_a + 10
  } else if (c >= CP_A && c <= CP_F) {
    digit = c - CP_A + 10
  }
  return digit
}
// read_unicode_escape: consume up to four hex digits at the current position
// and return the character whose code is the value they spell.
// Called from the string reader after the "\u" of an escape has been consumed.
// Reading stops early at end of input or at the first non-hex code point; the
// digits gathered so far are still used, so zero digits gives character(0).
var read_unicode_escape = function() {
  var code = 0
  var digits = 0
  while (digits < 4) {
    // Check the bound first so pk() is never asked to look past the end.
    if (pos >= len || !is_hex(pk())) {
      break
    }
    // adv() consumes the digit and returns its code point.
    code = code * 16 + hex_val(adv())
    digits = digits + 1
  }
  return character(code)
}
// is_alpha: true when code point c lies in CP_a..CP_z or CP_A..CP_Z.
var is_alpha = function(c) {
  var in_lower = c >= CP_a && c <= CP_z
  var in_upper = c >= CP_A && c <= CP_Z
  return in_lower || in_upper
}
@@ -158,6 +176,7 @@ var tokenize = function(src, filename) {
else if (esc == CP_DQUOTE) { value = value + "\"" }
else if (esc == CP_0) { value = value + character(0) }
else if (esc == CP_BACKTICK) { value = value + "`" }
else if (esc == CP_u) { value = value + read_unicode_escape() }
else { value = value + character(esc) }
} else {
value = value + character(adv())
@@ -177,39 +196,37 @@ var tokenize = function(src, filename) {
var start_row = row
var start_col = col
var value = ""
var esc = 0
var depth = 0
var tc = 0
var q = 0
adv() // skip opening backtick
while (pos < len && pk() != CP_BACKTICK) {
if (pk() == CP_BSLASH && pos + 1 < len) {
adv()
esc = adv()
if (esc == CP_n) { value = value + "\n" }
else if (esc == CP_t) { value = value + "\t" }
else if (esc == CP_r) { value = value + "\r" }
else if (esc == CP_BSLASH) { value = value + "\\" }
else if (esc == CP_BACKTICK) { value = value + "`" }
else if (esc == CP_DOLLAR) { value = value + "$" }
else if (esc == CP_0) { value = value + character(0) }
else { value = value + character(esc) }
value = value + character(adv())
value = value + character(adv())
} else if (pk() == CP_DOLLAR && pos + 1 < len && pk_at(1) == CP_LBRACE) {
adv() // $
adv() // {
value = value + character(adv()) // $
value = value + character(adv()) // {
depth = 1
while (pos < len && depth > 0) {
tc = pk()
if (tc == CP_LBRACE) { depth = depth + 1; adv() }
else if (tc == CP_RBRACE) { depth = depth - 1; adv() }
if (tc == CP_LBRACE) { depth = depth + 1; value = value + character(adv()) }
else if (tc == CP_RBRACE) {
depth = depth - 1
if (depth > 0) { value = value + character(adv()) }
else { value = value + character(adv()) }
}
else if (tc == CP_SQUOTE || tc == CP_DQUOTE || tc == CP_BACKTICK) {
q = adv()
value = value + character(q)
while (pos < len && pk() != q) {
if (pk() == CP_BSLASH && pos + 1 < len) adv()
adv()
if (pk() == CP_BSLASH && pos + 1 < len) {
value = value + character(adv())
}
value = value + character(adv())
}
if (pos < len) adv()
} else { adv() }
if (pos < len) { value = value + character(adv()) }
} else { value = value + character(adv()) }
}
} else {
value = value + character(adv())