From d5209e1d591d6241740826b5dd199200957ab586 Mon Sep 17 00:00:00 2001 From: John Alanbrook Date: Mon, 9 Feb 2026 17:43:44 -0600 Subject: [PATCH] fix issues with parse.cm and tokenize.cm --- internal/bootstrap.cm | 2 +- parse.cm | 111 ++++++++++++++++++++++++++++++++++++++++-- source/cell.c | 82 +++++++++++++++++++------------ source/runtime.c | 8 +-- tokenize.cm | 55 +++++++++++++-------- 5 files changed, 201 insertions(+), 57 deletions(-) diff --git a/internal/bootstrap.cm b/internal/bootstrap.cm index d5a53ddf..6118967a 100644 --- a/internal/bootstrap.cm +++ b/internal/bootstrap.cm @@ -38,7 +38,7 @@ if (use_mcode) { // analyze: tokenize + parse, check for errors function analyze(src, filename) { var tok_result = tokenize_mod(src, filename) - var ast = parse_mod(tok_result.tokens, src, filename) + var ast = parse_mod(tok_result.tokens, src, filename, tokenize_mod) var _i = 0 var prev_line = -1 var prev_msg = null diff --git a/parse.cm b/parse.cm index df98bdc0..36a6f107 100644 --- a/parse.cm +++ b/parse.cm @@ -5,7 +5,7 @@ var is_alpha = function(c) { return (c >= 65 && c <= 90) || (c >= 97 && c <= 122) } -var parse = function(tokens, src, filename) { +var parse = function(tokens, src, filename, tokenizer) { var _src_len = length(src) var cp = [] var _i = 0 @@ -167,6 +167,23 @@ var parse = function(tokens, src, filename) { var rpos = 0 var pattern_str = "" var flags = "" + var tv = null + var has_interp = false + var ti = 0 + var tpl_list = null + var fmt = null + var idx = 0 + var tvi = 0 + var tvlen = 0 + var depth = 0 + var expr_str = null + var tc = null + var tq = null + var esc_ch = null + var expr_tokens = null + var sub_ast = null + var sub_stmt = null + var sub_expr = null if (k == "number") { node = ast_node("number", start) @@ -177,8 +194,96 @@ var parse = function(tokens, src, filename) { return node } if (k == "text") { - node = ast_node("text", start) - node.value = tok.value + // Check for template interpolation: ${...} + tv = tok.value + has_interp = false + ti = 0 + while (ti < length(tv) - 1) { + if (tv[ti] == "$" && tv[ti + 1] == "{") { + if (ti == 0 || tv[ti - 1] != "\\") { + has_interp = true + break + } + } + ti = ti + 1 + } + if (!has_interp || tokenizer == null) { + node = ast_node("text", start) + node.value = tok.value + advance() + ast_node_end(node) + return node + } + // Template literal with interpolation + node = ast_node("text literal", start) + tpl_list = [] + node.list = tpl_list + fmt = "" + idx = 0 + tvi = 0 + tvlen = length(tv) + while (tvi < tvlen) { + if (tv[tvi] == "\\" && tvi + 1 < tvlen) { + esc_ch = tv[tvi + 1] + if (esc_ch == "n") { fmt = fmt + "\n" } + else if (esc_ch == "t") { fmt = fmt + "\t" } + else if (esc_ch == "r") { fmt = fmt + "\r" } + else if (esc_ch == "\\") { fmt = fmt + "\\" } + else if (esc_ch == "`") { fmt = fmt + "`" } + else if (esc_ch == "$") { fmt = fmt + "$" } + else if (esc_ch == "0") { fmt = fmt + character(0) } + else { fmt = fmt + esc_ch } + tvi = tvi + 2 + } else if (tv[tvi] == "$" && tvi + 1 < tvlen && tv[tvi + 1] == "{") { + tvi = tvi + 2 + depth = 1 + expr_str = "" + while (tvi < tvlen && depth > 0) { + tc = tv[tvi] + if (tc == "{") { depth = depth + 1; expr_str = expr_str + tc; tvi = tvi + 1 } + else if (tc == "}") { + depth = depth - 1 + if (depth > 0) { expr_str = expr_str + tc } + tvi = tvi + 1 + } + else if (tc == "'" || tc == "\"" || tc == "`") { + tq = tc + expr_str = expr_str + tc + tvi = tvi + 1 + while (tvi < tvlen && tv[tvi] != tq) { + if (tv[tvi] == "\\" && tvi + 1 < tvlen) { + expr_str = expr_str + tv[tvi] + tvi = tvi + 1 + } + expr_str = expr_str + tv[tvi] + tvi = tvi + 1 + } + if (tvi < tvlen) { expr_str = expr_str + tv[tvi]; tvi = tvi + 1 } + } else { + expr_str = expr_str + tc + tvi = tvi + 1 + } + } + expr_tokens = tokenizer(expr_str, "