Fix issues with parse.cm and tokenize.cm

This commit is contained in:
2026-02-09 17:43:44 -06:00
parent 68e2395b92
commit d5209e1d59
5 changed files with 201 additions and 57 deletions

111
parse.cm
View File

@@ -5,7 +5,7 @@ var is_alpha = function(c) {
return (c >= 65 && c <= 90) || (c >= 97 && c <= 122)
}
var parse = function(tokens, src, filename) {
var parse = function(tokens, src, filename, tokenizer) {
var _src_len = length(src)
var cp = []
var _i = 0
@@ -167,6 +167,23 @@ var parse = function(tokens, src, filename) {
var rpos = 0
var pattern_str = ""
var flags = ""
var tv = null
var has_interp = false
var ti = 0
var tpl_list = null
var fmt = null
var idx = 0
var tvi = 0
var tvlen = 0
var depth = 0
var expr_str = null
var tc = null
var tq = null
var esc_ch = null
var expr_tokens = null
var sub_ast = null
var sub_stmt = null
var sub_expr = null
if (k == "number") {
node = ast_node("number", start)
@@ -177,8 +194,96 @@ var parse = function(tokens, src, filename) {
return node
}
if (k == "text") {
node = ast_node("text", start)
node.value = tok.value
// Check for template interpolation: ${...}
tv = tok.value
has_interp = false
ti = 0
while (ti < length(tv) - 1) {
if (tv[ti] == "$" && tv[ti + 1] == "{") {
if (ti == 0 || tv[ti - 1] != "\\") {
has_interp = true
break
}
}
ti = ti + 1
}
if (!has_interp || tokenizer == null) {
node = ast_node("text", start)
node.value = tok.value
advance()
ast_node_end(node)
return node
}
// Template literal with interpolation
node = ast_node("text literal", start)
tpl_list = []
node.list = tpl_list
fmt = ""
idx = 0
tvi = 0
tvlen = length(tv)
while (tvi < tvlen) {
if (tv[tvi] == "\\" && tvi + 1 < tvlen) {
esc_ch = tv[tvi + 1]
if (esc_ch == "n") { fmt = fmt + "\n" }
else if (esc_ch == "t") { fmt = fmt + "\t" }
else if (esc_ch == "r") { fmt = fmt + "\r" }
else if (esc_ch == "\\") { fmt = fmt + "\\" }
else if (esc_ch == "`") { fmt = fmt + "`" }
else if (esc_ch == "$") { fmt = fmt + "$" }
else if (esc_ch == "0") { fmt = fmt + character(0) }
else { fmt = fmt + esc_ch }
tvi = tvi + 2
} else if (tv[tvi] == "$" && tvi + 1 < tvlen && tv[tvi + 1] == "{") {
tvi = tvi + 2
depth = 1
expr_str = ""
while (tvi < tvlen && depth > 0) {
tc = tv[tvi]
if (tc == "{") { depth = depth + 1; expr_str = expr_str + tc; tvi = tvi + 1 }
else if (tc == "}") {
depth = depth - 1
if (depth > 0) { expr_str = expr_str + tc }
tvi = tvi + 1
}
else if (tc == "'" || tc == "\"" || tc == "`") {
tq = tc
expr_str = expr_str + tc
tvi = tvi + 1
while (tvi < tvlen && tv[tvi] != tq) {
if (tv[tvi] == "\\" && tvi + 1 < tvlen) {
expr_str = expr_str + tv[tvi]
tvi = tvi + 1
}
expr_str = expr_str + tv[tvi]
tvi = tvi + 1
}
if (tvi < tvlen) { expr_str = expr_str + tv[tvi]; tvi = tvi + 1 }
} else {
expr_str = expr_str + tc
tvi = tvi + 1
}
}
expr_tokens = tokenizer(expr_str, "<template>").tokens
sub_ast = parse(expr_tokens, expr_str, "<template>", tokenizer)
if (sub_ast != null && sub_ast.statements != null && length(sub_ast.statements) > 0) {
sub_stmt = sub_ast.statements[0]
sub_expr = null
if (sub_stmt.kind == "call") {
sub_expr = sub_stmt.expression
} else {
sub_expr = sub_stmt
}
push(tpl_list, sub_expr)
}
fmt = fmt + "{" + text(idx) + "}"
idx = idx + 1
} else {
fmt = fmt + tv[tvi]
tvi = tvi + 1
}
}
node.value = fmt
advance()
ast_node_end(node)
return node