From d5209e1d591d6241740826b5dd199200957ab586 Mon Sep 17 00:00:00 2001
From: John Alanbrook <john@pockle.world>
Date: Mon, 9 Feb 2026 17:43:44 -0600
Subject: [PATCH] fix issues with parse.cm and tokenize.cm

---
 internal/bootstrap.cm |   2 +-
 parse.cm              | 111 ++++++++++++++++++++++++++++++++++++++++--
 source/cell.c         |  82 +++++++++++++++++++------------
 source/runtime.c      |   8 +--
 tokenize.cm           |  55 +++++++++++++--------
 5 files changed, 201 insertions(+), 57 deletions(-)

diff --git a/internal/bootstrap.cm b/internal/bootstrap.cm
index d5a53ddf..6118967a 100644
--- a/internal/bootstrap.cm
+++ b/internal/bootstrap.cm
@@ -38,7 +38,7 @@ if (use_mcode) {
 // analyze: tokenize + parse, check for errors
 function analyze(src, filename) {
   var tok_result = tokenize_mod(src, filename)
-  var ast = parse_mod(tok_result.tokens, src, filename)
+  var ast = parse_mod(tok_result.tokens, src, filename, tokenize_mod)
   var _i = 0
   var prev_line = -1
   var prev_msg = null
diff --git a/parse.cm b/parse.cm
index df98bdc0..36a6f107 100644
--- a/parse.cm
+++ b/parse.cm
@@ -5,7 +5,7 @@ var is_alpha = function(c) {
   return (c >= 65 && c <= 90) || (c >= 97 && c <= 122)
 }
 
-var parse = function(tokens, src, filename) {
+var parse = function(tokens, src, filename, tokenizer) {
   var _src_len = length(src)
   var cp = []
   var _i = 0
@@ -167,6 +167,23 @@ var parse = function(tokens, src, filename) {
     var rpos = 0
     var pattern_str = ""
     var flags = ""
+    var tv = null
+    var has_interp = false
+    var ti = 0
+    var tpl_list = null
+    var fmt = null
+    var idx = 0
+    var tvi = 0
+    var tvlen = 0
+    var depth = 0
+    var expr_str = null
+    var tc = null
+    var tq = null
+    var esc_ch = null
+    var expr_tokens = null
+    var sub_ast = null
+    var sub_stmt = null
+    var sub_expr = null
 
     if (k == "number") {
       node = ast_node("number", start)
@@ -177,8 +194,96 @@ var parse = function(tokens, src, filename) {
       return node
     }
     if (k == "text") {
-      node = ast_node("text", start)
-      node.value = tok.value
+      // Check for template interpolation: ${...}
+      tv = tok.value
+      has_interp = false
+      ti = 0
+      while (ti < length(tv) - 1) {
+        if (tv[ti] == "\\") {
+          ti = ti + 1 // skip the escaped char, as the expansion loop below does, so "\\${" still interpolates
+        } else if (tv[ti] == "$" && tv[ti + 1] == "{") {
+          has_interp = true
+          break
+        }
+        ti = ti + 1
+      }
+      if (!has_interp || tokenizer == null) {
+        node = ast_node("text", start)
+        node.value = tok.value
+        advance()
+        ast_node_end(node)
+        return node
+      }
+      // Template literal with interpolation
+      node = ast_node("text literal", start)
+      tpl_list = []
+      node.list = tpl_list
+      fmt = ""
+      idx = 0
+      tvi = 0
+      tvlen = length(tv)
+      while (tvi < tvlen) {
+        if (tv[tvi] == "\\" && tvi + 1 < tvlen) {
+          esc_ch = tv[tvi + 1]
+          if (esc_ch == "n") { fmt = fmt + "\n" }
+          else if (esc_ch == "t") { fmt = fmt + "\t" }
+          else if (esc_ch == "r") { fmt = fmt + "\r" }
+          else if (esc_ch == "\\") { fmt = fmt + "\\" }
+          else if (esc_ch == "`") { fmt = fmt + "`" }
+          else if (esc_ch == "$") { fmt = fmt + "$" }
+          else if (esc_ch == "0") { fmt = fmt + character(0) }
+          else { fmt = fmt + esc_ch }
+          tvi = tvi + 2
+        } else if (tv[tvi] == "$" && tvi + 1 < tvlen && tv[tvi + 1] == "{") {
+          tvi = tvi + 2
+          depth = 1
+          expr_str = ""
+          while (tvi < tvlen && depth > 0) {
+            tc = tv[tvi]
+            if (tc == "{") { depth = depth + 1; expr_str = expr_str + tc; tvi = tvi + 1 }
+            else if (tc == "}") {
+              depth = depth - 1
+              if (depth > 0) { expr_str = expr_str + tc }
+              tvi = tvi + 1
+            }
+            else if (tc == "'" || tc == "\"" || tc == "`") {
+              tq = tc
+              expr_str = expr_str + tc
+              tvi = tvi + 1
+              while (tvi < tvlen && tv[tvi] != tq) {
+                if (tv[tvi] == "\\" && tvi + 1 < tvlen) {
+                  expr_str = expr_str + tv[tvi]
+                  tvi = tvi + 1
+                }
+                expr_str = expr_str + tv[tvi]
+                tvi = tvi + 1
+              }
+              if (tvi < tvlen) { expr_str = expr_str + tv[tvi]; tvi = tvi + 1 }
+            } else {
+              expr_str = expr_str + tc
+              tvi = tvi + 1
+            }
+          }
+          expr_tokens = tokenizer(expr_str, "<template>").tokens
+          sub_ast = parse(expr_tokens, expr_str, "<template>", tokenizer)
+          if (sub_ast != null && sub_ast.statements != null && length(sub_ast.statements) > 0) {
+            sub_stmt = sub_ast.statements[0]
+            if (sub_stmt.kind == "call") {
+              sub_expr = sub_stmt.expression
+            } else {
+              sub_expr = sub_stmt
+            }
+            push(tpl_list, sub_expr)
+            // Emit the placeholder only with its expression so {n} always indexes tpl_list[n].
+            fmt = fmt + "{" + text(idx) + "}"
+            idx = idx + 1
+          }
+        } else {
+          fmt = fmt + tv[tvi]
+          tvi = tvi + 1
+        }
+      }
+      node.value = fmt
       advance()
       ast_node_end(node)
       return node
diff --git a/source/cell.c b/source/cell.c
index 49e57222..71fabb9d 100644
--- a/source/cell.c
+++ b/source/cell.c
@@ -552,59 +552,81 @@ int cell_init(int argc, char **argv)
 
   /* Check for --mach-run flag to compile and run through MACH VM */
   if (argc >= 3 && strcmp(argv[1], "--mach-run") == 0) {
-    if (!find_cell_shop()) return 1;
+    const char *script_name = argv[2];
+    char *script = NULL;
+    char *allocated_script = NULL;
+    const char *filename = script_name;
 
-    size_t boot_size;
-    char *boot_data = load_core_file("internal/bootstrap.cm", &boot_size);
-    if (!boot_data) {
-      printf("ERROR: Could not load internal/bootstrap.cm from %s\n", core_path);
+    struct stat st;
+    if (stat(script_name, &st) == 0 && S_ISREG(st.st_mode)) {
+      /* Exact name found */
+    } else {
+      /* Try .ce then .cm extension */
+      static char pathbuf[4096];
+      snprintf(pathbuf, sizeof(pathbuf), "%s.ce", script_name);
+      if (stat(pathbuf, &st) == 0 && S_ISREG(st.st_mode)) {
+        script_name = pathbuf;
+        filename = pathbuf;
+      } else {
+        snprintf(pathbuf, sizeof(pathbuf), "%s.cm", script_name);
+        if (stat(pathbuf, &st) == 0 && S_ISREG(st.st_mode)) {
+          script_name = pathbuf;
+          filename = pathbuf;
+        } else {
+          printf("Failed to find file: %s\n", argv[2]);
+          return 1;
+        }
+      }
+    }
+
+    FILE *f = fopen(script_name, "r");
+    if (!f) {
+      printf("Failed to open file: %s\n", script_name);
+      return 1;
+    }
+    allocated_script = malloc(st.st_size + 1);
+    if (!allocated_script) {
+      fclose(f);
+      printf("Failed to allocate memory for script\n");
+      return 1;
+    }
+    size_t read_size = fread(allocated_script, 1, st.st_size, f);
+    fclose(f);
+    allocated_script[read_size] = '\0';
+    script = allocated_script;
+
+    cJSON *ast = JS_ASTTree(script, read_size, filename);
+    free(allocated_script);
+    if (!ast) {
+      printf("Failed to parse %s\n", filename);
       return 1;
     }
 
-    cJSON *boot_ast = JS_ASTTree(boot_data, boot_size, "internal/bootstrap.cm");
-    free(boot_data);
-    if (!boot_ast) {
-      printf("Failed to parse internal/bootstrap.cm\n");
-      return 1;
-    }
-
-    if (print_tree_errors(boot_ast)) {
-      cJSON_Delete(boot_ast);
+    if (print_tree_errors(ast)) {
+      cJSON_Delete(ast);
       return 1;
     }
 
     JSRuntime *rt = JS_NewRuntime();
     if (!rt) {
       printf("Failed to create JS runtime\n");
-      cJSON_Delete(boot_ast);
+      cJSON_Delete(ast);
       return 1;
     }
     JSContext *ctx = JS_NewContextWithHeapSize(rt, 16 * 1024 * 1024);
     if (!ctx) {
       printf("Failed to create JS context\n");
-      cJSON_Delete(boot_ast); JS_FreeRuntime(rt);
+      cJSON_Delete(ast); JS_FreeRuntime(rt);
       return 1;
     }
 
     JS_FreeValue(ctx, js_blob_use(ctx));
 
-    JSValue hidden_env = JS_NewObject(ctx);
-    JS_SetPropertyStr(ctx, hidden_env, "os", js_os_use(ctx));
-    JS_SetPropertyStr(ctx, hidden_env, "core_path", JS_NewString(ctx, core_path));
-    JSValue args_arr = JS_NewArray(ctx);
-    for (int i = 2; i < argc; i++) {
-      JSValue str = JS_NewString(ctx, argv[i]);
-      JS_ArrayPush(ctx, &args_arr, str);
-    }
-    JS_SetPropertyStr(ctx, hidden_env, "args", args_arr);
-    hidden_env = JS_Stone(ctx, hidden_env);
-
-    JSValue result = JS_RunMachTree(ctx, boot_ast, hidden_env);
-    cJSON_Delete(boot_ast);
+    JSValue result = JS_RunMachTree(ctx, ast, JS_NULL);
+    cJSON_Delete(ast);
 
     int exit_code = 0;
     if (JS_IsException(result)) {
-      /* Error already printed to stderr by JS_Throw* */
       JS_GetException(ctx);
       exit_code = 1;
     } else if (!JS_IsNull(result)) {
diff --git a/source/runtime.c b/source/runtime.c
index 636de699..4f1ff79c 100644
--- a/source/runtime.c
+++ b/source/runtime.c
@@ -6283,8 +6283,8 @@ static JSValue js_cell_number (JSContext *ctx, JSValue this_val, int argc, JSVal
     return val;
   }
 
-  /* Handle string */
-  if (tag == JS_TAG_STRING || tag == JS_TAG_STRING_IMM) {
+  /* Handle string (immediate ASCII or heap JSText) */
+  if (JS_IsText (val)) {
     const char *str = JS_ToCString (ctx, val);
     if (!str) return JS_EXCEPTION;
 
@@ -6925,7 +6925,7 @@ JSValue js_cell_character (JSContext *ctx, JSValue this_val, int argc, JSValue *
   int tag = JS_VALUE_GET_TAG (arg);
 
   /* Handle string - return first character */
-  if (tag == JS_TAG_STRING || tag == JS_TAG_STRING_IMM) {
+  if (JS_IsText (arg)) {
     if (js_string_value_len (arg) == 0) return JS_NewString (ctx, "");
     return js_sub_string_val (ctx, arg, 0, 1);
   }
@@ -6978,7 +6978,7 @@ static JSValue js_cell_text (JSContext *ctx, JSValue this_val, int argc, JSValue
   int tag = JS_VALUE_GET_TAG (arg);
 
   /* Handle string / rope */
-  if (tag == JS_TAG_STRING || tag == JS_TAG_STRING_IMM) {
+  if (JS_IsText (arg)) {
     JSValue str = JS_ToString (ctx, arg); /* owned + flattens rope */
     if (JS_IsException (str)) return JS_EXCEPTION;
 
diff --git a/tokenize.cm b/tokenize.cm
index 2e714e60..c49d9019 100644
--- a/tokenize.cm
+++ b/tokenize.cm
@@ -64,6 +64,7 @@ var tokenize = function(src, filename) {
   def CP_o = 111
   def CP_r = 114
   def CP_t = 116
+  def CP_u = 117
   def CP_x = 120
   def CP_z = 122
   def CP_LBRACE = 123
@@ -113,6 +114,23 @@ var tokenize = function(src, filename) {
     return (c >= CP_0 && c <= CP_9) || (c >= CP_a && c <= CP_f) || (c >= CP_A && c <= CP_F)
   }
 
+  var hex_val = function(c) { // numeric value of hex-digit code point c (assumes CP_0/CP_a/CP_A are '0'/'a'/'A'; defined outside this hunk)
+    if (c >= CP_0 && c <= CP_9) return c - CP_0 // CP_0..CP_9 -> 0..9
+    if (c >= CP_a && c <= CP_f) return c - CP_a + 10 // CP_a..CP_f -> 10..15
+    if (c >= CP_A && c <= CP_F) return c - CP_A + 10 // CP_A..CP_F -> 10..15
+    return 0 // anything else yields 0 rather than an error
+  }
+
+  var read_unicode_escape = function() { // reads the hex digits of a \u escape (used by the esc == CP_u branch) and returns character(value)
+    var cp_val = 0 // value accumulated from the digits read so far
+    var hi = 0 // number of hex digits consumed
+    while (hi < 4 && pos < len && is_hex(pk())) { // at most four digits; stops early on a non-hex char or when pos reaches len
+      cp_val = cp_val * 16 + hex_val(adv()) // shift in the next digit; adv() consumes it
+      hi = hi + 1
+    }
+    return character(cp_val) // if no hex digit followed, cp_val is still 0, so this is character(0)
+  }
+
   var is_alpha = function(c) {
     return (c >= CP_a && c <= CP_z) || (c >= CP_A && c <= CP_Z)
   }
@@ -158,6 +176,7 @@ var tokenize = function(src, filename) {
         else if (esc == CP_DQUOTE) { value = value + "\"" }
         else if (esc == CP_0) { value = value + character(0) }
         else if (esc == CP_BACKTICK) { value = value + "`" }
+        else if (esc == CP_u) { value = value + read_unicode_escape() }
         else { value = value + character(esc) }
       } else {
         value = value + character(adv())
@@ -177,39 +196,37 @@ var tokenize = function(src, filename) {
     var start_row = row
     var start_col = col
     var value = ""
-    var esc = 0
     var depth = 0
     var tc = 0
     var q = 0
     adv() // skip opening backtick
     while (pos < len && pk() != CP_BACKTICK) {
       if (pk() == CP_BSLASH && pos + 1 < len) {
-        adv()
-        esc = adv()
-        if (esc == CP_n) { value = value + "\n" }
-        else if (esc == CP_t) { value = value + "\t" }
-        else if (esc == CP_r) { value = value + "\r" }
-        else if (esc == CP_BSLASH) { value = value + "\\" }
-        else if (esc == CP_BACKTICK) { value = value + "`" }
-        else if (esc == CP_DOLLAR) { value = value + "$" }
-        else if (esc == CP_0) { value = value + character(0) }
-        else { value = value + character(esc) }
+        value = value + character(adv())
+        value = value + character(adv())
       } else if (pk() == CP_DOLLAR && pos + 1 < len && pk_at(1) == CP_LBRACE) {
-        adv() // $
-        adv() // {
+        value = value + character(adv()) // $
+        value = value + character(adv()) // {
         depth = 1
         while (pos < len && depth > 0) {
           tc = pk()
-          if (tc == CP_LBRACE) { depth = depth + 1; adv() }
-          else if (tc == CP_RBRACE) { depth = depth - 1; adv() }
+          if (tc == CP_LBRACE) { depth = depth + 1; value = value + character(adv()) }
+          else if (tc == CP_RBRACE) {
+            // Every "}" is kept, including the closing one: the parser re-scans this raw text.
+            depth = depth - 1
+            value = value + character(adv())
+          }
           else if (tc == CP_SQUOTE || tc == CP_DQUOTE || tc == CP_BACKTICK) {
             q = adv()
+            value = value + character(q)
             while (pos < len && pk() != q) {
-              if (pk() == CP_BSLASH && pos + 1 < len) adv()
-              adv()
+              if (pk() == CP_BSLASH && pos + 1 < len) {
+                value = value + character(adv())
+              }
+              value = value + character(adv())
             }
-            if (pos < len) adv()
-          } else { adv() }
+            if (pos < len) { value = value + character(adv()) }
+          } else { value = value + character(adv()) }
         }
       } else {
         value = value + character(adv())