fix issues with parse.cm and tokenize.cm
This commit is contained in:
@@ -38,7 +38,7 @@ if (use_mcode) {
|
||||
// analyze: tokenize + parse, check for errors
|
||||
function analyze(src, filename) {
|
||||
var tok_result = tokenize_mod(src, filename)
|
||||
var ast = parse_mod(tok_result.tokens, src, filename)
|
||||
var ast = parse_mod(tok_result.tokens, src, filename, tokenize_mod)
|
||||
var _i = 0
|
||||
var prev_line = -1
|
||||
var prev_msg = null
|
||||
|
||||
111
parse.cm
111
parse.cm
@@ -5,7 +5,7 @@ var is_alpha = function(c) {
|
||||
return (c >= 65 && c <= 90) || (c >= 97 && c <= 122)
|
||||
}
|
||||
|
||||
var parse = function(tokens, src, filename) {
|
||||
var parse = function(tokens, src, filename, tokenizer) {
|
||||
var _src_len = length(src)
|
||||
var cp = []
|
||||
var _i = 0
|
||||
@@ -167,6 +167,23 @@ var parse = function(tokens, src, filename) {
|
||||
var rpos = 0
|
||||
var pattern_str = ""
|
||||
var flags = ""
|
||||
var tv = null
|
||||
var has_interp = false
|
||||
var ti = 0
|
||||
var tpl_list = null
|
||||
var fmt = null
|
||||
var idx = 0
|
||||
var tvi = 0
|
||||
var tvlen = 0
|
||||
var depth = 0
|
||||
var expr_str = null
|
||||
var tc = null
|
||||
var tq = null
|
||||
var esc_ch = null
|
||||
var expr_tokens = null
|
||||
var sub_ast = null
|
||||
var sub_stmt = null
|
||||
var sub_expr = null
|
||||
|
||||
if (k == "number") {
|
||||
node = ast_node("number", start)
|
||||
@@ -177,8 +194,96 @@ var parse = function(tokens, src, filename) {
|
||||
return node
|
||||
}
|
||||
if (k == "text") {
|
||||
node = ast_node("text", start)
|
||||
node.value = tok.value
|
||||
// Check for template interpolation: ${...}
|
||||
tv = tok.value
|
||||
has_interp = false
|
||||
ti = 0
|
||||
while (ti < length(tv) - 1) {
|
||||
if (tv[ti] == "$" && tv[ti + 1] == "{") {
|
||||
if (ti == 0 || tv[ti - 1] != "\\") {
|
||||
has_interp = true
|
||||
break
|
||||
}
|
||||
}
|
||||
ti = ti + 1
|
||||
}
|
||||
if (!has_interp || tokenizer == null) {
|
||||
node = ast_node("text", start)
|
||||
node.value = tok.value
|
||||
advance()
|
||||
ast_node_end(node)
|
||||
return node
|
||||
}
|
||||
// Template literal with interpolation
|
||||
node = ast_node("text literal", start)
|
||||
tpl_list = []
|
||||
node.list = tpl_list
|
||||
fmt = ""
|
||||
idx = 0
|
||||
tvi = 0
|
||||
tvlen = length(tv)
|
||||
while (tvi < tvlen) {
|
||||
if (tv[tvi] == "\\" && tvi + 1 < tvlen) {
|
||||
esc_ch = tv[tvi + 1]
|
||||
if (esc_ch == "n") { fmt = fmt + "\n" }
|
||||
else if (esc_ch == "t") { fmt = fmt + "\t" }
|
||||
else if (esc_ch == "r") { fmt = fmt + "\r" }
|
||||
else if (esc_ch == "\\") { fmt = fmt + "\\" }
|
||||
else if (esc_ch == "`") { fmt = fmt + "`" }
|
||||
else if (esc_ch == "$") { fmt = fmt + "$" }
|
||||
else if (esc_ch == "0") { fmt = fmt + character(0) }
|
||||
else { fmt = fmt + esc_ch }
|
||||
tvi = tvi + 2
|
||||
} else if (tv[tvi] == "$" && tvi + 1 < tvlen && tv[tvi + 1] == "{") {
|
||||
tvi = tvi + 2
|
||||
depth = 1
|
||||
expr_str = ""
|
||||
while (tvi < tvlen && depth > 0) {
|
||||
tc = tv[tvi]
|
||||
if (tc == "{") { depth = depth + 1; expr_str = expr_str + tc; tvi = tvi + 1 }
|
||||
else if (tc == "}") {
|
||||
depth = depth - 1
|
||||
if (depth > 0) { expr_str = expr_str + tc }
|
||||
tvi = tvi + 1
|
||||
}
|
||||
else if (tc == "'" || tc == "\"" || tc == "`") {
|
||||
tq = tc
|
||||
expr_str = expr_str + tc
|
||||
tvi = tvi + 1
|
||||
while (tvi < tvlen && tv[tvi] != tq) {
|
||||
if (tv[tvi] == "\\" && tvi + 1 < tvlen) {
|
||||
expr_str = expr_str + tv[tvi]
|
||||
tvi = tvi + 1
|
||||
}
|
||||
expr_str = expr_str + tv[tvi]
|
||||
tvi = tvi + 1
|
||||
}
|
||||
if (tvi < tvlen) { expr_str = expr_str + tv[tvi]; tvi = tvi + 1 }
|
||||
} else {
|
||||
expr_str = expr_str + tc
|
||||
tvi = tvi + 1
|
||||
}
|
||||
}
|
||||
expr_tokens = tokenizer(expr_str, "<template>").tokens
|
||||
sub_ast = parse(expr_tokens, expr_str, "<template>", tokenizer)
|
||||
if (sub_ast != null && sub_ast.statements != null && length(sub_ast.statements) > 0) {
|
||||
sub_stmt = sub_ast.statements[0]
|
||||
sub_expr = null
|
||||
if (sub_stmt.kind == "call") {
|
||||
sub_expr = sub_stmt.expression
|
||||
} else {
|
||||
sub_expr = sub_stmt
|
||||
}
|
||||
push(tpl_list, sub_expr)
|
||||
}
|
||||
fmt = fmt + "{" + text(idx) + "}"
|
||||
idx = idx + 1
|
||||
} else {
|
||||
fmt = fmt + tv[tvi]
|
||||
tvi = tvi + 1
|
||||
}
|
||||
}
|
||||
node.value = fmt
|
||||
advance()
|
||||
ast_node_end(node)
|
||||
return node
|
||||
|
||||
@@ -552,59 +552,81 @@ int cell_init(int argc, char **argv)
|
||||
|
||||
/* Check for --mach-run flag to compile and run through MACH VM */
|
||||
if (argc >= 3 && strcmp(argv[1], "--mach-run") == 0) {
|
||||
if (!find_cell_shop()) return 1;
|
||||
const char *script_name = argv[2];
|
||||
char *script = NULL;
|
||||
char *allocated_script = NULL;
|
||||
const char *filename = script_name;
|
||||
|
||||
size_t boot_size;
|
||||
char *boot_data = load_core_file("internal/bootstrap.cm", &boot_size);
|
||||
if (!boot_data) {
|
||||
printf("ERROR: Could not load internal/bootstrap.cm from %s\n", core_path);
|
||||
struct stat st;
|
||||
if (stat(script_name, &st) == 0 && S_ISREG(st.st_mode)) {
|
||||
/* Exact name found */
|
||||
} else {
|
||||
/* Try .ce then .cm extension */
|
||||
static char pathbuf[4096];
|
||||
snprintf(pathbuf, sizeof(pathbuf), "%s.ce", script_name);
|
||||
if (stat(pathbuf, &st) == 0 && S_ISREG(st.st_mode)) {
|
||||
script_name = pathbuf;
|
||||
filename = pathbuf;
|
||||
} else {
|
||||
snprintf(pathbuf, sizeof(pathbuf), "%s.cm", script_name);
|
||||
if (stat(pathbuf, &st) == 0 && S_ISREG(st.st_mode)) {
|
||||
script_name = pathbuf;
|
||||
filename = pathbuf;
|
||||
} else {
|
||||
printf("Failed to find file: %s\n", argv[2]);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
FILE *f = fopen(script_name, "r");
|
||||
if (!f) {
|
||||
printf("Failed to open file: %s\n", script_name);
|
||||
return 1;
|
||||
}
|
||||
allocated_script = malloc(st.st_size + 1);
|
||||
if (!allocated_script) {
|
||||
fclose(f);
|
||||
printf("Failed to allocate memory for script\n");
|
||||
return 1;
|
||||
}
|
||||
size_t read_size = fread(allocated_script, 1, st.st_size, f);
|
||||
fclose(f);
|
||||
allocated_script[read_size] = '\0';
|
||||
script = allocated_script;
|
||||
|
||||
cJSON *ast = JS_ASTTree(script, read_size, filename);
|
||||
free(allocated_script);
|
||||
if (!ast) {
|
||||
printf("Failed to parse %s\n", filename);
|
||||
return 1;
|
||||
}
|
||||
|
||||
cJSON *boot_ast = JS_ASTTree(boot_data, boot_size, "internal/bootstrap.cm");
|
||||
free(boot_data);
|
||||
if (!boot_ast) {
|
||||
printf("Failed to parse internal/bootstrap.cm\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (print_tree_errors(boot_ast)) {
|
||||
cJSON_Delete(boot_ast);
|
||||
if (print_tree_errors(ast)) {
|
||||
cJSON_Delete(ast);
|
||||
return 1;
|
||||
}
|
||||
|
||||
JSRuntime *rt = JS_NewRuntime();
|
||||
if (!rt) {
|
||||
printf("Failed to create JS runtime\n");
|
||||
cJSON_Delete(boot_ast);
|
||||
cJSON_Delete(ast);
|
||||
return 1;
|
||||
}
|
||||
JSContext *ctx = JS_NewContextWithHeapSize(rt, 16 * 1024 * 1024);
|
||||
if (!ctx) {
|
||||
printf("Failed to create JS context\n");
|
||||
cJSON_Delete(boot_ast); JS_FreeRuntime(rt);
|
||||
cJSON_Delete(ast); JS_FreeRuntime(rt);
|
||||
return 1;
|
||||
}
|
||||
|
||||
JS_FreeValue(ctx, js_blob_use(ctx));
|
||||
|
||||
JSValue hidden_env = JS_NewObject(ctx);
|
||||
JS_SetPropertyStr(ctx, hidden_env, "os", js_os_use(ctx));
|
||||
JS_SetPropertyStr(ctx, hidden_env, "core_path", JS_NewString(ctx, core_path));
|
||||
JSValue args_arr = JS_NewArray(ctx);
|
||||
for (int i = 2; i < argc; i++) {
|
||||
JSValue str = JS_NewString(ctx, argv[i]);
|
||||
JS_ArrayPush(ctx, &args_arr, str);
|
||||
}
|
||||
JS_SetPropertyStr(ctx, hidden_env, "args", args_arr);
|
||||
hidden_env = JS_Stone(ctx, hidden_env);
|
||||
|
||||
JSValue result = JS_RunMachTree(ctx, boot_ast, hidden_env);
|
||||
cJSON_Delete(boot_ast);
|
||||
JSValue result = JS_RunMachTree(ctx, ast, JS_NULL);
|
||||
cJSON_Delete(ast);
|
||||
|
||||
int exit_code = 0;
|
||||
if (JS_IsException(result)) {
|
||||
/* Error already printed to stderr by JS_Throw* */
|
||||
JS_GetException(ctx);
|
||||
exit_code = 1;
|
||||
} else if (!JS_IsNull(result)) {
|
||||
|
||||
@@ -6283,8 +6283,8 @@ static JSValue js_cell_number (JSContext *ctx, JSValue this_val, int argc, JSVal
|
||||
return val;
|
||||
}
|
||||
|
||||
/* Handle string */
|
||||
if (tag == JS_TAG_STRING || tag == JS_TAG_STRING_IMM) {
|
||||
/* Handle string (immediate ASCII or heap JSText) */
|
||||
if (JS_IsText (val)) {
|
||||
const char *str = JS_ToCString (ctx, val);
|
||||
if (!str) return JS_EXCEPTION;
|
||||
|
||||
@@ -6925,7 +6925,7 @@ JSValue js_cell_character (JSContext *ctx, JSValue this_val, int argc, JSValue *
|
||||
int tag = JS_VALUE_GET_TAG (arg);
|
||||
|
||||
/* Handle string - return first character */
|
||||
if (tag == JS_TAG_STRING || tag == JS_TAG_STRING_IMM) {
|
||||
if (JS_IsText (arg)) {
|
||||
if (js_string_value_len (arg) == 0) return JS_NewString (ctx, "");
|
||||
return js_sub_string_val (ctx, arg, 0, 1);
|
||||
}
|
||||
@@ -6978,7 +6978,7 @@ static JSValue js_cell_text (JSContext *ctx, JSValue this_val, int argc, JSValue
|
||||
int tag = JS_VALUE_GET_TAG (arg);
|
||||
|
||||
/* Handle string / rope */
|
||||
if (tag == JS_TAG_STRING || tag == JS_TAG_STRING_IMM) {
|
||||
if (JS_IsText (arg)) {
|
||||
JSValue str = JS_ToString (ctx, arg); /* owned + flattens rope */
|
||||
if (JS_IsException (str)) return JS_EXCEPTION;
|
||||
|
||||
|
||||
55
tokenize.cm
55
tokenize.cm
@@ -64,6 +64,7 @@ var tokenize = function(src, filename) {
|
||||
def CP_o = 111
|
||||
def CP_r = 114
|
||||
def CP_t = 116
|
||||
def CP_u = 117
|
||||
def CP_x = 120
|
||||
def CP_z = 122
|
||||
def CP_LBRACE = 123
|
||||
@@ -113,6 +114,23 @@ var tokenize = function(src, filename) {
|
||||
return (c >= CP_0 && c <= CP_9) || (c >= CP_a && c <= CP_f) || (c >= CP_A && c <= CP_F)
|
||||
}
|
||||
|
||||
// hex_val: convert the code point of a single hexadecimal digit to its
// numeric value.
//   c       - code point to convert (compared against the CP_* constants)
//   returns - 0..9 for CP_0..CP_9, 10..15 for CP_a..CP_f or CP_A..CP_F,
//             and 0 for any other code point
var hex_val = function(c) {
|
||||
if (c >= CP_0 && c <= CP_9) return c - CP_0
|
||||
if (c >= CP_a && c <= CP_f) return c - CP_a + 10
|
||||
if (c >= CP_A && c <= CP_F) return c - CP_A + 10
|
||||
// Not a hex digit: falls back to 0, indistinguishable from the digit "0".
return 0
|
||||
}
|
||||
|
||||
// read_unicode_escape: read up to four hexadecimal digits from the input and
// return character(cp_val) for the base-16 value they spell.
// Uses pos, len, pk() and adv() from the enclosing scope; presumably pk()
// peeks at the current code point and adv() consumes and returns it -- both
// are defined outside this view, TODO confirm.
// NOTE(review): the loop stops early at the first non-hex code point or at
// end of input, so an escape with fewer than four hex digits is accepted
// without any error and yields character() of the partial value (0 if no
// digit was read) -- confirm this is intended.
var read_unicode_escape = function() {
|
||||
var cp_val = 0
|
||||
// hi counts the hex digits consumed so far (at most 4).
var hi = 0
|
||||
while (hi < 4 && pos < len && is_hex(pk())) {
|
||||
// Shift the accumulated value one hex digit left and add the new digit.
cp_val = cp_val * 16 + hex_val(adv())
|
||||
hi = hi + 1
|
||||
}
|
||||
return character(cp_val)
|
||||
}
|
||||
|
||||
// is_alpha: test whether a code point falls in one of the two letter ranges.
//   c       - code point to test
//   returns - true when c lies in CP_a..CP_z or CP_A..CP_Z, false otherwise
var is_alpha = function(c) {
|
||||
return (c >= CP_a && c <= CP_z) || (c >= CP_A && c <= CP_Z)
|
||||
}
|
||||
@@ -158,6 +176,7 @@ var tokenize = function(src, filename) {
|
||||
else if (esc == CP_DQUOTE) { value = value + "\"" }
|
||||
else if (esc == CP_0) { value = value + character(0) }
|
||||
else if (esc == CP_BACKTICK) { value = value + "`" }
|
||||
else if (esc == CP_u) { value = value + read_unicode_escape() }
|
||||
else { value = value + character(esc) }
|
||||
} else {
|
||||
value = value + character(adv())
|
||||
@@ -177,39 +196,37 @@ var tokenize = function(src, filename) {
|
||||
var start_row = row
|
||||
var start_col = col
|
||||
var value = ""
|
||||
var esc = 0
|
||||
var depth = 0
|
||||
var tc = 0
|
||||
var q = 0
|
||||
adv() // skip opening backtick
|
||||
while (pos < len && pk() != CP_BACKTICK) {
|
||||
if (pk() == CP_BSLASH && pos + 1 < len) {
|
||||
adv()
|
||||
esc = adv()
|
||||
if (esc == CP_n) { value = value + "\n" }
|
||||
else if (esc == CP_t) { value = value + "\t" }
|
||||
else if (esc == CP_r) { value = value + "\r" }
|
||||
else if (esc == CP_BSLASH) { value = value + "\\" }
|
||||
else if (esc == CP_BACKTICK) { value = value + "`" }
|
||||
else if (esc == CP_DOLLAR) { value = value + "$" }
|
||||
else if (esc == CP_0) { value = value + character(0) }
|
||||
else { value = value + character(esc) }
|
||||
value = value + character(adv())
|
||||
value = value + character(adv())
|
||||
} else if (pk() == CP_DOLLAR && pos + 1 < len && pk_at(1) == CP_LBRACE) {
|
||||
adv() // $
|
||||
adv() // {
|
||||
value = value + character(adv()) // $
|
||||
value = value + character(adv()) // {
|
||||
depth = 1
|
||||
while (pos < len && depth > 0) {
|
||||
tc = pk()
|
||||
if (tc == CP_LBRACE) { depth = depth + 1; adv() }
|
||||
else if (tc == CP_RBRACE) { depth = depth - 1; adv() }
|
||||
if (tc == CP_LBRACE) { depth = depth + 1; value = value + character(adv()) }
|
||||
else if (tc == CP_RBRACE) {
|
||||
depth = depth - 1
|
||||
if (depth > 0) { value = value + character(adv()) }
|
||||
else { value = value + character(adv()) }
|
||||
}
|
||||
else if (tc == CP_SQUOTE || tc == CP_DQUOTE || tc == CP_BACKTICK) {
|
||||
q = adv()
|
||||
value = value + character(q)
|
||||
while (pos < len && pk() != q) {
|
||||
if (pk() == CP_BSLASH && pos + 1 < len) adv()
|
||||
adv()
|
||||
if (pk() == CP_BSLASH && pos + 1 < len) {
|
||||
value = value + character(adv())
|
||||
}
|
||||
value = value + character(adv())
|
||||
}
|
||||
if (pos < len) adv()
|
||||
} else { adv() }
|
||||
if (pos < len) { value = value + character(adv()) }
|
||||
} else { value = value + character(adv()) }
|
||||
}
|
||||
} else {
|
||||
value = value + character(adv())
|
||||
|
||||
Reference in New Issue
Block a user