Merge branch 'syntax' into ast

This commit is contained in:
2026-02-05 17:45:15 -06:00
75 changed files with 889 additions and 21 deletions

View File

@@ -397,6 +397,63 @@ int cell_init(int argc, char **argv)
return json ? 0 : 1; return json ? 0 : 1;
} }
/* Check for --tokenize flag to output token array JSON */
if (argc >= 3 && strcmp(argv[1], "--tokenize") == 0) {
const char *script_or_file = argv[2];
char *script = NULL;
char *allocated_script = NULL;
const char *filename = "<eval>";
struct stat st;
if (stat(script_or_file, &st) == 0 && S_ISREG(st.st_mode)) {
FILE *f = fopen(script_or_file, "r");
if (!f) {
printf("Failed to open file: %s\n", script_or_file);
return 1;
}
allocated_script = malloc(st.st_size + 1);
if (!allocated_script) {
fclose(f);
printf("Failed to allocate memory for script\n");
return 1;
}
size_t read_size = fread(allocated_script, 1, st.st_size, f);
fclose(f);
allocated_script[read_size] = '\0';
script = allocated_script;
filename = script_or_file;
} else {
script = (char *)script_or_file;
}
JSRuntime *rt = JS_NewRuntime();
if (!rt) {
printf("Failed to create JS runtime\n");
free(allocated_script);
return 1;
}
JSContext *ctx = JS_NewContext(rt);
if (!ctx) {
printf("Failed to create JS context\n");
JS_FreeRuntime(rt);
free(allocated_script);
return 1;
}
char *json = JS_Tokenize(ctx, script, strlen(script), filename);
if (json) {
printf("%s\n", json);
free(json);
} else {
printf("Failed to tokenize\n");
}
JS_FreeContext(ctx);
JS_FreeRuntime(rt);
free(allocated_script);
return json ? 0 : 1;
}
/* Check for --mach flag to output machine code JSON */ /* Check for --mach flag to output machine code JSON */
if (argc >= 3 && strcmp(argv[1], "--mach") == 0) { if (argc >= 3 && strcmp(argv[1], "--mach") == 0) {
const char *script_or_file = argv[2]; const char *script_or_file = argv[2];

View File

@@ -8513,6 +8513,10 @@ enum {
TOK_ERROR, TOK_ERROR,
TOK_PRIVATE_NAME, TOK_PRIVATE_NAME,
TOK_EOF, TOK_EOF,
/* whitespace/comment tokens for tokenizer */
TOK_COMMENT,
TOK_NEWLINE,
TOK_SPACE,
/* keywords: WARNING: same order as atoms */ /* keywords: WARNING: same order as atoms */
TOK_NULL, /* must be first */ TOK_NULL, /* must be first */
TOK_FALSE, TOK_FALSE,
@@ -8574,6 +8578,113 @@ enum {
#define CP_LS 0x2028 #define CP_LS 0x2028
#define CP_PS 0x2029 #define CP_PS 0x2029
/* Map token values to kind strings for tokenizer output */
static const char *ast_token_kind_str(int token_val) {
static char single_char[2] = {0, 0};
switch (token_val) {
case TOK_NUMBER: return "number";
case TOK_STRING: return "text";
case TOK_TEMPLATE: return "text";
case TOK_IDENT: return "name";
case TOK_COMMENT: return "comment";
case TOK_NEWLINE: return "newline";
case TOK_SPACE: return "space";
case TOK_REGEXP: return "regexp";
case TOK_PRIVATE_NAME: return "private_name";
case TOK_EOF: return "eof";
case TOK_ERROR: return "error";
/* compound operators */
case TOK_MUL_ASSIGN: return "*=";
case TOK_DIV_ASSIGN: return "/=";
case TOK_MOD_ASSIGN: return "%=";
case TOK_PLUS_ASSIGN: return "+=";
case TOK_MINUS_ASSIGN: return "-=";
case TOK_SHL_ASSIGN: return "<<=";
case TOK_SAR_ASSIGN: return ">>=";
case TOK_SHR_ASSIGN: return ">>>=";
case TOK_AND_ASSIGN: return "&=";
case TOK_XOR_ASSIGN: return "^=";
case TOK_OR_ASSIGN: return "|=";
case TOK_POW_ASSIGN: return "**=";
case TOK_LAND_ASSIGN: return "&&=";
case TOK_LOR_ASSIGN: return "||=";
case TOK_DOUBLE_QUESTION_MARK_ASSIGN: return "?\?=";
case TOK_DEC: return "--";
case TOK_INC: return "++";
case TOK_SHL: return "<<";
case TOK_SAR: return ">>";
case TOK_SHR: return ">>>";
case TOK_LT: return "<";
case TOK_LTE: return "<=";
case TOK_GT: return ">";
case TOK_GTE: return ">=";
case TOK_EQ: return "==";
case TOK_STRICT_EQ: return "===";
case TOK_NEQ: return "!=";
case TOK_STRICT_NEQ: return "!==";
case TOK_LAND: return "&&";
case TOK_LOR: return "||";
case TOK_POW: return "**";
case TOK_ARROW: return "=>";
case TOK_DOUBLE_QUESTION_MARK: return "??";
case TOK_QUESTION_MARK_DOT: return "?.";
/* keywords */
case TOK_NULL: return "null";
case TOK_FALSE: return "false";
case TOK_TRUE: return "true";
case TOK_IF: return "if";
case TOK_ELSE: return "else";
case TOK_RETURN: return "return";
case TOK_GO: return "go";
case TOK_VAR: return "var";
case TOK_DEF: return "def";
case TOK_THIS: return "this";
case TOK_DELETE: return "delete";
case TOK_VOID: return "void";
case TOK_NEW: return "new";
case TOK_IN: return "in";
case TOK_DO: return "do";
case TOK_WHILE: return "while";
case TOK_FOR: return "for";
case TOK_BREAK: return "break";
case TOK_CONTINUE: return "continue";
case TOK_SWITCH: return "switch";
case TOK_CASE: return "case";
case TOK_DEFAULT: return "default";
case TOK_THROW: return "throw";
case TOK_TRY: return "try";
case TOK_CATCH: return "catch";
case TOK_FINALLY: return "finally";
case TOK_FUNCTION: return "function";
case TOK_DEBUGGER: return "debugger";
case TOK_WITH: return "with";
case TOK_CLASS: return "class";
case TOK_CONST: return "const";
case TOK_ENUM: return "enum";
case TOK_EXPORT: return "export";
case TOK_EXTENDS: return "extends";
case TOK_IMPORT: return "import";
case TOK_SUPER: return "super";
case TOK_IMPLEMENTS: return "implements";
case TOK_INTERFACE: return "interface";
case TOK_LET: return "let";
case TOK_PRIVATE: return "private";
case TOK_PROTECTED: return "protected";
case TOK_PUBLIC: return "public";
case TOK_STATIC: return "static";
case TOK_YIELD: return "yield";
case TOK_AWAIT: return "await";
case TOK_OF: return "of";
default:
/* Single character tokens */
if (token_val >= 0 && token_val < 128) {
single_char[0] = (char)token_val;
return single_char;
}
return "unknown";
}
}
typedef struct BlockEnv { typedef struct BlockEnv {
struct BlockEnv *prev; struct BlockEnv *prev;
JSValue label_name; /* JS_NULL if none */ JSValue label_name; /* JS_NULL if none */
@@ -27862,8 +27973,35 @@ typedef struct ASTParseState {
} ASTParseState; } ASTParseState;
static cJSON *ast_parse_expr (ASTParseState *s); static cJSON *ast_parse_expr (ASTParseState *s);
static cJSON *ast_parse_assign_expr (ASTParseState *s);
static cJSON *ast_parse_statement (ASTParseState *s); static cJSON *ast_parse_statement (ASTParseState *s);
static cJSON *ast_parse_function_inner (ASTParseState *s, BOOL is_expr); static cJSON *ast_parse_function_inner (ASTParseState *s, BOOL is_expr);
static cJSON *ast_parse_arrow_function (ASTParseState *s);
/* Raw-byte lookahead deciding whether a '(' token opens an arrow function.
 *
 * Starting at s->buf_ptr with a nesting level of 1, the scan walks forward
 * until the parentheses balance, skipping over quoted spans ('...', "..."
 * and `...`, honouring backslash escapes) so that parentheses inside them
 * are not counted. Comments and regex literals are NOT skipped. After the
 * balancing ')', spaces, tabs and line breaks are skipped and the result is
 * TRUE only if the next two bytes are "=>".
 *
 * Returns FALSE immediately when the current token is not '('. The parser
 * state is not modified. */
static BOOL ast_is_arrow_function (ASTParseState *s) {
  const uint8_t *cur;
  const uint8_t *end;
  int nesting = 1;

  if (s->token_val != '(') return FALSE;

  cur = s->buf_ptr;
  end = s->buf_end;

  while (cur < end && nesting > 0) {
    uint8_t ch = *cur++;
    switch (ch) {
    case '(':
      nesting++;
      break;
    case ')':
      nesting--;
      break;
    case '"':
    case '\'':
    case '`':
      /* Skip to the matching quote; a backslash protects the next byte. */
      for (; cur < end && *cur != ch; cur++) {
        if (*cur == '\\' && cur + 1 < end) cur++;
      }
      if (cur < end) cur++;
      break;
    default:
      break;
    }
  }

  /* Skip whitespace between the closing ')' and a possible arrow. */
  for (; cur < end; cur++) {
    if (*cur != ' ' && *cur != '\t' && *cur != '\n' && *cur != '\r') break;
  }

  /* Both bytes of "=>" must lie strictly before buf_end. */
  if (end - cur <= 1) return FALSE;
  return (cur[0] == '=' && cur[1] == '>');
}
static void ast_free_token (ASTParseState *s) { static void ast_free_token (ASTParseState *s) {
switch (s->token_val) { switch (s->token_val) {
@@ -28222,6 +28360,287 @@ redo:
return 0; return 0;
} }
/* Tokenizer function that does NOT skip whitespace/comments - emits them as tokens */
static int tokenize_next (ASTParseState *s) {
const uint8_t *p;
int c;
BOOL ident_has_escape;
ast_free_token (s);
p = s->buf_ptr;
s->got_lf = FALSE;
s->token_ptr = p;
c = *p;
switch (c) {
case 0:
if (p >= s->buf_end) {
s->token_val = TOK_EOF;
} else {
goto def_token;
}
break;
case '`': {
const uint8_t *start = p;
p++;
while (p < s->buf_end && *p != '`') {
if (*p == '\\' && p + 1 < s->buf_end) p++;
if (*p == '$' && p + 1 < s->buf_end && p[1] == '{') {
/* template with expressions - not fully supported in AST yet */
}
p++;
}
if (p < s->buf_end) p++;
s->token_val = TOK_TEMPLATE;
s->token_u.str.str = JS_NewStringLen (s->ctx, (const char *)(start + 1), p - start - 2);
} break;
case '\'':
case '\"': {
const uint8_t *start = p;
int quote = c;
p++;
while (p < s->buf_end && *p != quote) {
if (*p == '\\' && p + 1 < s->buf_end) p++;
p++;
}
if (p < s->buf_end) p++;
s->token_val = TOK_STRING;
s->token_u.str.str = JS_NewStringLen (s->ctx, (const char *)(start + 1), p - start - 2);
} break;
case '\r':
if (p[1] == '\n') p++;
/* fall through */
case '\n':
p++;
s->got_lf = TRUE;
s->token_val = TOK_NEWLINE;
break;
case '\f':
case '\v':
case ' ':
case '\t': {
/* Collect consecutive whitespace (excluding newlines) */
while (p < s->buf_end && (*p == ' ' || *p == '\t' || *p == '\f' || *p == '\v')) p++;
s->token_val = TOK_SPACE;
} break;
case '/':
if (p[1] == '*') {
/* Multi-line comment */
p += 2;
while (p < s->buf_end) {
if (p[0] == '*' && p[1] == '/') {
p += 2;
break;
}
if (*p == '\n' || *p == '\r') s->got_lf = TRUE;
p++;
}
s->token_val = TOK_COMMENT;
} else if (p[1] == '/') {
/* Single-line comment */
p += 2;
while (p < s->buf_end && *p != '\n' && *p != '\r') p++;
s->token_val = TOK_COMMENT;
} else if (p[1] == '=') {
p += 2;
s->token_val = TOK_DIV_ASSIGN;
} else {
p++;
s->token_val = c;
}
break;
case '\\':
goto def_token;
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
case 'v': case 'w': case 'x': case 'y': case 'z':
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
case 'V': case 'W': case 'X': case 'Y': case 'Z':
case '_': case '$': {
const uint8_t *start = p;
ident_has_escape = FALSE;
p++;
while (p < s->buf_end) {
c = *p;
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
(c >= '0' && c <= '9') || c == '_' || c == '$') {
p++;
} else if (c >= 0x80) {
p++;
while (p < s->buf_end && (*p & 0xc0) == 0x80) p++;
} else {
break;
}
}
size_t len = p - start;
s->token_u.ident.str = JS_NewStringLen (s->ctx, (const char *)start, len);
s->token_u.ident.has_escape = ident_has_escape;
s->token_u.ident.is_reserved = FALSE;
s->token_val = TOK_IDENT;
/* Check for keywords */
if (len == 2 && !memcmp (start, "if", 2)) s->token_val = TOK_IF;
else if (len == 2 && !memcmp (start, "in", 2)) s->token_val = TOK_IN;
else if (len == 2 && !memcmp (start, "do", 2)) s->token_val = TOK_DO;
else if (len == 2 && !memcmp (start, "go", 2)) s->token_val = TOK_GO;
else if (len == 3 && !memcmp (start, "var", 3)) s->token_val = TOK_VAR;
else if (len == 3 && !memcmp (start, "def", 3)) s->token_val = TOK_DEF;
else if (len == 3 && !memcmp (start, "for", 3)) s->token_val = TOK_FOR;
else if (len == 3 && !memcmp (start, "new", 3)) s->token_val = TOK_NEW;
else if (len == 3 && !memcmp (start, "try", 3)) s->token_val = TOK_TRY;
else if (len == 4 && !memcmp (start, "else", 4)) s->token_val = TOK_ELSE;
else if (len == 4 && !memcmp (start, "this", 4)) s->token_val = TOK_THIS;
else if (len == 4 && !memcmp (start, "null", 4)) s->token_val = TOK_NULL;
else if (len == 4 && !memcmp (start, "true", 4)) s->token_val = TOK_TRUE;
else if (len == 4 && !memcmp (start, "void", 4)) s->token_val = TOK_VOID;
else if (len == 4 && !memcmp (start, "case", 4)) s->token_val = TOK_CASE;
else if (len == 5 && !memcmp (start, "false", 5)) s->token_val = TOK_FALSE;
else if (len == 5 && !memcmp (start, "while", 5)) s->token_val = TOK_WHILE;
else if (len == 5 && !memcmp (start, "break", 5)) s->token_val = TOK_BREAK;
else if (len == 5 && !memcmp (start, "throw", 5)) s->token_val = TOK_THROW;
else if (len == 5 && !memcmp (start, "catch", 5)) s->token_val = TOK_CATCH;
else if (len == 6 && !memcmp (start, "return", 6)) s->token_val = TOK_RETURN;
else if (len == 6 && !memcmp (start, "delete", 6)) s->token_val = TOK_DELETE;
else if (len == 6 && !memcmp (start, "switch", 6)) s->token_val = TOK_SWITCH;
else if (len == 7 && !memcmp (start, "default", 7)) s->token_val = TOK_DEFAULT;
else if (len == 7 && !memcmp (start, "finally", 7)) s->token_val = TOK_FINALLY;
else if (len == 8 && !memcmp (start, "function", 8)) s->token_val = TOK_FUNCTION;
else if (len == 8 && !memcmp (start, "continue", 8)) s->token_val = TOK_CONTINUE;
} break;
case '.':
if (p[1] >= '0' && p[1] <= '9') {
goto tokenize_number;
} else {
goto def_token;
}
break;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
tokenize_number: {
const uint8_t *start = p;
BOOL is_float = FALSE;
if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
p += 2;
while (p < s->buf_end && ((c = *p, (c >= '0' && c <= '9') ||
(c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') || c == '_'))) p++;
} else if (p[0] == '0' && (p[1] == 'b' || p[1] == 'B')) {
p += 2;
while (p < s->buf_end && (*p == '0' || *p == '1' || *p == '_')) p++;
} else if (p[0] == '0' && (p[1] == 'o' || p[1] == 'O')) {
p += 2;
while (p < s->buf_end && (*p >= '0' && *p <= '7')) p++;
} else {
while (p < s->buf_end && ((*p >= '0' && *p <= '9') || *p == '_')) p++;
if (p < s->buf_end && *p == '.') {
is_float = TRUE;
p++;
while (p < s->buf_end && ((*p >= '0' && *p <= '9') || *p == '_')) p++;
}
if (p < s->buf_end && (*p == 'e' || *p == 'E')) {
is_float = TRUE;
p++;
if (p < s->buf_end && (*p == '+' || *p == '-')) p++;
while (p < s->buf_end && (*p >= '0' && *p <= '9')) p++;
}
}
(void)is_float;
s->token_val = TOK_NUMBER;
char *numstr = sys_malloc (p - start + 1);
memcpy (numstr, start, p - start);
numstr[p - start] = '\0';
double val = strtod (numstr, NULL);
sys_free (numstr);
s->token_u.num.val = JS_NewFloat64 (s->ctx, val);
} break;
case '*':
if (p[1] == '=') { p += 2; s->token_val = TOK_MUL_ASSIGN; }
else if (p[1] == '*') {
if (p[2] == '=') { p += 3; s->token_val = TOK_POW_ASSIGN; }
else { p += 2; s->token_val = TOK_POW; }
} else { goto def_token; }
break;
case '%':
if (p[1] == '=') { p += 2; s->token_val = TOK_MOD_ASSIGN; }
else { goto def_token; }
break;
case '+':
if (p[1] == '=') { p += 2; s->token_val = TOK_PLUS_ASSIGN; }
else if (p[1] == '+') { p += 2; s->token_val = TOK_INC; }
else { goto def_token; }
break;
case '-':
if (p[1] == '=') { p += 2; s->token_val = TOK_MINUS_ASSIGN; }
else if (p[1] == '-') { p += 2; s->token_val = TOK_DEC; }
else { goto def_token; }
break;
case '<':
if (p[1] == '=') { p += 2; s->token_val = TOK_LTE; }
else if (p[1] == '<') {
if (p[2] == '=') { p += 3; s->token_val = TOK_SHL_ASSIGN; }
else { p += 2; s->token_val = TOK_SHL; }
} else { goto def_token; }
break;
case '>':
if (p[1] == '=') { p += 2; s->token_val = TOK_GTE; }
else if (p[1] == '>') {
if (p[2] == '>') {
if (p[3] == '=') { p += 4; s->token_val = TOK_SHR_ASSIGN; }
else { p += 3; s->token_val = TOK_SHR; }
} else if (p[2] == '=') { p += 3; s->token_val = TOK_SAR_ASSIGN; }
else { p += 2; s->token_val = TOK_SAR; }
} else { goto def_token; }
break;
case '=':
if (p[1] == '=') {
if (p[2] == '=') { p += 3; s->token_val = TOK_STRICT_EQ; }
else { p += 2; s->token_val = TOK_EQ; }
} else if (p[1] == '>') { p += 2; s->token_val = TOK_ARROW; }
else { goto def_token; }
break;
case '!':
if (p[1] == '=') {
if (p[2] == '=') { p += 3; s->token_val = TOK_STRICT_NEQ; }
else { p += 2; s->token_val = TOK_NEQ; }
} else { goto def_token; }
break;
case '&':
if (p[1] == '&') {
if (p[2] == '=') { p += 3; s->token_val = TOK_LAND_ASSIGN; }
else { p += 2; s->token_val = TOK_LAND; }
} else if (p[1] == '=') { p += 2; s->token_val = TOK_AND_ASSIGN; }
else { goto def_token; }
break;
case '|':
if (p[1] == '|') {
if (p[2] == '=') { p += 3; s->token_val = TOK_LOR_ASSIGN; }
else { p += 2; s->token_val = TOK_LOR; }
} else if (p[1] == '=') { p += 2; s->token_val = TOK_OR_ASSIGN; }
else { goto def_token; }
break;
case '^':
if (p[1] == '=') { p += 2; s->token_val = TOK_XOR_ASSIGN; }
else { goto def_token; }
break;
case '?':
if (p[1] == '?') {
if (p[2] == '=') { p += 3; s->token_val = TOK_DOUBLE_QUESTION_MARK_ASSIGN; }
else { p += 2; s->token_val = TOK_DOUBLE_QUESTION_MARK; }
} else if (p[1] == '.') { p += 2; s->token_val = TOK_QUESTION_MARK_DOT; }
else { goto def_token; }
break;
default:
def_token:
p++;
s->token_val = c;
break;
}
s->buf_ptr = p;
return 0;
}
static cJSON *ast_parse_primary (ASTParseState *s) { static cJSON *ast_parse_primary (ASTParseState *s) {
const uint8_t *start = s->token_ptr; const uint8_t *start = s->token_ptr;
cJSON *node = NULL; cJSON *node = NULL;
@@ -28264,12 +28683,19 @@ static cJSON *ast_parse_primary (ASTParseState *s) {
} break; } break;
case TOK_IDENT: { case TOK_IDENT: {
node = ast_node (s, "name", start); /* Check for single-param arrow function: x => ... */
const char *str = JS_ToCString (s->ctx, s->token_u.ident.str); const uint8_t *p = s->buf_ptr;
cJSON_AddStringToObject (node, "name", str ? str : ""); while (p < s->buf_end && (*p == ' ' || *p == '\t')) p++;
JS_FreeCString (s->ctx, str); if (p + 1 < s->buf_end && p[0] == '=' && p[1] == '>') {
ast_node_end (s, node, s->buf_ptr); node = ast_parse_arrow_function (s);
ast_next_token (s); } else {
node = ast_node (s, "name", start);
const char *str = JS_ToCString (s->ctx, s->token_u.ident.str);
cJSON_AddStringToObject (node, "name", str ? str : "");
JS_FreeCString (s->ctx, str);
ast_node_end (s, node, s->buf_ptr);
ast_next_token (s);
}
} break; } break;
case TOK_NULL: case TOK_NULL:
@@ -28301,7 +28727,7 @@ static cJSON *ast_parse_primary (ASTParseState *s) {
cJSON *list = cJSON_AddArrayToObject (node, "list"); cJSON *list = cJSON_AddArrayToObject (node, "list");
ast_next_token (s); ast_next_token (s);
while (s->token_val != ']' && s->token_val != TOK_EOF) { while (s->token_val != ']' && s->token_val != TOK_EOF) {
cJSON *elem = ast_parse_expr (s); cJSON *elem = ast_parse_assign_expr (s);
if (elem) cJSON_AddItemToArray (list, elem); if (elem) cJSON_AddItemToArray (list, elem);
if (s->token_val == ',') ast_next_token (s); if (s->token_val == ',') ast_next_token (s);
else break; else break;
@@ -28321,13 +28747,13 @@ static cJSON *ast_parse_primary (ASTParseState *s) {
while (s->token_val != '}' && s->token_val != TOK_EOF) { while (s->token_val != '}' && s->token_val != TOK_EOF) {
cJSON *pair = cJSON_CreateObject (); cJSON *pair = cJSON_CreateObject ();
/* property name */ /* property name */
if (s->token_val == TOK_IDENT || s->token_val == TOK_STRING) { if (s->token_val == TOK_IDENT || s->token_val == TOK_STRING || s->token_val == TOK_NUMBER) {
cJSON *left = ast_parse_primary (s); cJSON *left = ast_parse_primary (s);
cJSON_AddItemToObject (pair, "left", left); cJSON_AddItemToObject (pair, "left", left);
} else if (s->token_val == '[') { } else if (s->token_val == '[') {
/* computed property */ /* computed property */
ast_next_token (s); ast_next_token (s);
cJSON *left = ast_parse_expr (s); cJSON *left = ast_parse_assign_expr (s);
cJSON_AddItemToObject (pair, "left", left); cJSON_AddItemToObject (pair, "left", left);
if (s->token_val == ']') { if (s->token_val == ']') {
ast_next_token (s); ast_next_token (s);
@@ -28342,7 +28768,7 @@ static cJSON *ast_parse_primary (ASTParseState *s) {
/* colon and value */ /* colon and value */
if (s->token_val == ':') { if (s->token_val == ':') {
ast_next_token (s); ast_next_token (s);
cJSON *right = ast_parse_expr (s); cJSON *right = ast_parse_assign_expr (s);
cJSON_AddItemToObject (pair, "right", right); cJSON_AddItemToObject (pair, "right", right);
} else { } else {
ast_error (s, s->token_ptr, "expected ':' after property name"); ast_error (s, s->token_ptr, "expected ':' after property name");
@@ -28360,14 +28786,19 @@ static cJSON *ast_parse_primary (ASTParseState *s) {
} break; } break;
case '(': { case '(': {
ast_next_token (s); /* Check for arrow function: () => ..., (a, b) => ... */
node = ast_parse_expr (s); if (ast_is_arrow_function (s)) {
if (s->token_val == ')') { node = ast_parse_arrow_function (s);
ast_next_token (s);
} else if (s->token_val == TOK_EOF) {
ast_error (s, s->token_ptr, "unterminated parenthesized expression, expected ')'");
} else { } else {
ast_error (s, s->token_ptr, "expected ')' after expression"); ast_next_token (s);
node = ast_parse_expr (s);
if (s->token_val == ')') {
ast_next_token (s);
} else if (s->token_val == TOK_EOF) {
ast_error (s, s->token_ptr, "unterminated parenthesized expression, expected ')'");
} else {
ast_error (s, s->token_ptr, "expected ')' after expression");
}
} }
} break; } break;
@@ -28375,6 +28806,52 @@ static cJSON *ast_parse_primary (ASTParseState *s) {
node = ast_parse_function_inner (s, TRUE); node = ast_parse_function_inner (s, TRUE);
} break; } break;
case '/': {
/* Regex literal - when / appears in primary position, it's a regex */
node = ast_node (s, "regexp", start);
const uint8_t *p = s->token_ptr + 1; /* skip opening / */
const uint8_t *pattern_start = p;
/* Parse pattern - find closing / (not escaped) */
while (p < s->buf_end && *p != '/') {
if (*p == '\\' && p + 1 < s->buf_end) {
p += 2; /* skip escape sequence */
} else if (*p == '\n' || *p == '\r') {
ast_error (s, p, "unterminated regex literal");
break;
} else {
p++;
}
}
size_t pattern_len = p - pattern_start;
if (p < s->buf_end) p++; /* skip closing / */
/* Parse flags */
const uint8_t *flags_start = p;
while (p < s->buf_end && ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z'))) {
p++;
}
size_t flags_len = p - flags_start;
char *pattern = sys_malloc (pattern_len + 1);
memcpy (pattern, pattern_start, pattern_len);
pattern[pattern_len] = '\0';
cJSON_AddStringToObject (node, "pattern", pattern);
sys_free (pattern);
if (flags_len > 0) {
char *flags = sys_malloc (flags_len + 1);
memcpy (flags, flags_start, flags_len);
flags[flags_len] = '\0';
cJSON_AddStringToObject (node, "flags", flags);
sys_free (flags);
}
s->buf_ptr = p;
ast_node_end (s, node, s->buf_ptr);
ast_next_token (s);
} break;
default: default:
/* Report syntax error with token info */ /* Report syntax error with token info */
if (s->token_val >= 32 && s->token_val < 127) { if (s->token_val >= 32 && s->token_val < 127) {
@@ -28413,7 +28890,7 @@ static cJSON *ast_parse_postfix (ASTParseState *s) {
ast_next_token (s); ast_next_token (s);
cJSON *new_node = ast_node (s, "[", start); cJSON *new_node = ast_node (s, "[", start);
cJSON_AddItemToObject (new_node, "left", node); cJSON_AddItemToObject (new_node, "left", node);
cJSON *index = ast_parse_expr (s); cJSON *index = ast_parse_assign_expr (s);
cJSON_AddItemToObject (new_node, "right", index); cJSON_AddItemToObject (new_node, "right", index);
if (s->token_val == ']') ast_next_token (s); if (s->token_val == ']') ast_next_token (s);
ast_node_end (s, new_node, s->buf_ptr); ast_node_end (s, new_node, s->buf_ptr);
@@ -28424,7 +28901,7 @@ static cJSON *ast_parse_postfix (ASTParseState *s) {
cJSON_AddItemToObject (new_node, "expression", node); cJSON_AddItemToObject (new_node, "expression", node);
cJSON *list = cJSON_AddArrayToObject (new_node, "list"); cJSON *list = cJSON_AddArrayToObject (new_node, "list");
while (s->token_val != ')' && s->token_val != TOK_EOF) { while (s->token_val != ')' && s->token_val != TOK_EOF) {
cJSON *arg = ast_parse_expr (s); cJSON *arg = ast_parse_assign_expr (s);
if (arg) cJSON_AddItemToArray (list, arg); if (arg) cJSON_AddItemToArray (list, arg);
if (s->token_val == ',') ast_next_token (s); if (s->token_val == ',') ast_next_token (s);
else break; else break;
@@ -28446,6 +28923,44 @@ static cJSON *ast_parse_postfix (ASTParseState *s) {
ast_next_token (s); ast_next_token (s);
ast_node_end (s, new_node, s->buf_ptr); ast_node_end (s, new_node, s->buf_ptr);
node = new_node; node = new_node;
} else if (s->token_val == TOK_QUESTION_MARK_DOT) {
ast_next_token (s);
if (s->token_val == '[') {
/* Optional bracket access: o?.["a"] */
ast_next_token (s);
cJSON *new_node = ast_node (s, "?.[", start);
cJSON_AddItemToObject (new_node, "left", node);
cJSON *index = ast_parse_assign_expr (s);
cJSON_AddItemToObject (new_node, "right", index);
if (s->token_val == ']') ast_next_token (s);
ast_node_end (s, new_node, s->buf_ptr);
node = new_node;
} else if (s->token_val == '(') {
/* Optional call: o.f?.() */
ast_next_token (s);
cJSON *new_node = ast_node (s, "?.(", start);
cJSON_AddItemToObject (new_node, "expression", node);
cJSON *list = cJSON_AddArrayToObject (new_node, "list");
while (s->token_val != ')' && s->token_val != TOK_EOF) {
cJSON *arg = ast_parse_assign_expr (s);
if (arg) cJSON_AddItemToArray (list, arg);
if (s->token_val == ',') ast_next_token (s);
else break;
}
if (s->token_val == ')') ast_next_token (s);
ast_node_end (s, new_node, s->buf_ptr);
node = new_node;
} else if (s->token_val == TOK_IDENT) {
/* Optional property access: o?.a */
cJSON *new_node = ast_node (s, "?.", start);
cJSON_AddItemToObject (new_node, "left", node);
const char *str = JS_ToCString (s->ctx, s->token_u.ident.str);
cJSON_AddStringToObject (new_node, "right", str ? str : "");
JS_FreeCString (s->ctx, str);
ast_next_token (s);
ast_node_end (s, new_node, s->buf_ptr);
node = new_node;
}
} else { } else {
break; break;
} }
@@ -28533,7 +29048,7 @@ static cJSON *ast_parse_unary (ASTParseState *s) {
ast_next_token (s); ast_next_token (s);
cJSON *list = cJSON_AddArrayToObject (node, "list"); cJSON *list = cJSON_AddArrayToObject (node, "list");
while (s->token_val != ')' && s->token_val != TOK_EOF) { while (s->token_val != ')' && s->token_val != TOK_EOF) {
cJSON *arg = ast_parse_expr (s); cJSON *arg = ast_parse_assign_expr (s);
if (arg) cJSON_AddItemToArray (list, arg); if (arg) cJSON_AddItemToArray (list, arg);
if (s->token_val == ',') ast_next_token (s); if (s->token_val == ',') ast_next_token (s);
else break; else break;
@@ -28681,10 +29196,31 @@ static cJSON *ast_parse_assign (ASTParseState *s) {
return node; return node;
} }
static cJSON *ast_parse_expr (ASTParseState *s) { /* Parse assignment expression (excludes comma operator) */
static cJSON *ast_parse_assign_expr (ASTParseState *s) {
return ast_parse_assign (s); return ast_parse_assign (s);
} }
/* Parse a full expression, i.e. one or more assignment expressions joined
 * by the comma operator.
 *
 * A single operand is returned as-is. For `a, b, c` the result is a
 * left-nested chain of "," nodes, each carrying "left" and "right" children
 * and starting at the position of its comma token.
 * Returns NULL when the first operand fails to parse. */
static cJSON *ast_parse_expr (ASTParseState *s) {
  cJSON *result = ast_parse_assign (s);
  if (result == NULL) return NULL;

  for (;;) {
    const uint8_t *comma_pos;
    cJSON *rhs;
    cJSON *comma;

    if (s->token_val != ',') break;

    comma_pos = s->token_ptr;
    ast_next_token (s); /* consume ',' */
    rhs = ast_parse_assign (s);

    comma = ast_node (s, ",", comma_pos);
    cJSON_AddItemToObject (comma, "left", result);
    cJSON_AddItemToObject (comma, "right", rhs);
    ast_node_end (s, comma, s->buf_ptr);

    /* The chain built so far becomes the left operand of the next comma. */
    result = comma;
  }
  return result;
}
static cJSON *ast_parse_block_statements (ASTParseState *s) { static cJSON *ast_parse_block_statements (ASTParseState *s) {
cJSON *stmts = cJSON_CreateArray (); cJSON *stmts = cJSON_CreateArray ();
while (s->token_val != '}' && s->token_val != TOK_EOF) { while (s->token_val != '}' && s->token_val != TOK_EOF) {
@@ -28756,6 +29292,82 @@ static cJSON *ast_parse_function_inner (ASTParseState *s, BOOL is_expr) {
return node; return node;
} }
/* Parse an arrow function: x => expr, (a, b) => expr, (x = 10) => expr,
 * () => expr, or any of these with a { ... } block body.
 *
 * Expects the current token to be either the single parameter identifier or
 * the opening '('. Produces a "function" node with:
 *   - "arrow": true
 *   - "list": array of "name" parameter nodes, each with an optional
 *     "default" child
 *   - "statements": the block body, or a single synthetic "return" node
 *     wrapping an expression body
 *   - "function_nr": taken from s->function_nr, which is then incremented
 *
 * Default values and expression bodies are parsed as assignment expressions,
 * not full expressions: a following ',' belongs to the enclosing parameter
 * list, argument list or array literal, not to the arrow function.
 *
 * Syntax problems are reported through ast_error; a node is always
 * returned. */
static cJSON *ast_parse_arrow_function (ASTParseState *s) {
  const uint8_t *start = s->token_ptr;
  cJSON *node = ast_node (s, "function", start);
  cJSON_AddBoolToObject (node, "arrow", 1);

  /* Parameters */
  cJSON *params = cJSON_AddArrayToObject (node, "list");

  if (s->token_val == TOK_IDENT) {
    /* Single parameter without parens: x => ... */
    cJSON *param = ast_node (s, "name", s->token_ptr);
    const char *str = JS_ToCString (s->ctx, s->token_u.ident.str);
    cJSON_AddStringToObject (param, "name", str ? str : "");
    JS_FreeCString (s->ctx, str);
    ast_node_end (s, param, s->buf_ptr);
    cJSON_AddItemToArray (params, param);
    ast_next_token (s);
  } else if (s->token_val == '(') {
    /* Parenthesized parameters: () => ..., (a, b) => ..., (x = 10) => ... */
    ast_next_token (s);
    while (s->token_val != ')' && s->token_val != TOK_EOF) {
      if (s->token_val == TOK_IDENT) {
        cJSON *param = ast_node (s, "name", s->token_ptr);
        const char *str = JS_ToCString (s->ctx, s->token_u.ident.str);
        cJSON_AddStringToObject (param, "name", str ? str : "");
        JS_FreeCString (s->ctx, str);
        ast_node_end (s, param, s->buf_ptr);
        ast_next_token (s);
        /* Check for default value */
        if (s->token_val == '=') {
          ast_next_token (s);
          /* Stop at ',' so the next parameter is not swallowed by the
             comma operator: (a = 1, b) => ... has two parameters. */
          cJSON *default_val = ast_parse_assign_expr (s);
          cJSON_AddItemToObject (param, "default", default_val);
        }
        cJSON_AddItemToArray (params, param);
      } else {
        ast_error (s, s->token_ptr, "expected parameter name");
        break;
      }
      if (s->token_val == ',') ast_next_token (s);
      else break;
    }
    if (s->token_val == ')') ast_next_token (s);
  }

  /* Arrow token */
  if (s->token_val != TOK_ARROW) {
    ast_error (s, s->token_ptr, "expected '=>' in arrow function");
  } else {
    ast_next_token (s);
  }

  /* Body: either block or expression */
  if (s->token_val == '{') {
    ast_next_token (s);
    cJSON *stmts = ast_parse_block_statements (s);
    cJSON_AddItemToObject (node, "statements", stmts);
    if (s->token_val == '}') ast_next_token (s);
  } else {
    /* Expression body - wrap in implicit return */
    cJSON *stmts = cJSON_CreateArray ();
    cJSON *ret = ast_node (s, "return", s->token_ptr);
    /* Assignment expression only: in f(x => x, 2) the ", 2" is the second
       call argument, not part of the arrow body. */
    cJSON *expr = ast_parse_assign_expr (s);
    cJSON_AddItemToObject (ret, "expression", expr);
    ast_node_end (s, ret, s->buf_ptr);
    cJSON_AddItemToArray (stmts, ret);
    cJSON_AddItemToObject (node, "statements", stmts);
  }

  cJSON_AddNumberToObject (node, "function_nr", s->function_nr++);
  ast_node_end (s, node, s->buf_ptr);
  return node;
}
static cJSON *ast_parse_statement (ASTParseState *s) { static cJSON *ast_parse_statement (ASTParseState *s) {
const uint8_t *start = s->token_ptr; const uint8_t *start = s->token_ptr;
cJSON *node = NULL; cJSON *node = NULL;
@@ -29074,6 +29686,33 @@ static cJSON *ast_parse_statement (ASTParseState *s) {
ast_next_token (s); ast_next_token (s);
return NULL; return NULL;
case TOK_IDENT: {
/* Check if this is a labeled statement: identifier: statement */
const uint8_t *p = s->buf_ptr;
while (p < s->buf_end && (*p == ' ' || *p == '\t')) p++;
if (p < s->buf_end && *p == ':') {
/* Labeled statement */
node = ast_node (s, "label", start);
const char *str = JS_ToCString (s->ctx, s->token_u.ident.str);
cJSON_AddStringToObject (node, "name", str ? str : "");
JS_FreeCString (s->ctx, str);
ast_next_token (s); /* skip identifier */
ast_next_token (s); /* skip colon */
cJSON *stmt = ast_parse_statement (s);
cJSON_AddItemToObject (node, "statement", stmt);
ast_node_end (s, node, s->buf_ptr);
} else {
/* Expression statement */
cJSON *expr = ast_parse_expr (s);
if (expr) {
node = ast_node (s, "call", start);
cJSON_AddItemToObject (node, "expression", expr);
ast_node_end (s, node, s->buf_ptr);
}
if (s->token_val == ';') ast_next_token (s);
}
} break;
default: { default: {
/* Expression statement */ /* Expression statement */
cJSON *expr = ast_parse_expr (s); cJSON *expr = ast_parse_expr (s);
@@ -29147,6 +29786,102 @@ char *JS_AST (JSContext *ctx, const char *source, size_t len, const char *filena
return json; return json;
} }
/* Add the raw source bytes [from, to) to `tok` as the string field "value".
 * cJSON_AddStringToObject needs a NUL-terminated string, so the bytes are
 * copied into a scratch buffer first. If that buffer cannot be allocated
 * the field is simply left out. */
static void token_add_source_text (cJSON *tok, const uint8_t *from, const uint8_t *to) {
  size_t len = (size_t)(to - from);
  char *text = sys_malloc (len + 1);

  if (!text) return;
  memcpy (text, from, len);
  text[len] = '\0';
  cJSON_AddStringToObject (tok, "value", text);
  sys_free (text);
}

/* Build the JSON object describing the token currently held in `s`.
 *
 * Fields, in order:
 *   "kind"                   - from ast_token_kind_str
 *   "at"                     - byte offset of the token from s->buf_start
 *   "from_row"/"from_column" - position of the token start
 *   "to_row"/"to_column"     - position of s->buf_ptr, i.e. just past it
 *   "value"                  - raw source text for numbers, comments and
 *                              whitespace; the string held in s->token_u for
 *                              strings, templates and identifiers; absent
 *                              for operators, punctuators and keywords
 *   "number"                 - numeric value, numbers only
 *
 * Returns a new cJSON object; the visible caller attaches it to the token
 * array. */
static cJSON *build_token_object (ASTParseState *s) {
  cJSON *tok = cJSON_CreateObject ();
  const char *kind = ast_token_kind_str (s->token_val);
  cJSON_AddStringToObject (tok, "kind", kind);

  /* Position info */
  int at = (int)(s->token_ptr - s->buf_start);
  int from_row, from_col;
  ast_get_line_col (s, s->token_ptr, &from_row, &from_col);
  int to_row, to_col;
  ast_get_line_col (s, s->buf_ptr, &to_row, &to_col);

  cJSON_AddNumberToObject (tok, "at", at);
  cJSON_AddNumberToObject (tok, "from_row", from_row);
  cJSON_AddNumberToObject (tok, "from_column", from_col);
  cJSON_AddNumberToObject (tok, "to_row", to_row);
  cJSON_AddNumberToObject (tok, "to_column", to_col);

  /* Value field based on token type */
  switch (s->token_val) {
  case TOK_NUMBER: {
    /* Original spelling first, then the parsed value */
    token_add_source_text (tok, s->token_ptr, s->buf_ptr);
    double d;
    /* The number may be stored with an integer tag; check the tag before
       choosing how to read the value. */
    if (JS_VALUE_GET_TAG (s->token_u.num.val) == JS_TAG_INT) {
      d = JS_VALUE_GET_INT (s->token_u.num.val);
    } else {
      d = JS_VALUE_GET_FLOAT64 (s->token_u.num.val);
    }
    cJSON_AddNumberToObject (tok, "number", d);
  } break;
  case TOK_STRING:
  case TOK_TEMPLATE: {
    const char *str = JS_ToCString (s->ctx, s->token_u.str.str);
    cJSON_AddStringToObject (tok, "value", str ? str : "");
    JS_FreeCString (s->ctx, str);
  } break;
  case TOK_IDENT: {
    const char *str = JS_ToCString (s->ctx, s->token_u.ident.str);
    cJSON_AddStringToObject (tok, "value", str ? str : "");
    JS_FreeCString (s->ctx, str);
  } break;
  case TOK_COMMENT:
  case TOK_SPACE:
  case TOK_NEWLINE:
    /* Store the raw source text */
    token_add_source_text (tok, s->token_ptr, s->buf_ptr);
    break;
  default:
    /* No value field for operators/punctuators/keywords */
    break;
  }

  return tok;
}
/* Tokenize `source` (`len` bytes) and serialize the token stream as JSON.

   The result is an object holding the given `filename` and a "tokens"
   array with one entry per token produced by tokenize_next(), built by
   build_token_object().  The loop stops after the TOK_EOF token has been
   appended, so the array always ends with the EOF entry.

   Returns the string produced by cJSON_Print() (caller must free), or NULL
   if the JSON tree could not be built. */
char *JS_Tokenize (JSContext *ctx, const char *source, size_t len, const char *filename) {
  ASTParseState s;
  memset (&s, 0, sizeof (s));
  s.ctx = ctx;
  s.filename = filename;
  s.buf_start = (const uint8_t *)source;
  s.buf_ptr = (const uint8_t *)source;
  s.buf_end = (const uint8_t *)source + len;
  s.function_nr = 0;
  s.errors = NULL;
  s.has_error = 0;

  cJSON *root = cJSON_CreateObject ();
  if (!root) return NULL;
  cJSON_AddStringToObject (root, "filename", filename);
  cJSON *tokens = cJSON_AddArrayToObject (root, "tokens");
  if (!tokens) {
    /* Without the array every token object built below would be leaked. */
    cJSON_Delete (root);
    return NULL;
  }

  /* Tokenize all tokens including whitespace */
  for (;;) {
    tokenize_next (&s);
    cJSON *tok = build_token_object (&s);
    if (!tok) {
      /* Report failure rather than emitting a stream with a missing token. */
      cJSON_Delete (root);
      return NULL;
    }
    cJSON_AddItemToArray (tokens, tok); /* `tokens` takes ownership of `tok` */
    if (s.token_val == TOK_EOF) break;
  }

  char *json = cJSON_Print (root);
  cJSON_Delete (root);
  return json;
}
/* ============================================================ /* ============================================================
Register-Based Machine Code Generator Register-Based Machine Code Generator
============================================================ */ ============================================================ */

View File

@@ -1222,6 +1222,10 @@ CellModule *JS_CompileModule (JSContext *ctx, const char *input, size_t input_le
Returns malloc'd JSON string (caller must free), or NULL on error. */ Returns malloc'd JSON string (caller must free), or NULL on error. */
char *JS_AST (JSContext *ctx, const char *source, size_t len, const char *filename); char *JS_AST (JSContext *ctx, const char *source, size_t len, const char *filename);
/* Tokenize source code and return the token stream as a JSON string.
   The JSON is an object with a "filename" string and a "tokens" array.
   Every token entry has "kind", "at", "from_row", "from_column", "to_row"
   and "to_column"; some kinds also carry "value", and numeric literals
   carry "number".  The last entry in the array is the end-of-file token.
   Returns malloc'd JSON string (caller must free), or NULL on error. */
char *JS_Tokenize (JSContext *ctx, const char *source, size_t len, const char *filename);
/* Generate register-based machine code from AST JSON. /* Generate register-based machine code from AST JSON.
Returns malloc'd JSON string (caller must free), or NULL on error. */ Returns malloc'd JSON string (caller must free), or NULL on error. */
char *JS_Mach (JSContext *ctx, const char *ast_json); char *JS_Mach (JSContext *ctx, const char *ast_json);

View File

@@ -0,0 +1 @@
var f = (x = 10) => x; f()

View File

@@ -0,0 +1 @@
var f = () => 42; f()

1
vm_test/assign_add.txt Normal file
View File

@@ -0,0 +1 @@
var x = 5; x += 3; x

1
vm_test/assign_and.txt Normal file
View File

@@ -0,0 +1 @@
var x = 7; x &= 3; x

1
vm_test/assign_div.txt Normal file
View File

@@ -0,0 +1 @@
var x = 6; x /= 2; x

1
vm_test/assign_land.txt Normal file
View File

@@ -0,0 +1 @@
var x = 5; x &&= 10; x

1
vm_test/assign_lor.txt Normal file
View File

@@ -0,0 +1 @@
var x = 0; x ||= 10; x

1
vm_test/assign_mod.txt Normal file
View File

@@ -0,0 +1 @@
var x = 7; x %= 3; x

1
vm_test/assign_mul.txt Normal file
View File

@@ -0,0 +1 @@
var x = 5; x *= 3; x

View File

@@ -0,0 +1 @@
var x = null; x ??= 10; x

1
vm_test/assign_or.txt Normal file
View File

@@ -0,0 +1 @@
var x = 5; x |= 2; x

1
vm_test/assign_power.txt Normal file
View File

@@ -0,0 +1 @@
var x = 2; x **= 3; x

1
vm_test/assign_shl.txt Normal file
View File

@@ -0,0 +1 @@
var x = 2; x <<= 3; x

1
vm_test/assign_shr.txt Normal file
View File

@@ -0,0 +1 @@
var x = 8; x >>= 2; x

1
vm_test/assign_shru.txt Normal file
View File

@@ -0,0 +1 @@
var x = -8; x >>>= 2; x

1
vm_test/assign_sub.txt Normal file
View File

@@ -0,0 +1 @@
var x = 5; x -= 3; x

1
vm_test/assign_xor.txt Normal file
View File

@@ -0,0 +1 @@
var x = 5; x ^= 3; x

View File

@@ -0,0 +1 @@
var x, y; x = y = 5; x + y

View File

@@ -0,0 +1 @@
/* comment */ 5

View File

@@ -0,0 +1 @@
1 /* a */ + /* b */ 2

View File

@@ -0,0 +1 @@
;;; 5

1
vm_test/func_expr.txt Normal file
View File

@@ -0,0 +1 @@
var f = function(x) { return x * 2 }; f(3)

1
vm_test/func_iife.txt Normal file
View File

@@ -0,0 +1 @@
(function(x) { return x * 2 })(5)

View File

@@ -0,0 +1 @@
function fac(n) { if (n <= 1) return 1; return n * fac(n - 1) }; fac(5)

1
vm_test/label_break.txt Normal file
View File

@@ -0,0 +1 @@
var x = 0; outer: { x = 1; break outer; x = 2 }; x

View File

@@ -0,0 +1 @@
var s = 0; outer: for (var i = 0; i < 3; i++) { for (var j = 0; j < 3; j++) { if (j == 1) continue outer; s = s + 1 } }; s

1
vm_test/multi_var.txt Normal file
View File

@@ -0,0 +1 @@
var x = 1, y = 2; x + y

1
vm_test/nested_block.txt Normal file
View File

@@ -0,0 +1 @@
var x = 1; { var y = 2; { var z = 3; x = x + y + z } }; x

1
vm_test/num_binary.txt Normal file
View File

@@ -0,0 +1 @@
0b1010

1
vm_test/num_exp.txt Normal file
View File

@@ -0,0 +1 @@
1e3

1
vm_test/num_float.txt Normal file
View File

@@ -0,0 +1 @@
3.14

1
vm_test/num_hex.txt Normal file
View File

@@ -0,0 +1 @@
0xff

1
vm_test/num_octal.txt Normal file
View File

@@ -0,0 +1 @@
0o17

View File

@@ -0,0 +1 @@
1_000_000

View File

@@ -0,0 +1 @@
~5

View File

@@ -0,0 +1 @@
5 | 2

View File

@@ -0,0 +1 @@
5 ^ 3

1
vm_test/op_comma.txt Normal file
View File

@@ -0,0 +1 @@
(1, 2, 3)

View File

@@ -0,0 +1 @@
3 == 3

View File

@@ -0,0 +1 @@
5 >= 5

View File

@@ -0,0 +1 @@
3 < 5

View File

@@ -0,0 +1 @@
3 <= 3

View File

@@ -0,0 +1 @@
3 != 4

View File

@@ -0,0 +1 @@
var x = 5; x--; x

View File

@@ -0,0 +1 @@
var x = 5; --x

1
vm_test/op_delete.txt Normal file
View File

@@ -0,0 +1 @@
var o = {x: 1}; delete o.x; o.x

1
vm_test/op_in.txt Normal file
View File

@@ -0,0 +1 @@
var o = {x: 1}; "x" in o

View File

@@ -0,0 +1 @@
var x = 5; x++; x

View File

@@ -0,0 +1 @@
var x = 5; ++x

View File

@@ -0,0 +1 @@
!false

View File

@@ -0,0 +1 @@
false || true

1
vm_test/op_nullish.txt Normal file
View File

@@ -0,0 +1 @@
null ?? 5

1
vm_test/op_power.txt Normal file
View File

@@ -0,0 +1 @@
2 ** 3

View File

@@ -0,0 +1 @@
2 << 3

View File

@@ -0,0 +1 @@
8 >> 2

View File

@@ -0,0 +1 @@
-8 >>> 2

1
vm_test/op_typeof.txt Normal file
View File

@@ -0,0 +1 @@
typeof 5

View File

@@ -0,0 +1 @@
-5

View File

@@ -0,0 +1 @@
+"5"

1
vm_test/op_void.txt Normal file
View File

@@ -0,0 +1 @@
void 0

View File

@@ -0,0 +1 @@
var o = {a: 1}; o?.["a"]

View File

@@ -0,0 +1 @@
var o = {f: () => 1}; o.f?.()

View File

@@ -0,0 +1 @@
var o = null; o?.a

View File

@@ -0,0 +1 @@
var o = {a: 1}; o?.a

View File

@@ -0,0 +1 @@
(1 + 2) * 3

1
vm_test/record_chain.txt Normal file
View File

@@ -0,0 +1 @@
var o = {a: {b: {c: 1}}}; o.a.b.c

View File

@@ -0,0 +1 @@
var o = {a: {b: 1}}; o.a.b

View File

@@ -0,0 +1 @@
var o = {1: "one"}; o[1]

View File

@@ -0,0 +1 @@
"hello\nworld"

View File

@@ -0,0 +1 @@
"\u0041"

View File

@@ -0,0 +1 @@
var x = 1; try { throw 0 } catch(e) { x = 2 } finally { x = x + 1 }; x

1
vm_test/try_finally.txt Normal file
View File

@@ -0,0 +1 @@
var x = 1; try { x = 2 } finally { x = 3 }; x