/* * QuickJS Javascript Engine * * Copyright (c) 2017-2025 Fabrice Bellard * Copyright (c) 2017-2025 Charlie Gordon * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include "quickjs-internal.h" static const char *ast_token_kind_str(int token_val) { static char single_char[2] = {0, 0}; switch (token_val) { case TOK_NUMBER: return "number"; case TOK_STRING: return "text"; case TOK_TEMPLATE: return "text"; case TOK_IDENT: return "name"; case TOK_COMMENT: return "comment"; case TOK_NEWLINE: return "newline"; case TOK_SPACE: return "space"; case TOK_REGEXP: return "regexp"; case TOK_PRIVATE_NAME: return "private_name"; case TOK_EOF: return "eof"; case TOK_ERROR: return "error"; /* compound operators */ case TOK_MUL_ASSIGN: return "*="; case TOK_DIV_ASSIGN: return "/="; case TOK_MOD_ASSIGN: return "%="; case TOK_PLUS_ASSIGN: return "+="; case TOK_MINUS_ASSIGN: return "-="; case TOK_SHL_ASSIGN: return "<<="; case TOK_SAR_ASSIGN: return ">>="; case TOK_SHR_ASSIGN: return ">>>="; case TOK_AND_ASSIGN: return "&="; case TOK_XOR_ASSIGN: return "^="; case TOK_OR_ASSIGN: return "|="; case TOK_POW_ASSIGN: return "**="; case TOK_LAND_ASSIGN: return "&&="; case TOK_LOR_ASSIGN: return "||="; case TOK_DEC: return "--"; case TOK_INC: return "++"; case TOK_SHL: return "<<"; case TOK_SAR: return ">>"; case TOK_SHR: return ">>>"; case TOK_LT: return "<"; case TOK_LTE: return "<="; case TOK_GT: return ">"; case TOK_GTE: return ">="; case TOK_EQ: return "=="; case TOK_STRICT_EQ: return "==="; case TOK_NEQ: return "!="; case TOK_STRICT_NEQ: return "!=="; case TOK_LAND: return "&&"; case TOK_LOR: return "||"; case TOK_POW: return "**"; case TOK_ARROW: return "=>"; /* keywords */ case TOK_NULL: return "null"; case TOK_FALSE: return "false"; case TOK_TRUE: return "true"; case TOK_IF: return "if"; case TOK_ELSE: return "else"; case TOK_RETURN: return "return"; case TOK_GO: return "go"; case TOK_VAR: return "var"; case TOK_DEF: return "def"; case TOK_THIS: return "this"; case TOK_DELETE: return "delete"; case TOK_IN: return "in"; case TOK_DO: return "do"; case TOK_WHILE: return "while"; case TOK_FOR: return "for"; case TOK_BREAK: return "break"; case TOK_CONTINUE: return "continue"; case TOK_DISRUPT: return "disrupt"; case TOK_DISRUPTION: return "disruption"; case TOK_FUNCTION: return "function"; case TOK_DEBUGGER: return "debugger"; case TOK_WITH: return "with"; case TOK_CLASS: return "class"; case TOK_CONST: return "const"; case TOK_ENUM: return "enum"; case TOK_EXPORT: return "export"; case TOK_EXTENDS: return "extends"; case TOK_IMPORT: return "import"; case TOK_SUPER: return "super"; case TOK_IMPLEMENTS: return "implements"; case TOK_INTERFACE: return "interface"; case TOK_LET: return "let"; case TOK_PRIVATE: return "private"; case TOK_PROTECTED: return "protected"; case TOK_PUBLIC: return "public"; case TOK_STATIC: return "static"; case TOK_YIELD: return "yield"; case TOK_AWAIT: return "await"; case TOK_OF: return "of"; default: /* Single character tokens */ if (token_val >= 0 && token_val < 128) { single_char[0] = (char)token_val; return single_char; } return "unknown"; } } /* ============================================================ AST JSON Output Implementation ============================================================ */ /* Add a length-delimited string to a cJSON object (source pointers aren't null-terminated) */ void cjson_add_strn (cJSON *obj, const char *key, const char *str, size_t len) { char buf[256]; char *tmp = (len < sizeof (buf)) ? buf : sys_malloc (len + 1); memcpy (tmp, str, len); tmp[len] = '\0'; cJSON_AddStringToObject (obj, key, tmp); if (tmp != buf) sys_free (tmp); } /* Compare a length-delimited token string against a null-terminated literal */ inline BOOL tok_eq (const char *str, size_t len, const char *lit) { size_t ll = strlen (lit); return len == ll && memcmp (str, lit, ll) == 0; } cJSON *ast_parse_expr (ASTParseState *s); cJSON *ast_parse_assign_expr (ASTParseState *s); cJSON *ast_parse_statement (ASTParseState *s); void ast_sync_to_statement (ASTParseState *s); cJSON *ast_parse_block_statements (ASTParseState *s); cJSON *ast_parse_function_inner (ASTParseState *s, BOOL is_expr); cJSON *ast_parse_arrow_function (ASTParseState *s); /* Check if we're looking at an arrow function starting with '(' */ BOOL ast_is_arrow_function (ASTParseState *s) { if (s->token_val != '(') return FALSE; const uint8_t *p = s->buf_ptr; int depth = 1; while (p < s->buf_end && depth > 0) { uint8_t c = *p++; if (c == '(') depth++; else if (c == ')') depth--; else if (c == '"' || c == '\'' || c == '`') { /* Skip string */ uint8_t quote = c; while (p < s->buf_end && *p != quote) { if (*p == '\\' && p + 1 < s->buf_end) p++; p++; } if (p < s->buf_end) p++; } } /* Skip whitespace */ while (p < s->buf_end && (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r')) p++; /* Check for => */ return (p + 1 < s->buf_end && p[0] == '=' && p[1] == '>'); } void ast_free_token (ASTParseState *s) { if (s->decoded_str) { sys_free (s->decoded_str); s->decoded_str = NULL; } } void ast_get_line_col (ASTParseState *s, const uint8_t *ptr, int *line, int *col) { *line = get_line_col_cached (&s->lc_cache, col, ptr); } cJSON *ast_node (ASTParseState *s, const char *kind, const uint8_t *start_ptr) { cJSON *node = cJSON_CreateObject (); cJSON_AddStringToObject (node, "kind", kind); int at = (int)(start_ptr - s->buf_start); int from_row, from_col; ast_get_line_col (s, start_ptr, &from_row, &from_col); cJSON_AddNumberToObject (node, "at", at); cJSON_AddNumberToObject (node, "from_row", from_row); cJSON_AddNumberToObject (node, "from_column", from_col); return node; } void ast_node_end (ASTParseState *s, cJSON *node, const uint8_t *end_ptr) { int to_row, to_col; ast_get_line_col (s, end_ptr, &to_row, &to_col); cJSON_AddNumberToObject (node, "to_row", to_row); cJSON_AddNumberToObject (node, "to_column", to_col); } void ast_error (ASTParseState *s, const uint8_t *ptr, const char *fmt, ...) { if (s->error_count >= 5) return; s->error_count++; va_list ap; char buf[256]; int line, col; va_start (ap, fmt); vsnprintf (buf, sizeof(buf), fmt, ap); va_end (ap); ast_get_line_col (s, ptr, &line, &col); cJSON *err = cJSON_CreateObject (); cJSON_AddStringToObject (err, "message", buf); cJSON_AddNumberToObject (err, "line", line + 1); /* 1-based for user display */ cJSON_AddNumberToObject (err, "column", col + 1); cJSON_AddNumberToObject (err, "offset", (int)(ptr - s->buf_start)); if (!s->errors) { s->errors = cJSON_CreateArray (); } cJSON_AddItemToArray (s->errors, err); s->has_error = 1; } /* Decode escape sequences in a string literal into dst. Returns decoded length. */ static int ast_decode_string (const uint8_t *src, int len, char *dst) { const uint8_t *end = src + len; char *out = dst; while (src < end) { if (*src == '\\' && src + 1 < end) { src++; switch (*src) { case 'n': *out++ = '\n'; src++; break; case 't': *out++ = '\t'; src++; break; case 'r': *out++ = '\r'; src++; break; case '\\': *out++ = '\\'; src++; break; case '\'': *out++ = '\''; src++; break; case '\"': *out++ = '\"'; src++; break; case '0': *out++ = '\0'; src++; break; case 'b': *out++ = '\b'; src++; break; case 'f': *out++ = '\f'; src++; break; case 'v': *out++ = '\v'; src++; break; case 'u': { src++; unsigned int cp = 0; for (int i = 0; i < 4 && src < end; i++, src++) { cp <<= 4; if (*src >= '0' && *src <= '9') cp |= *src - '0'; else if (*src >= 'a' && *src <= 'f') cp |= *src - 'a' + 10; else if (*src >= 'A' && *src <= 'F') cp |= *src - 'A' + 10; else break; } out += unicode_to_utf8 ((uint8_t *)out, cp); } break; default: *out++ = *src++; break; } } else { *out++ = *src++; } } return out - dst; } int ast_next_token (ASTParseState *s) { const uint8_t *p; int c; BOOL ident_has_escape; ast_free_token (s); p = s->buf_ptr; s->got_lf = FALSE; redo: s->token_ptr = p; c = *p; switch (c) { case 0: if (p >= s->buf_end) { s->token_val = TOK_EOF; } else { goto def_token; } break; case '`': { const uint8_t *start = p; p++; while (p < s->buf_end && *p != '`') { if (*p == '\\' && p + 1 < s->buf_end) { p += 2; continue; } if (*p == '$' && p + 1 < s->buf_end && p[1] == '{') { p += 2; int depth = 1; while (p < s->buf_end && depth > 0) { if (*p == '{') { depth++; p++; } else if (*p == '}') { depth--; p++; } else if (*p == '\'' || *p == '"' || *p == '`') { int q = *p; p++; while (p < s->buf_end && *p != q) { if (*p == '\\' && p + 1 < s->buf_end) p++; p++; } if (p < s->buf_end) p++; } else { p++; } } continue; } p++; } if (p >= s->buf_end) { ast_error (s, start, "unterminated template literal"); s->buf_ptr = p; goto redo; } p++; s->token_val = TOK_TEMPLATE; { const uint8_t *raw = start + 1; int raw_len = p - start - 2; BOOL has_escape = FALSE; for (int i = 0; i < raw_len; i++) { if (raw[i] == '\\') { has_escape = TRUE; break; } } if (has_escape) { char *buf = sys_malloc (raw_len * 4 + 1); int decoded_len = ast_decode_string (raw, raw_len, buf); s->decoded_str = buf; s->token_u.str.str = buf; s->token_u.str.len = decoded_len; } else { s->token_u.str.str = (const char *)raw; s->token_u.str.len = raw_len; } } } break; case '\'': case '\"': { const uint8_t *start = p; int quote = c; p++; while (p < s->buf_end && *p != quote) { if (*p == '\\' && p + 1 < s->buf_end) p++; p++; } if (p >= s->buf_end) { ast_error (s, start, "unterminated string literal"); s->buf_ptr = p; goto redo; } p++; /* Store the string content without quotes, decoding escape sequences */ s->token_val = TOK_STRING; { const uint8_t *raw = start + 1; int raw_len = p - start - 2; /* Check if any escape sequences need decoding */ BOOL has_escape = FALSE; for (int i = 0; i < raw_len; i++) { if (raw[i] == '\\') { has_escape = TRUE; break; } } if (has_escape) { char *buf = sys_malloc (raw_len * 4 + 1); int decoded_len = ast_decode_string (raw, raw_len, buf); s->decoded_str = buf; s->token_u.str.str = buf; s->token_u.str.len = decoded_len; } else { s->token_u.str.str = (const char *)raw; s->token_u.str.len = raw_len; } } } break; case '\r': if (p[1] == '\n') p++; /* fall through */ case '\n': p++; s->got_lf = TRUE; goto redo; case '\f': case '\v': case ' ': case '\t': p++; goto redo; case '/': if (p[1] == '*') { const uint8_t *comment_start = p; p += 2; BOOL found_end = FALSE; while (p < s->buf_end) { if (p[0] == '*' && p + 1 < s->buf_end && p[1] == '/') { p += 2; found_end = TRUE; break; } if (*p == '\n' || *p == '\r') s->got_lf = TRUE; p++; } if (!found_end) ast_error (s, comment_start, "unterminated block comment"); goto redo; } else if (p[1] == '/') { p += 2; while (p < s->buf_end && *p != '\n' && *p != '\r') p++; goto redo; } else if (p[1] == '=') { p += 2; s->token_val = TOK_DIV_ASSIGN; } else if (p[1] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 2; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 2; } else { p++; s->token_val = c; } break; case '\\': goto def_token; case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': case '_': case '$': { const uint8_t *start = p; ident_has_escape = FALSE; p++; while (p < s->buf_end) { c = *p; if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' || c == '$' || c == '?' || c == '!') { p++; } else if (c >= 0x80) { /* unicode identifier */ p++; while (p < s->buf_end && (*p & 0xc0) == 0x80) p++; } else { break; } } size_t len = p - start; s->token_u.ident.str = (const char *)start; s->token_u.ident.len = len; s->token_u.ident.has_escape = ident_has_escape; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; /* Check for keywords */ if (len == 2 && !memcmp (start, "if", 2)) s->token_val = TOK_IF; else if (len == 2 && !memcmp (start, "in", 2)) s->token_val = TOK_IN; else if (len == 2 && !memcmp (start, "do", 2)) s->token_val = TOK_DO; else if (len == 2 && !memcmp (start, "go", 2)) s->token_val = TOK_GO; else if (len == 3 && !memcmp (start, "var", 3)) s->token_val = TOK_VAR; else if (len == 3 && !memcmp (start, "def", 3)) s->token_val = TOK_DEF; else if (len == 3 && !memcmp (start, "for", 3)) s->token_val = TOK_FOR; else if (len == 4 && !memcmp (start, "else", 4)) s->token_val = TOK_ELSE; else if (len == 4 && !memcmp (start, "this", 4)) s->token_val = TOK_THIS; else if (len == 4 && !memcmp (start, "null", 4)) s->token_val = TOK_NULL; else if (len == 4 && !memcmp (start, "true", 4)) s->token_val = TOK_TRUE; else if (len == 5 && !memcmp (start, "false", 5)) s->token_val = TOK_FALSE; else if (len == 5 && !memcmp (start, "while", 5)) s->token_val = TOK_WHILE; else if (len == 5 && !memcmp (start, "break", 5)) s->token_val = TOK_BREAK; else if (len == 6 && !memcmp (start, "return", 6)) s->token_val = TOK_RETURN; else if (len == 6 && !memcmp (start, "delete", 6)) s->token_val = TOK_DELETE; else if (len == 7 && !memcmp (start, "disrupt", 7)) s->token_val = TOK_DISRUPT; else if (len == 8 && !memcmp (start, "function", 8)) s->token_val = TOK_FUNCTION; else if (len == 8 && !memcmp (start, "continue", 8)) s->token_val = TOK_CONTINUE; else if (len == 10 && !memcmp (start, "disruption", 10)) s->token_val = TOK_DISRUPTION; } break; case '.': if (p[1] >= '0' && p[1] <= '9') { goto parse_number; } else { goto def_token; } break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': parse_number: { const uint8_t *start = p; BOOL is_float = FALSE; /* hex */ if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { p += 2; const uint8_t *digits_start = p; while (p < s->buf_end && ((c = *p, (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') || c == '_'))) p++; if (p == digits_start) ast_error (s, start, "malformed hex number: no digits after '0x'"); } else if (p[0] == '0' && (p[1] == 'b' || p[1] == 'B')) { p += 2; const uint8_t *digits_start = p; while (p < s->buf_end && (*p == '0' || *p == '1' || *p == '_')) p++; if (p == digits_start) ast_error (s, start, "malformed binary number: no digits after '0b'"); } else if (p[0] == '0' && (p[1] == 'o' || p[1] == 'O')) { p += 2; const uint8_t *digits_start = p; while (p < s->buf_end && (*p >= '0' && *p <= '7')) p++; if (p == digits_start) ast_error (s, start, "malformed octal number: no digits after '0o'"); } else { while (p < s->buf_end && ((*p >= '0' && *p <= '9') || *p == '_')) p++; if (p < s->buf_end && *p == '.') { is_float = TRUE; p++; while (p < s->buf_end && ((*p >= '0' && *p <= '9') || *p == '_')) p++; } if (p < s->buf_end && (*p == 'e' || *p == 'E')) { is_float = TRUE; p++; if (p < s->buf_end && (*p == '+' || *p == '-')) p++; const uint8_t *exp_start = p; while (p < s->buf_end && (*p >= '0' && *p <= '9')) p++; if (p == exp_start) ast_error (s, start, "malformed number: no digits after exponent"); } } s->token_val = TOK_NUMBER; /* Parse the number value */ char *numstr = sys_malloc (p - start + 1); memcpy (numstr, start, p - start); numstr[p - start] = '\0'; double val = strtod (numstr, NULL); sys_free (numstr); s->token_u.num.val = val; } break; case '*': if (p[1] == '=') { p += 2; s->token_val = TOK_MUL_ASSIGN; } else if (p[1] == '*') { if (p[2] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 3; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 3; } else if (p[2] == '=') { p += 3; s->token_val = TOK_POW_ASSIGN; } else { p += 2; s->token_val = TOK_POW; } } else if (p[1] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 2; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 2; } else { goto def_token; } break; case '%': if (p[1] == '=') { p += 2; s->token_val = TOK_MOD_ASSIGN; } else if (p[1] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 2; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 2; } else { goto def_token; } break; case '+': if (p[1] == '=') { p += 2; s->token_val = TOK_PLUS_ASSIGN; } else if (p[1] == '+') { p += 2; s->token_val = TOK_INC; } else if (p[1] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 2; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 2; } else { goto def_token; } break; case '-': if (p[1] == '=') { p += 2; s->token_val = TOK_MINUS_ASSIGN; } else if (p[1] == '-') { p += 2; s->token_val = TOK_DEC; } else if (p[1] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 2; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 2; } else { goto def_token; } break; case '<': if (p[1] == '=' && p[2] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 3; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 3; } else if (p[1] == '=') { p += 2; s->token_val = TOK_LTE; } else if (p[1] == '<') { if (p[2] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 3; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 3; } else if (p[2] == '=') { p += 3; s->token_val = TOK_SHL_ASSIGN; } else { p += 2; s->token_val = TOK_SHL; } } else if (p[1] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 2; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 2; } else { goto def_token; } break; case '>': if (p[1] == '=' && p[2] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 3; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 3; } else if (p[1] == '=') { p += 2; s->token_val = TOK_GTE; } else if (p[1] == '>') { if (p[2] == '>') { if (p[3] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 4; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 4; } else if (p[3] == '=') { p += 4; s->token_val = TOK_SHR_ASSIGN; } else { p += 3; s->token_val = TOK_SHR; } } else if (p[2] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 3; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 3; } else if (p[2] == '=') { p += 3; s->token_val = TOK_SAR_ASSIGN; } else { p += 2; s->token_val = TOK_SAR; } } else if (p[1] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 2; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 2; } else { goto def_token; } break; case '=': if (p[1] == '=') { if (p[2] == '=') { p += 3; s->token_val = TOK_STRICT_EQ; } else { p += 2; s->token_val = TOK_EQ; } } else if (p[1] == '>') { p += 2; s->token_val = TOK_ARROW; } else if (p[1] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 2; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 2; } else { goto def_token; } break; case '!': if (p[1] == '=') { if (p[2] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 3; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 3; } else if (p[2] == '=') { p += 3; s->token_val = TOK_STRICT_NEQ; } else { p += 2; s->token_val = TOK_NEQ; } } else { goto def_token; } break; case '&': if (p[1] == '&') { if (p[2] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 3; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 3; } else if (p[2] == '=') { p += 3; s->token_val = TOK_LAND_ASSIGN; } else { p += 2; s->token_val = TOK_LAND; } } else if (p[1] == '=') { p += 2; s->token_val = TOK_AND_ASSIGN; } else if (p[1] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 2; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 2; } else { goto def_token; } break; case '|': if (p[1] == '|') { if (p[2] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 3; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 3; } else if (p[2] == '=') { p += 3; s->token_val = TOK_LOR_ASSIGN; } else { p += 2; s->token_val = TOK_LOR; } } else if (p[1] == '=') { p += 2; s->token_val = TOK_OR_ASSIGN; } else if (p[1] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 2; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 2; } else { goto def_token; } break; case '^': if (p[1] == '=') { p += 2; s->token_val = TOK_XOR_ASSIGN; } else if (p[1] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 2; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 2; } else { goto def_token; } break; case '[': if (p[1] == ']' && p[2] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 3; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 3; } else { goto def_token; } break; case '~': if (p[1] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 2; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 2; } else { goto def_token; } break; case '?': goto def_token; default: def_token: p++; s->token_val = c; break; } s->buf_ptr = p; return 0; } /* Tokenizer function that does NOT skip whitespace/comments - emits them as tokens */ int tokenize_next (ASTParseState *s) { const uint8_t *p; int c; BOOL ident_has_escape; ast_free_token (s); p = s->buf_ptr; s->got_lf = FALSE; s->token_ptr = p; c = *p; switch (c) { case 0: if (p >= s->buf_end) { s->token_val = TOK_EOF; } else { goto def_token; } break; case '`': { const uint8_t *start = p; p++; while (p < s->buf_end && *p != '`') { if (*p == '\\' && p + 1 < s->buf_end) { p += 2; continue; } if (*p == '$' && p + 1 < s->buf_end && p[1] == '{') { p += 2; int depth = 1; while (p < s->buf_end && depth > 0) { if (*p == '{') { depth++; p++; } else if (*p == '}') { depth--; p++; } else if (*p == '\'' || *p == '"' || *p == '`') { int q = *p; p++; while (p < s->buf_end && *p != q) { if (*p == '\\' && p + 1 < s->buf_end) p++; p++; } if (p < s->buf_end) p++; } else { p++; } } continue; } p++; } if (p >= s->buf_end) { ast_error (s, start, "unterminated template literal"); s->token_val = TOK_ERROR; s->token_u.str.str = (const char *)(start + 1); s->token_u.str.len = p - start - 1; } else { p++; s->token_val = TOK_TEMPLATE; s->token_u.str.str = (const char *)(start + 1); s->token_u.str.len = p - start - 2; } } break; case '\'': case '\"': { const uint8_t *start = p; int quote = c; p++; while (p < s->buf_end && *p != quote) { if (*p == '\\' && p + 1 < s->buf_end) p++; p++; } if (p >= s->buf_end) { ast_error (s, start, "unterminated string literal"); s->token_val = TOK_ERROR; s->token_u.str.str = (const char *)(start + 1); s->token_u.str.len = p - start - 1; } else { p++; s->token_val = TOK_STRING; s->token_u.str.str = (const char *)(start + 1); s->token_u.str.len = p - start - 2; } } break; case '\r': if (p[1] == '\n') p++; /* fall through */ case '\n': p++; s->got_lf = TRUE; s->token_val = TOK_NEWLINE; break; case '\f': case '\v': case ' ': case '\t': { /* Collect consecutive whitespace (excluding newlines) */ while (p < s->buf_end && (*p == ' ' || *p == '\t' || *p == '\f' || *p == '\v')) p++; s->token_val = TOK_SPACE; } break; case '/': if (p[1] == '*') { /* Multi-line comment */ const uint8_t *comment_start = p; p += 2; BOOL found_end = FALSE; while (p < s->buf_end) { if (p[0] == '*' && p + 1 < s->buf_end && p[1] == '/') { p += 2; found_end = TRUE; break; } if (*p == '\n' || *p == '\r') s->got_lf = TRUE; p++; } if (!found_end) { ast_error (s, comment_start, "unterminated block comment"); s->token_val = TOK_ERROR; } else { s->token_val = TOK_COMMENT; } } else if (p[1] == '/') { /* Single-line comment */ p += 2; while (p < s->buf_end && *p != '\n' && *p != '\r') p++; s->token_val = TOK_COMMENT; } else if (p[1] == '=') { p += 2; s->token_val = TOK_DIV_ASSIGN; } else if (p[1] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 2; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 2; } else { p++; s->token_val = c; } break; case '\\': goto def_token; case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': case '_': case '$': { const uint8_t *start = p; ident_has_escape = FALSE; p++; while (p < s->buf_end) { c = *p; if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' || c == '$' || c == '?' || c == '!') { p++; } else if (c >= 0x80) { p++; while (p < s->buf_end && (*p & 0xc0) == 0x80) p++; } else { break; } } size_t len = p - start; s->token_u.ident.str = (const char *)start; s->token_u.ident.len = len; s->token_u.ident.has_escape = ident_has_escape; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; /* Check for keywords */ if (len == 2 && !memcmp (start, "if", 2)) s->token_val = TOK_IF; else if (len == 2 && !memcmp (start, "in", 2)) s->token_val = TOK_IN; else if (len == 2 && !memcmp (start, "do", 2)) s->token_val = TOK_DO; else if (len == 2 && !memcmp (start, "go", 2)) s->token_val = TOK_GO; else if (len == 3 && !memcmp (start, "var", 3)) s->token_val = TOK_VAR; else if (len == 3 && !memcmp (start, "def", 3)) s->token_val = TOK_DEF; else if (len == 3 && !memcmp (start, "for", 3)) s->token_val = TOK_FOR; else if (len == 4 && !memcmp (start, "else", 4)) s->token_val = TOK_ELSE; else if (len == 4 && !memcmp (start, "this", 4)) s->token_val = TOK_THIS; else if (len == 4 && !memcmp (start, "null", 4)) s->token_val = TOK_NULL; else if (len == 4 && !memcmp (start, "true", 4)) s->token_val = TOK_TRUE; else if (len == 5 && !memcmp (start, "false", 5)) s->token_val = TOK_FALSE; else if (len == 5 && !memcmp (start, "while", 5)) s->token_val = TOK_WHILE; else if (len == 5 && !memcmp (start, "break", 5)) s->token_val = TOK_BREAK; else if (len == 6 && !memcmp (start, "return", 6)) s->token_val = TOK_RETURN; else if (len == 6 && !memcmp (start, "delete", 6)) s->token_val = TOK_DELETE; else if (len == 7 && !memcmp (start, "disrupt", 7)) s->token_val = TOK_DISRUPT; else if (len == 8 && !memcmp (start, "function", 8)) s->token_val = TOK_FUNCTION; else if (len == 8 && !memcmp (start, "continue", 8)) s->token_val = TOK_CONTINUE; else if (len == 10 && !memcmp (start, "disruption", 10)) s->token_val = TOK_DISRUPTION; } break; case '.': if (p[1] >= '0' && p[1] <= '9') { goto tokenize_number; } else { goto def_token; } break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': tokenize_number: { const uint8_t *start = p; BOOL is_float = FALSE; if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { p += 2; const uint8_t *digits_start = p; while (p < s->buf_end && ((c = *p, (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') || c == '_'))) p++; if (p == digits_start) ast_error (s, start, "malformed hex number: no digits after '0x'"); } else if (p[0] == '0' && (p[1] == 'b' || p[1] == 'B')) { p += 2; const uint8_t *digits_start = p; while (p < s->buf_end && (*p == '0' || *p == '1' || *p == '_')) p++; if (p == digits_start) ast_error (s, start, "malformed binary number: no digits after '0b'"); } else if (p[0] == '0' && (p[1] == 'o' || p[1] == 'O')) { p += 2; const uint8_t *digits_start = p; while (p < s->buf_end && (*p >= '0' && *p <= '7')) p++; if (p == digits_start) ast_error (s, start, "malformed octal number: no digits after '0o'"); } else { while (p < s->buf_end && ((*p >= '0' && *p <= '9') || *p == '_')) p++; if (p < s->buf_end && *p == '.') { is_float = TRUE; p++; while (p < s->buf_end && ((*p >= '0' && *p <= '9') || *p == '_')) p++; } if (p < s->buf_end && (*p == 'e' || *p == 'E')) { is_float = TRUE; p++; if (p < s->buf_end && (*p == '+' || *p == '-')) p++; const uint8_t *exp_start = p; while (p < s->buf_end && (*p >= '0' && *p <= '9')) p++; if (p == exp_start) ast_error (s, start, "malformed number: no digits after exponent"); } } (void)is_float; s->token_val = TOK_NUMBER; char *numstr = sys_malloc (p - start + 1); memcpy (numstr, start, p - start); numstr[p - start] = '\0'; double val = strtod (numstr, NULL); sys_free (numstr); s->token_u.num.val = val; } break; case '*': if (p[1] == '=') { p += 2; s->token_val = TOK_MUL_ASSIGN; } else if (p[1] == '*') { if (p[2] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 3; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 3; } else if (p[2] == '=') { p += 3; s->token_val = TOK_POW_ASSIGN; } else { p += 2; s->token_val = TOK_POW; } } else if (p[1] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 2; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 2; } else { goto def_token; } break; case '%': if (p[1] == '=') { p += 2; s->token_val = TOK_MOD_ASSIGN; } else if (p[1] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 2; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 2; } else { goto def_token; } break; case '+': if (p[1] == '=') { p += 2; s->token_val = TOK_PLUS_ASSIGN; } else if (p[1] == '+') { p += 2; s->token_val = TOK_INC; } else if (p[1] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 2; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 2; } else { goto def_token; } break; case '-': if (p[1] == '=') { p += 2; s->token_val = TOK_MINUS_ASSIGN; } else if (p[1] == '-') { p += 2; s->token_val = TOK_DEC; } else if (p[1] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 2; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 2; } else { goto def_token; } break; case '<': if (p[1] == '=' && p[2] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 3; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 3; } else if (p[1] == '=') { p += 2; s->token_val = TOK_LTE; } else if (p[1] == '<') { if (p[2] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 3; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 3; } else if (p[2] == '=') { p += 3; s->token_val = TOK_SHL_ASSIGN; } else { p += 2; s->token_val = TOK_SHL; } } else if (p[1] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 2; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 2; } else { goto def_token; } break; case '>': if (p[1] == '=' && p[2] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 3; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 3; } else if (p[1] == '=') { p += 2; s->token_val = TOK_GTE; } else if (p[1] == '>') { if (p[2] == '>') { if (p[3] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 4; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 4; } else if (p[3] == '=') { p += 4; s->token_val = TOK_SHR_ASSIGN; } else { p += 3; s->token_val = TOK_SHR; } } else if (p[2] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 3; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 3; } else if (p[2] == '=') { p += 3; s->token_val = TOK_SAR_ASSIGN; } else { p += 2; s->token_val = TOK_SAR; } } else if (p[1] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 2; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 2; } else { goto def_token; } break; case '=': if (p[1] == '=') { if (p[2] == '=') { p += 3; s->token_val = TOK_STRICT_EQ; } else { p += 2; s->token_val = TOK_EQ; } } else if (p[1] == '>') { p += 2; s->token_val = TOK_ARROW; } else if (p[1] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 2; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 2; } else { goto def_token; } break; case '!': if (p[1] == '=') { if (p[2] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 3; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 3; } else if (p[2] == '=') { p += 3; s->token_val = TOK_STRICT_NEQ; } else { p += 2; s->token_val = TOK_NEQ; } } else { goto def_token; } break; case '&': if (p[1] == '&') { if (p[2] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 3; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 3; } else if (p[2] == '=') { p += 3; s->token_val = TOK_LAND_ASSIGN; } else { p += 2; s->token_val = TOK_LAND; } } else if (p[1] == '=') { p += 2; s->token_val = TOK_AND_ASSIGN; } else if (p[1] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 2; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 2; } else { goto def_token; } break; case '|': if (p[1] == '|') { if (p[2] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 3; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 3; } else if (p[2] == '=') { p += 3; s->token_val = TOK_LOR_ASSIGN; } else { p += 2; s->token_val = TOK_LOR; } } else if (p[1] == '=') { p += 2; s->token_val = TOK_OR_ASSIGN; } else if (p[1] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 2; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 2; } else { goto def_token; } break; case '^': if (p[1] == '=') { p += 2; s->token_val = TOK_XOR_ASSIGN; } else if (p[1] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 2; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 2; } else { goto def_token; } break; case '[': if (p[1] == ']' && p[2] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 3; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 3; } else { goto def_token; } break; case '~': if (p[1] == '!') { s->token_u.ident.str = (const char *)p; s->token_u.ident.len = 2; s->token_u.ident.has_escape = FALSE; s->token_u.ident.is_reserved = FALSE; s->token_val = TOK_IDENT; p += 2; } else { goto def_token; } break; case '?': goto def_token; default: def_token: p++; s->token_val = c; break; } s->buf_ptr = p; return 0; } static cJSON *build_token_object (ASTParseState *s) { cJSON *tok = cJSON_CreateObject (); const char *kind = ast_token_kind_str (s->token_val); cJSON_AddStringToObject (tok, "kind", kind); /* Position info */ int at = (int)(s->token_ptr - s->buf_start); int from_row, from_col; ast_get_line_col (s, s->token_ptr, &from_row, &from_col); int to_row, to_col; ast_get_line_col (s, s->buf_ptr, &to_row, &to_col); cJSON_AddNumberToObject (tok, "at", at); cJSON_AddNumberToObject (tok, "from_row", from_row); cJSON_AddNumberToObject (tok, "from_column", from_col); cJSON_AddNumberToObject (tok, "to_row", to_row); cJSON_AddNumberToObject (tok, "to_column", to_col); /* Value field based on token type */ switch (s->token_val) { case TOK_NUMBER: { /* Store original source text as value */ size_t len = s->buf_ptr - s->token_ptr; char *text = sys_malloc (len + 1); memcpy (text, s->token_ptr, len); text[len] = '\0'; cJSON_AddStringToObject (tok, "value", text); sys_free (text); /* Store parsed number */ double d = s->token_u.num.val; cJSON_AddNumberToObject (tok, "number", d); } break; case TOK_STRING: case TOK_TEMPLATE: { cjson_add_strn (tok, "value", s->token_u.str.str, s->token_u.str.len); } break; case TOK_IDENT: { cjson_add_strn (tok, "value", s->token_u.ident.str, s->token_u.ident.len); } break; case TOK_ERROR: { /* Store the raw source text as value */ size_t len = s->buf_ptr - s->token_ptr; char *text = sys_malloc (len + 1); memcpy (text, s->token_ptr, len); text[len] = '\0'; cJSON_AddStringToObject (tok, "value", text); sys_free (text); } break; case TOK_COMMENT: case TOK_SPACE: case TOK_NEWLINE: { /* Store the raw source text */ size_t len = s->buf_ptr - s->token_ptr; char *text = sys_malloc (len + 1); memcpy (text, s->token_ptr, len); text[len] = '\0'; cJSON_AddStringToObject (tok, "value", text); sys_free (text); } break; default: /* No value field for operators/punctuators/keywords */ break; } return tok; } char *JS_Tokenize (const char *source, size_t len, const char *filename) { ASTParseState s; memset (&s, 0, sizeof (s)); s.filename = filename; s.buf_start = (const uint8_t *)source; s.buf_ptr = (const uint8_t *)source; s.buf_end = (const uint8_t *)source + len; s.function_nr = 0; s.errors = NULL; s.has_error = 0; s.lc_cache.ptr = s.buf_start; s.lc_cache.buf_start = s.buf_start; cJSON *root = cJSON_CreateObject (); cJSON_AddStringToObject (root, "filename", filename); cJSON *tokens = cJSON_AddArrayToObject (root, "tokens"); /* Tokenize all tokens including whitespace */ while (1) { tokenize_next (&s); cJSON *tok = build_token_object (&s); cJSON_AddItemToArray (tokens, tok); if (s.token_val == TOK_EOF) break; } /* Add errors to output if any */ if (s.errors) { cJSON_AddItemToObject (root, "errors", s.errors); } char *json = cJSON_PrintUnformatted (root); cJSON_Delete (root); return json; } /* ============================================================ MACH Compiler — AST directly to binary JSCodeRegister ============================================================ */ /* Variable kinds */ #define MACH_VAR_ARG 0 #define MACH_VAR_LOCAL 1 #define MACH_VAR_CLOSED 2 /* Variable resolution result */ typedef enum MachVarResolution { MACH_VAR_LOCAL_SLOT, /* variable is in current scope */ MACH_VAR_CLOSURE, /* variable is in parent scope */ MACH_VAR_UNBOUND /* variable not found in any scope */ } MachVarResolution;