/*
 * QuickJS Javascript Engine
 *
 * Copyright (c) 2017-2025 Fabrice Bellard
 * Copyright (c) 2017-2025 Charlie Gordon
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
|
|
|
|
#include "quickjs-internal.h"
|
|
|
|
static const char *ast_token_kind_str(int token_val) {
|
|
static char single_char[2] = {0, 0};
|
|
switch (token_val) {
|
|
case TOK_NUMBER: return "number";
|
|
case TOK_STRING: return "text";
|
|
case TOK_TEMPLATE: return "text";
|
|
case TOK_IDENT: return "name";
|
|
case TOK_COMMENT: return "comment";
|
|
case TOK_NEWLINE: return "newline";
|
|
case TOK_SPACE: return "space";
|
|
case TOK_REGEXP: return "regexp";
|
|
case TOK_PRIVATE_NAME: return "private_name";
|
|
case TOK_EOF: return "eof";
|
|
case TOK_ERROR: return "error";
|
|
/* compound operators */
|
|
case TOK_MUL_ASSIGN: return "*=";
|
|
case TOK_DIV_ASSIGN: return "/=";
|
|
case TOK_MOD_ASSIGN: return "%=";
|
|
case TOK_PLUS_ASSIGN: return "+=";
|
|
case TOK_MINUS_ASSIGN: return "-=";
|
|
case TOK_SHL_ASSIGN: return "<<=";
|
|
case TOK_SAR_ASSIGN: return ">>=";
|
|
case TOK_SHR_ASSIGN: return ">>>=";
|
|
case TOK_AND_ASSIGN: return "&=";
|
|
case TOK_XOR_ASSIGN: return "^=";
|
|
case TOK_OR_ASSIGN: return "|=";
|
|
case TOK_POW_ASSIGN: return "**=";
|
|
case TOK_LAND_ASSIGN: return "&&=";
|
|
case TOK_LOR_ASSIGN: return "||=";
|
|
case TOK_DEC: return "--";
|
|
case TOK_INC: return "++";
|
|
case TOK_SHL: return "<<";
|
|
case TOK_SAR: return ">>";
|
|
case TOK_SHR: return ">>>";
|
|
case TOK_LT: return "<";
|
|
case TOK_LTE: return "<=";
|
|
case TOK_GT: return ">";
|
|
case TOK_GTE: return ">=";
|
|
case TOK_EQ: return "==";
|
|
case TOK_STRICT_EQ: return "===";
|
|
case TOK_NEQ: return "!=";
|
|
case TOK_STRICT_NEQ: return "!==";
|
|
case TOK_LAND: return "&&";
|
|
case TOK_LOR: return "||";
|
|
case TOK_POW: return "**";
|
|
case TOK_ARROW: return "=>";
|
|
/* keywords */
|
|
case TOK_NULL: return "null";
|
|
case TOK_FALSE: return "false";
|
|
case TOK_TRUE: return "true";
|
|
case TOK_IF: return "if";
|
|
case TOK_ELSE: return "else";
|
|
case TOK_RETURN: return "return";
|
|
case TOK_GO: return "go";
|
|
case TOK_VAR: return "var";
|
|
case TOK_DEF: return "def";
|
|
case TOK_THIS: return "this";
|
|
case TOK_DELETE: return "delete";
|
|
case TOK_IN: return "in";
|
|
case TOK_DO: return "do";
|
|
case TOK_WHILE: return "while";
|
|
case TOK_FOR: return "for";
|
|
case TOK_BREAK: return "break";
|
|
case TOK_CONTINUE: return "continue";
|
|
case TOK_DISRUPT: return "disrupt";
|
|
case TOK_DISRUPTION: return "disruption";
|
|
case TOK_FUNCTION: return "function";
|
|
case TOK_DEBUGGER: return "debugger";
|
|
case TOK_WITH: return "with";
|
|
case TOK_CLASS: return "class";
|
|
case TOK_CONST: return "const";
|
|
case TOK_ENUM: return "enum";
|
|
case TOK_EXPORT: return "export";
|
|
case TOK_EXTENDS: return "extends";
|
|
case TOK_IMPORT: return "import";
|
|
case TOK_SUPER: return "super";
|
|
case TOK_IMPLEMENTS: return "implements";
|
|
case TOK_INTERFACE: return "interface";
|
|
case TOK_LET: return "let";
|
|
case TOK_PRIVATE: return "private";
|
|
case TOK_PROTECTED: return "protected";
|
|
case TOK_PUBLIC: return "public";
|
|
case TOK_STATIC: return "static";
|
|
case TOK_YIELD: return "yield";
|
|
case TOK_AWAIT: return "await";
|
|
case TOK_OF: return "of";
|
|
default:
|
|
/* Single character tokens */
|
|
if (token_val >= 0 && token_val < 128) {
|
|
single_char[0] = (char)token_val;
|
|
return single_char;
|
|
}
|
|
return "unknown";
|
|
}
|
|
}
|
|
|
|
/* ============================================================
   AST JSON Output Implementation
   ============================================================ */
|
|
|
|
|
|
/* Add a length-delimited string to a cJSON object (source pointers aren't null-terminated) */
|
|
void cjson_add_strn (cJSON *obj, const char *key, const char *str, size_t len) {
|
|
char buf[256];
|
|
char *tmp = (len < sizeof (buf)) ? buf : sys_malloc (len + 1);
|
|
memcpy (tmp, str, len);
|
|
tmp[len] = '\0';
|
|
cJSON_AddStringToObject (obj, key, tmp);
|
|
if (tmp != buf) sys_free (tmp);
|
|
}
|
|
|
|
/* Compare a length-delimited token string against a null-terminated literal */
|
|
inline BOOL tok_eq (const char *str, size_t len, const char *lit) {
|
|
size_t ll = strlen (lit);
|
|
return len == ll && memcmp (str, lit, ll) == 0;
|
|
}
|
|
|
|
cJSON *ast_parse_expr (ASTParseState *s);
|
|
cJSON *ast_parse_assign_expr (ASTParseState *s);
|
|
cJSON *ast_parse_statement (ASTParseState *s);
|
|
void ast_sync_to_statement (ASTParseState *s);
|
|
cJSON *ast_parse_block_statements (ASTParseState *s);
|
|
cJSON *ast_parse_function_inner (ASTParseState *s, BOOL is_expr);
|
|
cJSON *ast_parse_arrow_function (ASTParseState *s);
|
|
|
|
/* Check if we're looking at an arrow function starting with '(' */
|
|
BOOL ast_is_arrow_function (ASTParseState *s) {
|
|
if (s->token_val != '(') return FALSE;
|
|
const uint8_t *p = s->buf_ptr;
|
|
int depth = 1;
|
|
while (p < s->buf_end && depth > 0) {
|
|
uint8_t c = *p++;
|
|
if (c == '(') depth++;
|
|
else if (c == ')') depth--;
|
|
else if (c == '"' || c == '\'' || c == '`') {
|
|
/* Skip string */
|
|
uint8_t quote = c;
|
|
while (p < s->buf_end && *p != quote) {
|
|
if (*p == '\\' && p + 1 < s->buf_end) p++;
|
|
p++;
|
|
}
|
|
if (p < s->buf_end) p++;
|
|
}
|
|
}
|
|
/* Skip whitespace */
|
|
while (p < s->buf_end && (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r')) p++;
|
|
/* Check for => */
|
|
return (p + 1 < s->buf_end && p[0] == '=' && p[1] == '>');
|
|
}
|
|
|
|
/* Release the heap buffer that holds the current token's decoded string, if
 * there is one, and clear s->decoded_str. */
void ast_free_token (ASTParseState *s) {
  if (!s->decoded_str) return;

  sys_free (s->decoded_str);
  s->decoded_str = NULL;
}
|
|
|
|
/* Look up the source position of ptr through the state's line/column cache.
 * The value returned by get_line_col_cached is stored in *line; col is handed
 * to that helper, which fills it in. */
void ast_get_line_col (ASTParseState *s, const uint8_t *ptr, int *line, int *col) {
  int row = get_line_col_cached (&s->lc_cache, col, ptr);

  *line = row;
}
|
|
|
|
/* Create a new AST node object.
 *
 * The node receives, in this order: "kind" (the given string), "at" (byte
 * offset of start_ptr from s->buf_start), and "from_row" / "from_column" (the
 * position reported by ast_get_line_col for start_ptr).  The caller owns the
 * returned object.
 */
cJSON *ast_node (ASTParseState *s, const char *kind, const uint8_t *start_ptr) {
  int row, column;
  cJSON *result;

  ast_get_line_col (s, start_ptr, &row, &column);

  result = cJSON_CreateObject ();
  cJSON_AddStringToObject (result, "kind", kind);
  cJSON_AddNumberToObject (result, "at", (int)(start_ptr - s->buf_start));
  cJSON_AddNumberToObject (result, "from_row", row);
  cJSON_AddNumberToObject (result, "from_column", column);
  return result;
}
|
|
|
|
/* Record where a node ends: adds "to_row" and "to_column" to node, using the
 * position that ast_get_line_col reports for end_ptr. */
void ast_node_end (ASTParseState *s, cJSON *node, const uint8_t *end_ptr) {
  int row = 0, column = 0;

  ast_get_line_col (s, end_ptr, &row, &column);
  cJSON_AddNumberToObject (node, "to_row", row);
  cJSON_AddNumberToObject (node, "to_column", column);
}
|
|
|
|
/* Record a parse error located at ptr.
 *
 * The printf-style message is formatted into a 256-byte buffer (longer
 * messages are truncated by vsnprintf) and appended to s->errors as an object
 * with "message", "line", "column" and "offset" members; the array is created
 * on first use.  Sets s->has_error.  Only the first five errors are kept:
 * once s->error_count has reached 5 the call does nothing.
 */
void ast_error (ASTParseState *s, const uint8_t *ptr, const char *fmt, ...) {
  char message[256];
  int row, column;
  va_list args;
  cJSON *entry;

  if (s->error_count >= 5) return;
  s->error_count += 1;

  va_start (args, fmt);
  vsnprintf (message, sizeof message, fmt, args);
  va_end (args);

  ast_get_line_col (s, ptr, &row, &column);

  entry = cJSON_CreateObject ();
  cJSON_AddStringToObject (entry, "message", message);
  /* line and column are reported 1-based for user display */
  cJSON_AddNumberToObject (entry, "line", row + 1);
  cJSON_AddNumberToObject (entry, "column", column + 1);
  cJSON_AddNumberToObject (entry, "offset", (int)(ptr - s->buf_start));

  if (!s->errors) s->errors = cJSON_CreateArray ();
  cJSON_AddItemToArray (s->errors, entry);
  s->has_error = 1;
}
|
|
|
|
/* Return the value (0-15) of hex digit c, or -1 when c is not a hex digit. */
static int ast_decode_hex_digit (int c) {
  if (c >= '0' && c <= '9') return c - '0';
  if (c >= 'a' && c <= 'f') return c - 'a' + 10;
  if (c >= 'A' && c <= 'F') return c - 'A' + 10;
  return -1;
}

/* Read up to four hex digits starting at src without reading at or past end.
 * Stores the accumulated value in *cp and returns how many digits were read
 * (0-4). */
static int ast_decode_hex4 (const uint8_t *src, const uint8_t *end, unsigned int *cp) {
  unsigned int value = 0;
  int count = 0;

  while (count < 4 && src + count < end) {
    int digit = ast_decode_hex_digit (src[count]);
    if (digit < 0) break;
    value = (value << 4) | (unsigned int)digit;
    count++;
  }
  *cp = value;
  return count;
}

/* Decode the escape sequences in a string-literal body.
 *
 * src/len is the literal text without its quotes; the decoded bytes are
 * written to dst and the number of bytes written is returned.  No terminating
 * NUL is appended.  Callers in this file size dst as len * 4 + 1 bytes, which
 * is more than any escape handled here can produce.
 *
 * Handled escapes: \n \t \r \\ \' \" \0 \b \f \v and \uXXXX (emitted through
 * unicode_to_utf8).  A backslash followed by any other byte yields that byte;
 * a backslash that is the last byte of the input is copied unchanged.
 *
 * \uXXXX details:
 *  - only the hex digits actually present contribute to the code point (a
 *    non-hex byte ends the escape and is then processed as ordinary text);
 *  - a high surrogate immediately followed by a \uXXXX low surrogate is
 *    combined into one code point, so the pair is emitted as a single
 *    code point instead of two separately encoded surrogates.
 */
static int ast_decode_string (const uint8_t *src, int len, char *dst) {
  const uint8_t *end = src + len;
  char *out = dst;

  while (src < end) {
    if (*src != '\\' || src + 1 >= end) {
      *out++ = (char)*src++;
      continue;
    }

    src++; /* step over the backslash; *src is the escape selector */
    switch (*src) {
    case 'n': *out++ = '\n'; src++; break;
    case 't': *out++ = '\t'; src++; break;
    case 'r': *out++ = '\r'; src++; break;
    case '\\': *out++ = '\\'; src++; break;
    case '\'': *out++ = '\''; src++; break;
    case '\"': *out++ = '\"'; src++; break;
    case '0': *out++ = '\0'; src++; break;
    case 'b': *out++ = '\b'; src++; break;
    case 'f': *out++ = '\f'; src++; break;
    case 'v': *out++ = '\v'; src++; break;
    case 'u': {
      unsigned int cp;

      src++;
      src += ast_decode_hex4 (src, end, &cp);

      /* A value in the high-surrogate range implies all four digits were
         read; look for a directly following "\uXXXX" low surrogate. */
      if (cp >= 0xD800 && cp <= 0xDBFF && end - src >= 6
          && src[0] == '\\' && src[1] == 'u') {
        unsigned int low;
        if (ast_decode_hex4 (src + 2, end, &low) == 4
            && low >= 0xDC00 && low <= 0xDFFF) {
          cp = 0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00);
          src += 6;
        }
      }
      out += unicode_to_utf8 ((uint8_t *)out, cp);
    } break;
    default: *out++ = (char)*src++; break;
    }
  }
  return (int)(out - dst);
}
|
|
|
|
int ast_next_token (ASTParseState *s) {
|
|
const uint8_t *p;
|
|
int c;
|
|
BOOL ident_has_escape;
|
|
|
|
ast_free_token (s);
|
|
p = s->buf_ptr;
|
|
s->got_lf = FALSE;
|
|
|
|
redo:
|
|
s->token_ptr = p;
|
|
c = *p;
|
|
switch (c) {
|
|
case 0:
|
|
if (p >= s->buf_end) {
|
|
s->token_val = TOK_EOF;
|
|
} else {
|
|
goto def_token;
|
|
}
|
|
break;
|
|
case '`': {
|
|
const uint8_t *start = p;
|
|
p++;
|
|
while (p < s->buf_end && *p != '`') {
|
|
if (*p == '\\' && p + 1 < s->buf_end) { p += 2; continue; }
|
|
if (*p == '$' && p + 1 < s->buf_end && p[1] == '{') {
|
|
p += 2;
|
|
int depth = 1;
|
|
while (p < s->buf_end && depth > 0) {
|
|
if (*p == '{') { depth++; p++; }
|
|
else if (*p == '}') { depth--; p++; }
|
|
else if (*p == '\'' || *p == '"' || *p == '`') {
|
|
int q = *p; p++;
|
|
while (p < s->buf_end && *p != q) {
|
|
if (*p == '\\' && p + 1 < s->buf_end) p++;
|
|
p++;
|
|
}
|
|
if (p < s->buf_end) p++;
|
|
} else { p++; }
|
|
}
|
|
continue;
|
|
}
|
|
p++;
|
|
}
|
|
if (p >= s->buf_end) {
|
|
ast_error (s, start, "unterminated template literal");
|
|
s->buf_ptr = p;
|
|
goto redo;
|
|
}
|
|
p++;
|
|
s->token_val = TOK_TEMPLATE;
|
|
{
|
|
const uint8_t *raw = start + 1;
|
|
int raw_len = p - start - 2;
|
|
BOOL has_escape = FALSE;
|
|
for (int i = 0; i < raw_len; i++) {
|
|
if (raw[i] == '\\') { has_escape = TRUE; break; }
|
|
}
|
|
if (has_escape) {
|
|
char *buf = sys_malloc (raw_len * 4 + 1);
|
|
int decoded_len = ast_decode_string (raw, raw_len, buf);
|
|
s->decoded_str = buf;
|
|
s->token_u.str.str = buf;
|
|
s->token_u.str.len = decoded_len;
|
|
} else {
|
|
s->token_u.str.str = (const char *)raw;
|
|
s->token_u.str.len = raw_len;
|
|
}
|
|
}
|
|
} break;
|
|
case '\'':
|
|
case '\"': {
|
|
const uint8_t *start = p;
|
|
int quote = c;
|
|
p++;
|
|
while (p < s->buf_end && *p != quote) {
|
|
if (*p == '\\' && p + 1 < s->buf_end) p++;
|
|
p++;
|
|
}
|
|
if (p >= s->buf_end) {
|
|
ast_error (s, start, "unterminated string literal");
|
|
s->buf_ptr = p;
|
|
goto redo;
|
|
}
|
|
p++;
|
|
/* Store the string content without quotes, decoding escape sequences */
|
|
s->token_val = TOK_STRING;
|
|
{
|
|
const uint8_t *raw = start + 1;
|
|
int raw_len = p - start - 2;
|
|
/* Check if any escape sequences need decoding */
|
|
BOOL has_escape = FALSE;
|
|
for (int i = 0; i < raw_len; i++) {
|
|
if (raw[i] == '\\') { has_escape = TRUE; break; }
|
|
}
|
|
if (has_escape) {
|
|
char *buf = sys_malloc (raw_len * 4 + 1);
|
|
int decoded_len = ast_decode_string (raw, raw_len, buf);
|
|
s->decoded_str = buf;
|
|
s->token_u.str.str = buf;
|
|
s->token_u.str.len = decoded_len;
|
|
} else {
|
|
s->token_u.str.str = (const char *)raw;
|
|
s->token_u.str.len = raw_len;
|
|
}
|
|
}
|
|
} break;
|
|
case '\r':
|
|
if (p[1] == '\n') p++;
|
|
/* fall through */
|
|
case '\n':
|
|
p++;
|
|
s->got_lf = TRUE;
|
|
goto redo;
|
|
case '\f':
|
|
case '\v':
|
|
case ' ':
|
|
case '\t':
|
|
p++;
|
|
goto redo;
|
|
case '/':
|
|
if (p[1] == '*') {
|
|
const uint8_t *comment_start = p;
|
|
p += 2;
|
|
BOOL found_end = FALSE;
|
|
while (p < s->buf_end) {
|
|
if (p[0] == '*' && p + 1 < s->buf_end && p[1] == '/') {
|
|
p += 2;
|
|
found_end = TRUE;
|
|
break;
|
|
}
|
|
if (*p == '\n' || *p == '\r') s->got_lf = TRUE;
|
|
p++;
|
|
}
|
|
if (!found_end)
|
|
ast_error (s, comment_start, "unterminated block comment");
|
|
goto redo;
|
|
} else if (p[1] == '/') {
|
|
p += 2;
|
|
while (p < s->buf_end && *p != '\n' && *p != '\r') p++;
|
|
goto redo;
|
|
} else if (p[1] == '=') {
|
|
p += 2;
|
|
s->token_val = TOK_DIV_ASSIGN;
|
|
} else if (p[1] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 2;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 2;
|
|
} else {
|
|
p++;
|
|
s->token_val = c;
|
|
}
|
|
break;
|
|
case '\\':
|
|
goto def_token;
|
|
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
|
|
case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
|
|
case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
|
|
case 'v': case 'w': case 'x': case 'y': case 'z':
|
|
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
|
|
case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
|
|
case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
|
|
case 'V': case 'W': case 'X': case 'Y': case 'Z':
|
|
case '_': case '$': {
|
|
const uint8_t *start = p;
|
|
ident_has_escape = FALSE;
|
|
p++;
|
|
while (p < s->buf_end) {
|
|
c = *p;
|
|
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
|
|
(c >= '0' && c <= '9') || c == '_' || c == '$' ||
|
|
c == '?' || c == '!') {
|
|
p++;
|
|
} else if (c >= 0x80) {
|
|
/* unicode identifier */
|
|
p++;
|
|
while (p < s->buf_end && (*p & 0xc0) == 0x80) p++;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
size_t len = p - start;
|
|
s->token_u.ident.str = (const char *)start;
|
|
s->token_u.ident.len = len;
|
|
s->token_u.ident.has_escape = ident_has_escape;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
|
|
/* Check for keywords */
|
|
if (len == 2 && !memcmp (start, "if", 2)) s->token_val = TOK_IF;
|
|
else if (len == 2 && !memcmp (start, "in", 2)) s->token_val = TOK_IN;
|
|
else if (len == 2 && !memcmp (start, "do", 2)) s->token_val = TOK_DO;
|
|
else if (len == 2 && !memcmp (start, "go", 2)) s->token_val = TOK_GO;
|
|
else if (len == 3 && !memcmp (start, "var", 3)) s->token_val = TOK_VAR;
|
|
else if (len == 3 && !memcmp (start, "def", 3)) s->token_val = TOK_DEF;
|
|
else if (len == 3 && !memcmp (start, "for", 3)) s->token_val = TOK_FOR;
|
|
else if (len == 4 && !memcmp (start, "else", 4)) s->token_val = TOK_ELSE;
|
|
else if (len == 4 && !memcmp (start, "this", 4)) s->token_val = TOK_THIS;
|
|
else if (len == 4 && !memcmp (start, "null", 4)) s->token_val = TOK_NULL;
|
|
else if (len == 4 && !memcmp (start, "true", 4)) s->token_val = TOK_TRUE;
|
|
else if (len == 5 && !memcmp (start, "false", 5)) s->token_val = TOK_FALSE;
|
|
else if (len == 5 && !memcmp (start, "while", 5)) s->token_val = TOK_WHILE;
|
|
else if (len == 5 && !memcmp (start, "break", 5)) s->token_val = TOK_BREAK;
|
|
else if (len == 6 && !memcmp (start, "return", 6)) s->token_val = TOK_RETURN;
|
|
else if (len == 6 && !memcmp (start, "delete", 6)) s->token_val = TOK_DELETE;
|
|
else if (len == 7 && !memcmp (start, "disrupt", 7)) s->token_val = TOK_DISRUPT;
|
|
else if (len == 8 && !memcmp (start, "function", 8)) s->token_val = TOK_FUNCTION;
|
|
else if (len == 8 && !memcmp (start, "continue", 8)) s->token_val = TOK_CONTINUE;
|
|
else if (len == 10 && !memcmp (start, "disruption", 10)) s->token_val = TOK_DISRUPTION;
|
|
} break;
|
|
case '.':
|
|
if (p[1] >= '0' && p[1] <= '9') {
|
|
goto parse_number;
|
|
} else {
|
|
goto def_token;
|
|
}
|
|
break;
|
|
case '0': case '1': case '2': case '3': case '4':
|
|
case '5': case '6': case '7': case '8': case '9':
|
|
parse_number: {
|
|
const uint8_t *start = p;
|
|
BOOL is_float = FALSE;
|
|
/* hex */
|
|
if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
|
|
p += 2;
|
|
const uint8_t *digits_start = p;
|
|
while (p < s->buf_end && ((c = *p, (c >= '0' && c <= '9') ||
|
|
(c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') || c == '_'))) p++;
|
|
if (p == digits_start)
|
|
ast_error (s, start, "malformed hex number: no digits after '0x'");
|
|
} else if (p[0] == '0' && (p[1] == 'b' || p[1] == 'B')) {
|
|
p += 2;
|
|
const uint8_t *digits_start = p;
|
|
while (p < s->buf_end && (*p == '0' || *p == '1' || *p == '_')) p++;
|
|
if (p == digits_start)
|
|
ast_error (s, start, "malformed binary number: no digits after '0b'");
|
|
} else if (p[0] == '0' && (p[1] == 'o' || p[1] == 'O')) {
|
|
p += 2;
|
|
const uint8_t *digits_start = p;
|
|
while (p < s->buf_end && (*p >= '0' && *p <= '7')) p++;
|
|
if (p == digits_start)
|
|
ast_error (s, start, "malformed octal number: no digits after '0o'");
|
|
} else {
|
|
while (p < s->buf_end && ((*p >= '0' && *p <= '9') || *p == '_')) p++;
|
|
if (p < s->buf_end && *p == '.') {
|
|
is_float = TRUE;
|
|
p++;
|
|
while (p < s->buf_end && ((*p >= '0' && *p <= '9') || *p == '_')) p++;
|
|
}
|
|
if (p < s->buf_end && (*p == 'e' || *p == 'E')) {
|
|
is_float = TRUE;
|
|
p++;
|
|
if (p < s->buf_end && (*p == '+' || *p == '-')) p++;
|
|
const uint8_t *exp_start = p;
|
|
while (p < s->buf_end && (*p >= '0' && *p <= '9')) p++;
|
|
if (p == exp_start)
|
|
ast_error (s, start, "malformed number: no digits after exponent");
|
|
}
|
|
}
|
|
s->token_val = TOK_NUMBER;
|
|
/* Parse the number value */
|
|
char *numstr = sys_malloc (p - start + 1);
|
|
memcpy (numstr, start, p - start);
|
|
numstr[p - start] = '\0';
|
|
double val = strtod (numstr, NULL);
|
|
sys_free (numstr);
|
|
s->token_u.num.val = val;
|
|
} break;
|
|
case '*':
|
|
if (p[1] == '=') { p += 2; s->token_val = TOK_MUL_ASSIGN; }
|
|
else if (p[1] == '*') {
|
|
if (p[2] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 3;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 3;
|
|
}
|
|
else if (p[2] == '=') { p += 3; s->token_val = TOK_POW_ASSIGN; }
|
|
else { p += 2; s->token_val = TOK_POW; }
|
|
}
|
|
else if (p[1] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 2;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 2;
|
|
}
|
|
else { goto def_token; }
|
|
break;
|
|
case '%':
|
|
if (p[1] == '=') { p += 2; s->token_val = TOK_MOD_ASSIGN; }
|
|
else if (p[1] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 2;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 2;
|
|
}
|
|
else { goto def_token; }
|
|
break;
|
|
case '+':
|
|
if (p[1] == '=') { p += 2; s->token_val = TOK_PLUS_ASSIGN; }
|
|
else if (p[1] == '+') { p += 2; s->token_val = TOK_INC; }
|
|
else if (p[1] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 2;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 2;
|
|
}
|
|
else { goto def_token; }
|
|
break;
|
|
case '-':
|
|
if (p[1] == '=') { p += 2; s->token_val = TOK_MINUS_ASSIGN; }
|
|
else if (p[1] == '-') { p += 2; s->token_val = TOK_DEC; }
|
|
else if (p[1] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 2;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 2;
|
|
}
|
|
else { goto def_token; }
|
|
break;
|
|
case '<':
|
|
if (p[1] == '=' && p[2] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 3;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 3;
|
|
}
|
|
else if (p[1] == '=') { p += 2; s->token_val = TOK_LTE; }
|
|
else if (p[1] == '<') {
|
|
if (p[2] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 3;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 3;
|
|
}
|
|
else if (p[2] == '=') { p += 3; s->token_val = TOK_SHL_ASSIGN; }
|
|
else { p += 2; s->token_val = TOK_SHL; }
|
|
}
|
|
else if (p[1] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 2;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 2;
|
|
}
|
|
else { goto def_token; }
|
|
break;
|
|
case '>':
|
|
if (p[1] == '=' && p[2] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 3;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 3;
|
|
}
|
|
else if (p[1] == '=') { p += 2; s->token_val = TOK_GTE; }
|
|
else if (p[1] == '>') {
|
|
if (p[2] == '>') {
|
|
if (p[3] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 4;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 4;
|
|
}
|
|
else if (p[3] == '=') { p += 4; s->token_val = TOK_SHR_ASSIGN; }
|
|
else { p += 3; s->token_val = TOK_SHR; }
|
|
}
|
|
else if (p[2] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 3;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 3;
|
|
}
|
|
else if (p[2] == '=') { p += 3; s->token_val = TOK_SAR_ASSIGN; }
|
|
else { p += 2; s->token_val = TOK_SAR; }
|
|
}
|
|
else if (p[1] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 2;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 2;
|
|
}
|
|
else { goto def_token; }
|
|
break;
|
|
case '=':
|
|
if (p[1] == '=') {
|
|
if (p[2] == '=') { p += 3; s->token_val = TOK_STRICT_EQ; }
|
|
else { p += 2; s->token_val = TOK_EQ; }
|
|
} else if (p[1] == '>') { p += 2; s->token_val = TOK_ARROW; }
|
|
else if (p[1] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 2;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 2;
|
|
}
|
|
else { goto def_token; }
|
|
break;
|
|
case '!':
|
|
if (p[1] == '=') {
|
|
if (p[2] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 3;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 3;
|
|
}
|
|
else if (p[2] == '=') { p += 3; s->token_val = TOK_STRICT_NEQ; }
|
|
else { p += 2; s->token_val = TOK_NEQ; }
|
|
} else { goto def_token; }
|
|
break;
|
|
case '&':
|
|
if (p[1] == '&') {
|
|
if (p[2] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 3;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 3;
|
|
}
|
|
else if (p[2] == '=') { p += 3; s->token_val = TOK_LAND_ASSIGN; }
|
|
else { p += 2; s->token_val = TOK_LAND; }
|
|
}
|
|
else if (p[1] == '=') { p += 2; s->token_val = TOK_AND_ASSIGN; }
|
|
else if (p[1] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 2;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 2;
|
|
}
|
|
else { goto def_token; }
|
|
break;
|
|
case '|':
|
|
if (p[1] == '|') {
|
|
if (p[2] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 3;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 3;
|
|
}
|
|
else if (p[2] == '=') { p += 3; s->token_val = TOK_LOR_ASSIGN; }
|
|
else { p += 2; s->token_val = TOK_LOR; }
|
|
}
|
|
else if (p[1] == '=') { p += 2; s->token_val = TOK_OR_ASSIGN; }
|
|
else if (p[1] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 2;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 2;
|
|
}
|
|
else { goto def_token; }
|
|
break;
|
|
case '^':
|
|
if (p[1] == '=') { p += 2; s->token_val = TOK_XOR_ASSIGN; }
|
|
else if (p[1] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 2;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 2;
|
|
}
|
|
else { goto def_token; }
|
|
break;
|
|
case '[':
|
|
if (p[1] == ']' && p[2] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 3;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 3;
|
|
}
|
|
else { goto def_token; }
|
|
break;
|
|
case '~':
|
|
if (p[1] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 2;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 2;
|
|
}
|
|
else { goto def_token; }
|
|
break;
|
|
case '?':
|
|
goto def_token;
|
|
default:
|
|
def_token:
|
|
p++;
|
|
s->token_val = c;
|
|
break;
|
|
}
|
|
s->buf_ptr = p;
|
|
return 0;
|
|
}
|
|
|
|
/* Tokenizer function that does NOT skip whitespace/comments - emits them as tokens */
|
|
int tokenize_next (ASTParseState *s) {
|
|
const uint8_t *p;
|
|
int c;
|
|
BOOL ident_has_escape;
|
|
|
|
ast_free_token (s);
|
|
p = s->buf_ptr;
|
|
s->got_lf = FALSE;
|
|
|
|
s->token_ptr = p;
|
|
c = *p;
|
|
switch (c) {
|
|
case 0:
|
|
if (p >= s->buf_end) {
|
|
s->token_val = TOK_EOF;
|
|
} else {
|
|
goto def_token;
|
|
}
|
|
break;
|
|
case '`': {
|
|
const uint8_t *start = p;
|
|
p++;
|
|
while (p < s->buf_end && *p != '`') {
|
|
if (*p == '\\' && p + 1 < s->buf_end) { p += 2; continue; }
|
|
if (*p == '$' && p + 1 < s->buf_end && p[1] == '{') {
|
|
p += 2;
|
|
int depth = 1;
|
|
while (p < s->buf_end && depth > 0) {
|
|
if (*p == '{') { depth++; p++; }
|
|
else if (*p == '}') { depth--; p++; }
|
|
else if (*p == '\'' || *p == '"' || *p == '`') {
|
|
int q = *p; p++;
|
|
while (p < s->buf_end && *p != q) {
|
|
if (*p == '\\' && p + 1 < s->buf_end) p++;
|
|
p++;
|
|
}
|
|
if (p < s->buf_end) p++;
|
|
} else { p++; }
|
|
}
|
|
continue;
|
|
}
|
|
p++;
|
|
}
|
|
if (p >= s->buf_end) {
|
|
ast_error (s, start, "unterminated template literal");
|
|
s->token_val = TOK_ERROR;
|
|
s->token_u.str.str = (const char *)(start + 1);
|
|
s->token_u.str.len = p - start - 1;
|
|
} else {
|
|
p++;
|
|
s->token_val = TOK_TEMPLATE;
|
|
s->token_u.str.str = (const char *)(start + 1);
|
|
s->token_u.str.len = p - start - 2;
|
|
}
|
|
} break;
|
|
case '\'':
|
|
case '\"': {
|
|
const uint8_t *start = p;
|
|
int quote = c;
|
|
p++;
|
|
while (p < s->buf_end && *p != quote) {
|
|
if (*p == '\\' && p + 1 < s->buf_end) p++;
|
|
p++;
|
|
}
|
|
if (p >= s->buf_end) {
|
|
ast_error (s, start, "unterminated string literal");
|
|
s->token_val = TOK_ERROR;
|
|
s->token_u.str.str = (const char *)(start + 1);
|
|
s->token_u.str.len = p - start - 1;
|
|
} else {
|
|
p++;
|
|
s->token_val = TOK_STRING;
|
|
s->token_u.str.str = (const char *)(start + 1);
|
|
s->token_u.str.len = p - start - 2;
|
|
}
|
|
} break;
|
|
case '\r':
|
|
if (p[1] == '\n') p++;
|
|
/* fall through */
|
|
case '\n':
|
|
p++;
|
|
s->got_lf = TRUE;
|
|
s->token_val = TOK_NEWLINE;
|
|
break;
|
|
case '\f':
|
|
case '\v':
|
|
case ' ':
|
|
case '\t': {
|
|
/* Collect consecutive whitespace (excluding newlines) */
|
|
while (p < s->buf_end && (*p == ' ' || *p == '\t' || *p == '\f' || *p == '\v')) p++;
|
|
s->token_val = TOK_SPACE;
|
|
} break;
|
|
case '/':
|
|
if (p[1] == '*') {
|
|
/* Multi-line comment */
|
|
const uint8_t *comment_start = p;
|
|
p += 2;
|
|
BOOL found_end = FALSE;
|
|
while (p < s->buf_end) {
|
|
if (p[0] == '*' && p + 1 < s->buf_end && p[1] == '/') {
|
|
p += 2;
|
|
found_end = TRUE;
|
|
break;
|
|
}
|
|
if (*p == '\n' || *p == '\r') s->got_lf = TRUE;
|
|
p++;
|
|
}
|
|
if (!found_end) {
|
|
ast_error (s, comment_start, "unterminated block comment");
|
|
s->token_val = TOK_ERROR;
|
|
} else {
|
|
s->token_val = TOK_COMMENT;
|
|
}
|
|
} else if (p[1] == '/') {
|
|
/* Single-line comment */
|
|
p += 2;
|
|
while (p < s->buf_end && *p != '\n' && *p != '\r') p++;
|
|
s->token_val = TOK_COMMENT;
|
|
} else if (p[1] == '=') {
|
|
p += 2;
|
|
s->token_val = TOK_DIV_ASSIGN;
|
|
} else if (p[1] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 2;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 2;
|
|
} else {
|
|
p++;
|
|
s->token_val = c;
|
|
}
|
|
break;
|
|
case '\\':
|
|
goto def_token;
|
|
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
|
|
case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
|
|
case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
|
|
case 'v': case 'w': case 'x': case 'y': case 'z':
|
|
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
|
|
case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
|
|
case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
|
|
case 'V': case 'W': case 'X': case 'Y': case 'Z':
|
|
case '_': case '$': {
|
|
const uint8_t *start = p;
|
|
ident_has_escape = FALSE;
|
|
p++;
|
|
while (p < s->buf_end) {
|
|
c = *p;
|
|
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
|
|
(c >= '0' && c <= '9') || c == '_' || c == '$' ||
|
|
c == '?' || c == '!') {
|
|
p++;
|
|
} else if (c >= 0x80) {
|
|
p++;
|
|
while (p < s->buf_end && (*p & 0xc0) == 0x80) p++;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
size_t len = p - start;
|
|
s->token_u.ident.str = (const char *)start;
|
|
s->token_u.ident.len = len;
|
|
s->token_u.ident.has_escape = ident_has_escape;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
|
|
/* Check for keywords */
|
|
if (len == 2 && !memcmp (start, "if", 2)) s->token_val = TOK_IF;
|
|
else if (len == 2 && !memcmp (start, "in", 2)) s->token_val = TOK_IN;
|
|
else if (len == 2 && !memcmp (start, "do", 2)) s->token_val = TOK_DO;
|
|
else if (len == 2 && !memcmp (start, "go", 2)) s->token_val = TOK_GO;
|
|
else if (len == 3 && !memcmp (start, "var", 3)) s->token_val = TOK_VAR;
|
|
else if (len == 3 && !memcmp (start, "def", 3)) s->token_val = TOK_DEF;
|
|
else if (len == 3 && !memcmp (start, "for", 3)) s->token_val = TOK_FOR;
|
|
else if (len == 4 && !memcmp (start, "else", 4)) s->token_val = TOK_ELSE;
|
|
else if (len == 4 && !memcmp (start, "this", 4)) s->token_val = TOK_THIS;
|
|
else if (len == 4 && !memcmp (start, "null", 4)) s->token_val = TOK_NULL;
|
|
else if (len == 4 && !memcmp (start, "true", 4)) s->token_val = TOK_TRUE;
|
|
else if (len == 5 && !memcmp (start, "false", 5)) s->token_val = TOK_FALSE;
|
|
else if (len == 5 && !memcmp (start, "while", 5)) s->token_val = TOK_WHILE;
|
|
else if (len == 5 && !memcmp (start, "break", 5)) s->token_val = TOK_BREAK;
|
|
else if (len == 6 && !memcmp (start, "return", 6)) s->token_val = TOK_RETURN;
|
|
else if (len == 6 && !memcmp (start, "delete", 6)) s->token_val = TOK_DELETE;
|
|
else if (len == 7 && !memcmp (start, "disrupt", 7)) s->token_val = TOK_DISRUPT;
|
|
else if (len == 8 && !memcmp (start, "function", 8)) s->token_val = TOK_FUNCTION;
|
|
else if (len == 8 && !memcmp (start, "continue", 8)) s->token_val = TOK_CONTINUE;
|
|
else if (len == 10 && !memcmp (start, "disruption", 10)) s->token_val = TOK_DISRUPTION;
|
|
} break;
|
|
case '.':
|
|
if (p[1] >= '0' && p[1] <= '9') {
|
|
goto tokenize_number;
|
|
} else {
|
|
goto def_token;
|
|
}
|
|
break;
|
|
case '0': case '1': case '2': case '3': case '4':
|
|
case '5': case '6': case '7': case '8': case '9':
|
|
tokenize_number: {
|
|
const uint8_t *start = p;
|
|
BOOL is_float = FALSE;
|
|
if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
|
|
p += 2;
|
|
const uint8_t *digits_start = p;
|
|
while (p < s->buf_end && ((c = *p, (c >= '0' && c <= '9') ||
|
|
(c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') || c == '_'))) p++;
|
|
if (p == digits_start)
|
|
ast_error (s, start, "malformed hex number: no digits after '0x'");
|
|
} else if (p[0] == '0' && (p[1] == 'b' || p[1] == 'B')) {
|
|
p += 2;
|
|
const uint8_t *digits_start = p;
|
|
while (p < s->buf_end && (*p == '0' || *p == '1' || *p == '_')) p++;
|
|
if (p == digits_start)
|
|
ast_error (s, start, "malformed binary number: no digits after '0b'");
|
|
} else if (p[0] == '0' && (p[1] == 'o' || p[1] == 'O')) {
|
|
p += 2;
|
|
const uint8_t *digits_start = p;
|
|
while (p < s->buf_end && (*p >= '0' && *p <= '7')) p++;
|
|
if (p == digits_start)
|
|
ast_error (s, start, "malformed octal number: no digits after '0o'");
|
|
} else {
|
|
while (p < s->buf_end && ((*p >= '0' && *p <= '9') || *p == '_')) p++;
|
|
if (p < s->buf_end && *p == '.') {
|
|
is_float = TRUE;
|
|
p++;
|
|
while (p < s->buf_end && ((*p >= '0' && *p <= '9') || *p == '_')) p++;
|
|
}
|
|
if (p < s->buf_end && (*p == 'e' || *p == 'E')) {
|
|
is_float = TRUE;
|
|
p++;
|
|
if (p < s->buf_end && (*p == '+' || *p == '-')) p++;
|
|
const uint8_t *exp_start = p;
|
|
while (p < s->buf_end && (*p >= '0' && *p <= '9')) p++;
|
|
if (p == exp_start)
|
|
ast_error (s, start, "malformed number: no digits after exponent");
|
|
}
|
|
}
|
|
(void)is_float;
|
|
s->token_val = TOK_NUMBER;
|
|
char *numstr = sys_malloc (p - start + 1);
|
|
memcpy (numstr, start, p - start);
|
|
numstr[p - start] = '\0';
|
|
double val = strtod (numstr, NULL);
|
|
sys_free (numstr);
|
|
s->token_u.num.val = val;
|
|
} break;
|
|
case '*':
|
|
if (p[1] == '=') { p += 2; s->token_val = TOK_MUL_ASSIGN; }
|
|
else if (p[1] == '*') {
|
|
if (p[2] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 3;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 3;
|
|
}
|
|
else if (p[2] == '=') { p += 3; s->token_val = TOK_POW_ASSIGN; }
|
|
else { p += 2; s->token_val = TOK_POW; }
|
|
}
|
|
else if (p[1] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 2;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 2;
|
|
}
|
|
else { goto def_token; }
|
|
break;
|
|
case '%':
|
|
if (p[1] == '=') { p += 2; s->token_val = TOK_MOD_ASSIGN; }
|
|
else if (p[1] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 2;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 2;
|
|
}
|
|
else { goto def_token; }
|
|
break;
|
|
case '+':
|
|
if (p[1] == '=') { p += 2; s->token_val = TOK_PLUS_ASSIGN; }
|
|
else if (p[1] == '+') { p += 2; s->token_val = TOK_INC; }
|
|
else if (p[1] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 2;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 2;
|
|
}
|
|
else { goto def_token; }
|
|
break;
|
|
case '-':
|
|
if (p[1] == '=') { p += 2; s->token_val = TOK_MINUS_ASSIGN; }
|
|
else if (p[1] == '-') { p += 2; s->token_val = TOK_DEC; }
|
|
else if (p[1] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 2;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 2;
|
|
}
|
|
else { goto def_token; }
|
|
break;
|
|
case '<':
|
|
if (p[1] == '=' && p[2] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 3;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 3;
|
|
}
|
|
else if (p[1] == '=') { p += 2; s->token_val = TOK_LTE; }
|
|
else if (p[1] == '<') {
|
|
if (p[2] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 3;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 3;
|
|
}
|
|
else if (p[2] == '=') { p += 3; s->token_val = TOK_SHL_ASSIGN; }
|
|
else { p += 2; s->token_val = TOK_SHL; }
|
|
}
|
|
else if (p[1] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 2;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 2;
|
|
}
|
|
else { goto def_token; }
|
|
break;
|
|
case '>':
|
|
if (p[1] == '=' && p[2] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 3;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 3;
|
|
}
|
|
else if (p[1] == '=') { p += 2; s->token_val = TOK_GTE; }
|
|
else if (p[1] == '>') {
|
|
if (p[2] == '>') {
|
|
if (p[3] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 4;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 4;
|
|
}
|
|
else if (p[3] == '=') { p += 4; s->token_val = TOK_SHR_ASSIGN; }
|
|
else { p += 3; s->token_val = TOK_SHR; }
|
|
}
|
|
else if (p[2] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 3;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 3;
|
|
}
|
|
else if (p[2] == '=') { p += 3; s->token_val = TOK_SAR_ASSIGN; }
|
|
else { p += 2; s->token_val = TOK_SAR; }
|
|
}
|
|
else if (p[1] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 2;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 2;
|
|
}
|
|
else { goto def_token; }
|
|
break;
|
|
case '=':
|
|
if (p[1] == '=') {
|
|
if (p[2] == '=') { p += 3; s->token_val = TOK_STRICT_EQ; }
|
|
else { p += 2; s->token_val = TOK_EQ; }
|
|
} else if (p[1] == '>') { p += 2; s->token_val = TOK_ARROW; }
|
|
else if (p[1] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 2;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 2;
|
|
}
|
|
else { goto def_token; }
|
|
break;
|
|
case '!':
|
|
if (p[1] == '=') {
|
|
if (p[2] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 3;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 3;
|
|
}
|
|
else if (p[2] == '=') { p += 3; s->token_val = TOK_STRICT_NEQ; }
|
|
else { p += 2; s->token_val = TOK_NEQ; }
|
|
} else { goto def_token; }
|
|
break;
|
|
case '&':
|
|
if (p[1] == '&') {
|
|
if (p[2] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 3;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 3;
|
|
}
|
|
else if (p[2] == '=') { p += 3; s->token_val = TOK_LAND_ASSIGN; }
|
|
else { p += 2; s->token_val = TOK_LAND; }
|
|
}
|
|
else if (p[1] == '=') { p += 2; s->token_val = TOK_AND_ASSIGN; }
|
|
else if (p[1] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 2;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 2;
|
|
}
|
|
else { goto def_token; }
|
|
break;
|
|
case '|':
|
|
if (p[1] == '|') {
|
|
if (p[2] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 3;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 3;
|
|
}
|
|
else if (p[2] == '=') { p += 3; s->token_val = TOK_LOR_ASSIGN; }
|
|
else { p += 2; s->token_val = TOK_LOR; }
|
|
}
|
|
else if (p[1] == '=') { p += 2; s->token_val = TOK_OR_ASSIGN; }
|
|
else if (p[1] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 2;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 2;
|
|
}
|
|
else { goto def_token; }
|
|
break;
|
|
case '^':
|
|
if (p[1] == '=') { p += 2; s->token_val = TOK_XOR_ASSIGN; }
|
|
else if (p[1] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 2;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 2;
|
|
}
|
|
else { goto def_token; }
|
|
break;
|
|
case '[':
|
|
if (p[1] == ']' && p[2] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 3;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 3;
|
|
}
|
|
else { goto def_token; }
|
|
break;
|
|
case '~':
|
|
if (p[1] == '!') {
|
|
s->token_u.ident.str = (const char *)p;
|
|
s->token_u.ident.len = 2;
|
|
s->token_u.ident.has_escape = FALSE;
|
|
s->token_u.ident.is_reserved = FALSE;
|
|
s->token_val = TOK_IDENT;
|
|
p += 2;
|
|
}
|
|
else { goto def_token; }
|
|
break;
|
|
case '?':
|
|
goto def_token;
|
|
default:
|
|
def_token:
|
|
p++;
|
|
s->token_val = c;
|
|
break;
|
|
}
|
|
s->buf_ptr = p;
|
|
return 0;
|
|
}
|
|
|
|
|
|
/* Copy `len` raw source bytes starting at `src` into a temporary
   NUL-terminated buffer and attach them to `tok` as the string member
   "value".  If the temporary buffer cannot be obtained the member is simply
   omitted, so a NULL result from sys_malloc is never written through. */
static void token_add_source_text (cJSON *tok, const void *src, size_t len) {
  char *text = sys_malloc (len + 1);
  if (!text) return;
  memcpy (text, src, len);
  text[len] = '\0';
  cJSON_AddStringToObject (tok, "value", text);
  sys_free (text);
}

/* Build the JSON description of the token currently held in `s`.

   The object always carries:
     "kind"                     - label from ast_token_kind_str (s->token_val)
     "at"                       - byte offset of the token from s->buf_start
     "from_row", "from_column"  - position of s->token_ptr (token start)
     "to_row", "to_column"      - position of s->buf_ptr (just past the token)
   and, depending on the token type, a "value" member (plus "number" for
   numeric literals).

   Returns the new object, or NULL if it could not be created. */
static cJSON *build_token_object (ASTParseState *s) {
  cJSON *tok = cJSON_CreateObject ();
  if (!tok) return NULL;

  cJSON_AddStringToObject (tok, "kind", ast_token_kind_str (s->token_val));

  /* Position info */
  int at = (int)(s->token_ptr - s->buf_start);
  int from_row, from_col;
  int to_row, to_col;
  ast_get_line_col (s, s->token_ptr, &from_row, &from_col);
  ast_get_line_col (s, s->buf_ptr, &to_row, &to_col);

  cJSON_AddNumberToObject (tok, "at", at);
  cJSON_AddNumberToObject (tok, "from_row", from_row);
  cJSON_AddNumberToObject (tok, "from_column", from_col);
  cJSON_AddNumberToObject (tok, "to_row", to_row);
  cJSON_AddNumberToObject (tok, "to_column", to_col);

  /* Length of the token's raw text in the source buffer. */
  size_t raw_len = (size_t)(s->buf_ptr - s->token_ptr);

  /* Value field based on token type */
  switch (s->token_val) {
  case TOK_NUMBER:
    /* Original spelling first, then the parsed value. */
    token_add_source_text (tok, s->token_ptr, raw_len);
    cJSON_AddNumberToObject (tok, "number", s->token_u.num.val);
    break;

  case TOK_STRING:
  case TOK_TEMPLATE:
    cjson_add_strn (tok, "value", s->token_u.str.str, s->token_u.str.len);
    break;

  case TOK_IDENT:
    cjson_add_strn (tok, "value", s->token_u.ident.str, s->token_u.ident.len);
    break;

  case TOK_ERROR:
  case TOK_COMMENT:
  case TOK_SPACE:
  case TOK_NEWLINE:
    /* These tokens expose their raw source text verbatim. */
    token_add_source_text (tok, s->token_ptr, raw_len);
    break;

  default:
    /* No value field for operators/punctuators/keywords */
    break;
  }

  return tok;
}
|
|
|
|
char *JS_Tokenize (const char *source, size_t len, const char *filename) {
|
|
ASTParseState s;
|
|
memset (&s, 0, sizeof (s));
|
|
|
|
s.filename = filename;
|
|
s.buf_start = (const uint8_t *)source;
|
|
s.buf_ptr = (const uint8_t *)source;
|
|
s.buf_end = (const uint8_t *)source + len;
|
|
s.function_nr = 0;
|
|
s.errors = NULL;
|
|
s.has_error = 0;
|
|
s.lc_cache.ptr = s.buf_start;
|
|
s.lc_cache.buf_start = s.buf_start;
|
|
|
|
cJSON *root = cJSON_CreateObject ();
|
|
cJSON_AddStringToObject (root, "filename", filename);
|
|
cJSON *tokens = cJSON_AddArrayToObject (root, "tokens");
|
|
|
|
/* Tokenize all tokens including whitespace */
|
|
while (1) {
|
|
tokenize_next (&s);
|
|
cJSON *tok = build_token_object (&s);
|
|
cJSON_AddItemToArray (tokens, tok);
|
|
if (s.token_val == TOK_EOF) break;
|
|
}
|
|
|
|
/* Add errors to output if any */
|
|
if (s.errors) {
|
|
cJSON_AddItemToObject (root, "errors", s.errors);
|
|
}
|
|
|
|
char *json = cJSON_PrintUnformatted (root);
|
|
cJSON_Delete (root);
|
|
return json;
|
|
}
|
|
|
|
/* ============================================================
   MACH Compiler — AST directly to binary JSCodeRegister
   ============================================================ */
|
|
|
|
/* Variable kinds */
|
|
/* Numeric tags for the kind of a variable.
   NOTE(review): the meanings below are read off the macro names only;
   confirm against the code that stores and tests these values. */
#define MACH_VAR_ARG    0 /* presumably: function argument */
#define MACH_VAR_LOCAL  1 /* presumably: local variable */
#define MACH_VAR_CLOSED 2 /* presumably: variable captured by a closure */
|
|
|
|
/* Variable resolution result */
|
|
/* Outcome of resolving a variable name.  The numeric values are the ones
   the compiler would assign implicitly; they are spelled out for clarity. */
typedef enum MachVarResolution {
  MACH_VAR_LOCAL_SLOT = 0, /* found in the current scope */
  MACH_VAR_CLOSURE    = 1, /* found in a parent scope */
  MACH_VAR_UNBOUND    = 2  /* not found in any scope */
} MachVarResolution;
|