regex uses C strings now

This commit is contained in:
2026-02-07 10:28:35 -06:00
parent 83ea67c01b
commit 0287d6ada4

View File

@@ -1435,8 +1435,10 @@ static inline uint32_t js_array_cap (JSArray *arr) {
/* JSRegExp: regular expression object data (must come before JSRecord) */
typedef struct JSRegExp {
JSText *pattern;
JSText *bytecode; /* also contains the flags */
char *pattern; /* UTF-8, null-terminated, js_malloc_rt'd */
uint32_t pattern_len;
uint8_t *bytecode; /* raw lre bytecode, js_malloc_rt'd */
uint32_t bytecode_len;
} JSRegExp;
#define obj_is_stone(rec) objhdr_s ((rec)->mist_hdr)
@@ -19757,9 +19759,13 @@ static int js_is_regexp (JSContext *ctx, JSValue obj);
/* RegExp */
/* Finalizer for RegExp objects: releases the native JSRegExp record attached
   to `val` together with the buffers it points to. `rt` is not used. */
static void js_regexp_finalizer (JSRuntime *rt, JSValue val) {
/* The JSRegExp record is stored as the object's opaque data; it may be NULL,
   hence the check below. */
JSRegExp *re = JS_GetOpaque (val, JS_CLASS_REGEXP);
if (re) {
/* Release the pattern and bytecode buffers first, then the record that
   holds the pointers to them. */
js_free_rt (re->pattern);
js_free_rt (re->bytecode);
js_free_rt (re);
}
/* Silence unused-parameter warnings. */
(void)rt;
(void)val;
}
/* create a string containing the RegExp bytecode */
@@ -19844,8 +19850,11 @@ static JSValue js_regexp_constructor_internal (JSContext *ctx, JSValue pattern,
JSValue obj;
JSRecord *p;
JSRegExp *re;
const char *pat_cstr;
size_t pat_len;
int bc_len, i;
/* sanity check - need heap strings for pattern and bytecode */
/* sanity check - need strings for pattern and bytecode */
if (!JS_IsText (bc) || !JS_IsText (pattern)) {
JS_ThrowTypeError (ctx, "string expected");
fail:
@@ -19859,9 +19868,39 @@ static JSValue js_regexp_constructor_internal (JSContext *ctx, JSValue pattern,
re = js_malloc (ctx, sizeof(JSRegExp));
if (!re) goto fail;
REC_SET_OPAQUE(p, re);
/* Store pattern and bytecode - need to handle both immediate and heap strings */
re->pattern = MIST_IsImmediateASCII (pattern) ? NULL : (JSText *)JS_VALUE_GET_PTR (pattern);
re->bytecode = MIST_IsImmediateASCII (bc) ? NULL : (JSText *)JS_VALUE_GET_PTR (bc);
re->pattern = NULL;
re->bytecode = NULL;
/* Extract pattern as UTF-8 C string */
pat_cstr = JS_ToCStringLen (ctx, &pat_len, pattern);
if (!pat_cstr) goto fail;
re->pattern = js_malloc_rt (pat_len + 1);
if (!re->pattern) {
JS_FreeCString (ctx, pat_cstr);
goto fail;
}
memcpy (re->pattern, pat_cstr, pat_len + 1);
re->pattern_len = (uint32_t)pat_len;
JS_FreeCString (ctx, pat_cstr);
/* Extract bytecode as raw bytes via string_get (not JS_ToCStringLen
which UTF-8 encodes and would mangle bytes >= 128) */
if (MIST_IsImmediateASCII (bc)) {
bc_len = MIST_GetImmediateASCIILen (bc);
re->bytecode = js_malloc_rt (bc_len);
if (!re->bytecode) goto fail;
for (i = 0; i < bc_len; i++)
re->bytecode[i] = (uint8_t)MIST_GetImmediateASCIIChar (bc, i);
} else {
JSText *bc_str = (JSText *)JS_VALUE_GET_PTR (bc);
bc_len = (int)JSText_len (bc_str);
re->bytecode = js_malloc_rt (bc_len);
if (!re->bytecode) goto fail;
for (i = 0; i < bc_len; i++)
re->bytecode[i] = (uint8_t)string_get (bc_str, i);
}
re->bytecode_len = (uint32_t)bc_len;
{
JSValue key = JS_KEY_STR (ctx, "lastIndex");
JS_SetPropertyInternal (ctx, obj, key, JS_NewInt32 (ctx, 0));
@@ -19906,9 +19945,11 @@ static JSValue js_regexp_constructor (JSContext *ctx, JSValue this_val, int argc
}
re = js_get_regexp (ctx, pat, FALSE);
if (re) {
pattern = JS_MKPTR (re->pattern);
pattern = JS_NewString (ctx, re->pattern);
if (JS_IsException (pattern)) goto fail;
if (JS_IsNull (flags1)) {
bc = JS_MKPTR (re->bytecode);
bc = js_new_string8_len (ctx, (const char *)re->bytecode, re->bytecode_len);
if (JS_IsException (bc)) goto fail;
goto no_compilation;
} else {
flags = JS_ToString (ctx, flags1);
@@ -19949,6 +19990,9 @@ static JSValue js_regexp_compile (JSContext *ctx, JSValue this_val, int argc, JS
JSRegExp *re1, *re;
JSValue pattern1, flags1;
JSValue bc, pattern;
const char *pat_cstr;
size_t pat_len;
int bc_len, i;
re = js_get_regexp (ctx, this_val, TRUE);
if (!re) return JS_EXCEPTION;
@@ -19958,8 +20002,10 @@ static JSValue js_regexp_compile (JSContext *ctx, JSValue this_val, int argc, JS
if (re1) {
if (!JS_IsNull (flags1))
return JS_ThrowTypeError (ctx, "flags must be undefined");
pattern = JS_MKPTR (re1->pattern);
bc = JS_MKPTR (re1->bytecode);
pattern = JS_NewString (ctx, re1->pattern);
if (JS_IsException (pattern)) goto fail;
bc = js_new_string8_len (ctx, (const char *)re1->bytecode, re1->bytecode_len);
if (JS_IsException (bc)) goto fail;
} else {
bc = JS_NULL;
if (JS_IsNull (pattern1))
@@ -19970,9 +20016,41 @@ static JSValue js_regexp_compile (JSContext *ctx, JSValue this_val, int argc, JS
bc = js_compile_regexp (ctx, pattern, flags1);
if (JS_IsException (bc)) goto fail;
}
/* No need to free old values - copying GC handles it */
re->pattern = JS_VALUE_GET_STRING (pattern);
re->bytecode = JS_VALUE_GET_STRING (bc);
/* Free old C buffers */
js_free_rt (re->pattern);
re->pattern = NULL;
js_free_rt (re->bytecode);
re->bytecode = NULL;
/* Extract pattern as UTF-8 C string */
pat_cstr = JS_ToCStringLen (ctx, &pat_len, pattern);
if (!pat_cstr) goto fail;
re->pattern = js_malloc_rt (pat_len + 1);
if (!re->pattern) {
JS_FreeCString (ctx, pat_cstr);
goto fail;
}
memcpy (re->pattern, pat_cstr, pat_len + 1);
re->pattern_len = (uint32_t)pat_len;
JS_FreeCString (ctx, pat_cstr);
/* Extract bytecode as raw bytes */
if (MIST_IsImmediateASCII (bc)) {
bc_len = MIST_GetImmediateASCIILen (bc);
re->bytecode = js_malloc_rt (bc_len);
if (!re->bytecode) goto fail;
for (i = 0; i < bc_len; i++)
re->bytecode[i] = (uint8_t)MIST_GetImmediateASCIIChar (bc, i);
} else {
JSText *bc_str = (JSText *)JS_VALUE_GET_PTR (bc);
bc_len = (int)JSText_len (bc_str);
re->bytecode = js_malloc_rt (bc_len);
if (!re->bytecode) goto fail;
for (i = 0; i < bc_len; i++)
re->bytecode[i] = (uint8_t)string_get (bc_str, i);
}
re->bytecode_len = (uint32_t)bc_len;
{
JSValue key = JS_KEY_STR (ctx, "lastIndex");
int ret = JS_SetProperty (ctx, this_val, key, JS_NewInt32 (ctx, 0));
@@ -20050,7 +20128,8 @@ static uint16_t *js_string_to_utf16 (JSContext *ctx, JSText *str, int *out_len)
static JSValue js_regexp_exec (JSContext *ctx, JSValue this_val, int argc, JSValue *argv) {
JSRegExp *re = js_get_regexp (ctx, this_val, TRUE);
JSText *str;
JSValue ret, str_val, res, val, groups, captures_arr, match0;
JSGCRef str_ref;
JSValue ret, res, val, groups, captures_arr, match0;
uint8_t *re_bytecode;
uint8_t **capture, *str_buf;
uint16_t *utf16_buf = NULL;
@@ -20061,8 +20140,30 @@ static JSValue js_regexp_exec (JSContext *ctx, JSValue this_val, int argc, JSVal
if (!re) return JS_EXCEPTION;
str_val = JS_ToString (ctx, argv[0]);
if (JS_IsException (str_val)) return JS_EXCEPTION;
JS_PushGCRef (ctx, &str_ref);
str_ref.val = JS_ToString (ctx, argv[0]);
if (JS_IsException (str_ref.val)) {
JS_PopGCRef (ctx, &str_ref);
return JS_EXCEPTION;
}
/* Ensure str_ref.val is a heap string for JS_VALUE_GET_STRING */
if (MIST_IsImmediateASCII (str_ref.val)) {
int imm_len = MIST_GetImmediateASCIILen (str_ref.val);
/* Allocate at least 1 word even for empty string to ensure heap allocation */
JSText *hs = js_alloc_string (ctx, imm_len > 0 ? imm_len : 1);
if (!hs) {
JS_PopGCRef (ctx, &str_ref);
return JS_EXCEPTION;
}
for (int ci = 0; ci < imm_len; ci++)
string_put (hs, ci, MIST_GetImmediateASCIIChar (str_ref.val, ci));
/* Set capacity to actual length and stone it directly (bypass pretext_end
which returns JS_KEY_empty for len=0) */
hs->hdr = objhdr_set_cap56 (hs->hdr, imm_len);
hs->length = 0;
hs->hdr = objhdr_set_s (hs->hdr, true);
str_ref.val = JS_MKPTR (hs);
}
ret = JS_EXCEPTION;
res = JS_NULL;
@@ -20075,11 +20176,10 @@ static JSValue js_regexp_exec (JSContext *ctx, JSValue this_val, int argc, JSVal
if (JS_IsException (val) || JS_ToLength (ctx, &last_index, val))
goto fail;
re_bytecode = (uint8_t *)re->bytecode->packed;
re_bytecode = re->bytecode;
re_flags = lre_get_flags (re_bytecode);
if ((re_flags & (LRE_FLAG_GLOBAL | LRE_FLAG_STICKY)) == 0) last_index = 0;
str = JS_VALUE_GET_STRING (str_val);
capture_count = lre_get_capture_count (re_bytecode);
if (capture_count > 0) {
@@ -20087,12 +20187,17 @@ static JSValue js_regexp_exec (JSContext *ctx, JSValue this_val, int argc, JSVal
if (!capture) goto fail;
}
/* Refresh str after potential GC from js_malloc */
str = JS_VALUE_GET_STRING (str_ref.val);
/* Convert UTF-32 string to UTF-16 for regex engine */
utf16_buf = js_string_to_utf16 (ctx, str, &utf16_len);
if (!utf16_buf) goto fail;
shift = 1; /* UTF-16 mode */
str_buf = (uint8_t *)utf16_buf;
/* Refresh str again after potential GC from js_string_to_utf16 */
str = JS_VALUE_GET_STRING (str_ref.val);
if (last_index > (int)JSText_len (str)) {
rc = 2;
} else {
@@ -20160,6 +20265,7 @@ static JSValue js_regexp_exec (JSContext *ctx, JSValue this_val, int argc, JSVal
s = JS_NULL;
if (start != -1) {
str = JS_VALUE_GET_STRING (str_ref.val);
s = js_sub_string (ctx, str, start, end);
if (JS_IsException (s)) goto fail;
}
@@ -20208,11 +20314,13 @@ static JSValue js_regexp_exec (JSContext *ctx, JSValue this_val, int argc, JSVal
res = JS_NULL;
done:
JS_PopGCRef (ctx, &str_ref);
js_free (ctx, capture);
js_free (ctx, utf16_buf);
return ret;
fail:
JS_PopGCRef (ctx, &str_ref);
js_free (ctx, capture);
js_free (ctx, utf16_buf);
return JS_EXCEPTION;
@@ -27736,6 +27844,7 @@ typedef struct ASTParseState {
cJSON *errors; /* array of error objects */
int has_error;
int in_disruption;
char *decoded_str; /* allocated buffer for decoded string escapes */
union {
struct {
const char *str;
@@ -27808,18 +27917,9 @@ static BOOL ast_is_arrow_function (ASTParseState *s) {
}
static void ast_free_token (ASTParseState *s) {
switch (s->token_val) {
case TOK_STRING:
case TOK_TEMPLATE:
break;
case TOK_IDENT:
break;
case TOK_REGEXP:
break;
case TOK_NUMBER:
break;
default:
break;
if (s->decoded_str) {
sys_free (s->decoded_str);
s->decoded_str = NULL;
}
}
@@ -27882,6 +27982,45 @@ static void ast_error (ASTParseState *s, const uint8_t *ptr, const char *fmt, ..
s->has_error = 1;
}
/* Decode escape sequences in a string literal into dst. Returns decoded length. */
/* Decode backslash escape sequences from the body of a string literal.
 *
 *   src, len  raw literal bytes, without the surrounding quotes
 *   dst       caller-supplied output buffer; this function does no bounds
 *             checking and does not append a NUL terminator
 *
 * Recognised escapes: \n \t \r \\ \' \" \0 \b \f \v and \uXXXX (up to four
 * hex digits, encoded into dst with unicode_to_utf8). Any other escaped byte
 * is copied through without its backslash. A backslash that is the very last
 * input byte is copied verbatim.
 *
 * Returns the number of bytes written to dst.
 *
 * NOTE(review): each \u escape is decoded on its own; a surrogate pair written
 * as two \u escapes is not combined into one code point - confirm whether
 * that is intended. */
static int ast_decode_string (const uint8_t *src, int len, char *dst) {
  const uint8_t *end = src + len;
  char *out = dst;

  while (src < end) {
    /* Plain byte, or a trailing backslash with nothing after it. */
    if (*src != '\\' || src + 1 >= end) {
      *out++ = *src++;
      continue;
    }

    src++; /* skip the backslash; *src is now the escape selector */
    switch (*src) {
    case 'n': *out++ = '\n'; src++; break;
    case 't': *out++ = '\t'; src++; break;
    case 'r': *out++ = '\r'; src++; break;
    case '\\': *out++ = '\\'; src++; break;
    case '\'': *out++ = '\''; src++; break;
    case '\"': *out++ = '\"'; src++; break;
    case '0': *out++ = '\0'; src++; break;
    case 'b': *out++ = '\b'; src++; break;
    case 'f': *out++ = '\f'; src++; break;
    case 'v': *out++ = '\v'; src++; break;
    case 'u': {
      unsigned int cp = 0;
      src++; /* skip the 'u' */
      for (int i = 0; i < 4 && src < end; i++, src++) {
        unsigned int digit;
        if (*src >= '0' && *src <= '9') digit = (unsigned int)(*src - '0');
        else if (*src >= 'a' && *src <= 'f') digit = (unsigned int)(*src - 'a' + 10);
        else if (*src >= 'A' && *src <= 'F') digit = (unsigned int)(*src - 'A' + 10);
        else break; /* not a hex digit: stop and leave it for the main loop */
        /* Shift only once the digit is known to be valid, so a short escape
           such as \u41g decodes to 0x41 rather than 0x410. */
        cp = (cp << 4) | digit;
      }
      out += unicode_to_utf8 ((uint8_t *)out, cp);
    } break;
    default: *out++ = *src++; break;
    }
  }

  return (int)(out - dst);
}
static int ast_next_token (ASTParseState *s) {
const uint8_t *p;
int c;
@@ -27933,8 +28072,24 @@ redo:
}
p++;
s->token_val = TOK_TEMPLATE;
s->token_u.str.str = (const char *)(start + 1);
s->token_u.str.len = p - start - 2;
{
const uint8_t *raw = start + 1;
int raw_len = p - start - 2;
BOOL has_escape = FALSE;
for (int i = 0; i < raw_len; i++) {
if (raw[i] == '\\') { has_escape = TRUE; break; }
}
if (has_escape) {
char *buf = sys_malloc (raw_len * 4 + 1);
int decoded_len = ast_decode_string (raw, raw_len, buf);
s->decoded_str = buf;
s->token_u.str.str = buf;
s->token_u.str.len = decoded_len;
} else {
s->token_u.str.str = (const char *)raw;
s->token_u.str.len = raw_len;
}
}
} break;
case '\'':
case '\"': {
@@ -27951,10 +28106,27 @@ redo:
goto redo;
}
p++;
/* Store the string content without quotes */
/* Store the string content without quotes, decoding escape sequences */
s->token_val = TOK_STRING;
s->token_u.str.str = (const char *)(start + 1);
s->token_u.str.len = p - start - 2;
{
const uint8_t *raw = start + 1;
int raw_len = p - start - 2;
/* Check if any escape sequences need decoding */
BOOL has_escape = FALSE;
for (int i = 0; i < raw_len; i++) {
if (raw[i] == '\\') { has_escape = TRUE; break; }
}
if (has_escape) {
char *buf = sys_malloc (raw_len * 4 + 1);
int decoded_len = ast_decode_string (raw, raw_len, buf);
s->decoded_str = buf;
s->token_u.str.str = buf;
s->token_u.str.len = decoded_len;
} else {
s->token_u.str.str = (const char *)raw;
s->token_u.str.len = raw_len;
}
}
} break;
case '\r':
if (p[1] == '\n') p++;
@@ -28584,8 +28756,29 @@ static cJSON *ast_parse_primary (ASTParseState *s) {
while (p < tmpl_end) {
if (*p == '\\' && p + 1 < tmpl_end) {
p++; /* skip backslash */
if (len + 1 >= cap) { cap *= 2; fmt = sys_realloc (fmt, cap); }
fmt[len++] = *p++;
if (len + 8 >= cap) { cap *= 2; fmt = sys_realloc (fmt, cap); }
switch (*p) {
case 'n': fmt[len++] = '\n'; p++; break;
case 't': fmt[len++] = '\t'; p++; break;
case 'r': fmt[len++] = '\r'; p++; break;
case '\\': fmt[len++] = '\\'; p++; break;
case '`': fmt[len++] = '`'; p++; break;
case '$': fmt[len++] = '$'; p++; break;
case '0': fmt[len++] = '\0'; p++; break;
case 'u': {
p++;
unsigned int cp = 0;
for (int i = 0; i < 4 && p < tmpl_end; i++, p++) {
cp <<= 4;
if (*p >= '0' && *p <= '9') cp |= *p - '0';
else if (*p >= 'a' && *p <= 'f') cp |= *p - 'a' + 10;
else if (*p >= 'A' && *p <= 'F') cp |= *p - 'A' + 10;
else break;
}
len += unicode_to_utf8 ((uint8_t *)fmt + len, cp);
} break;
default: fmt[len++] = *p++; break;
}
continue;
}
if (*p == '$' && p + 1 < tmpl_end && p[1] == '{') {
@@ -29784,6 +29977,7 @@ static cJSON *ast_parse_program (ASTParseState *s) {
typedef struct ASTSemVar {
const char *name;
const char *scope_name; /* disambiguated name for block-scope vars (NULL = use name) */
int is_const;
const char *make; /* "def", "var", "function", "input" */
int function_nr; /* which function this var belongs to */
@@ -29798,6 +29992,7 @@ typedef struct ASTSemScope {
int in_loop;
int function_nr; /* function_nr of enclosing function */
int is_function_scope; /* 1 if this is a function's top-level scope */
int block_depth; /* 0 = function scope, 1+ = block scope */
} ASTSemScope;
typedef struct ASTSemState {
@@ -29806,6 +30001,7 @@ typedef struct ASTSemState {
cJSON *scopes_array;
const char *intrinsics[256];
int intrinsic_count;
int block_var_counter; /* monotonically increasing counter for unique block var names */
} ASTSemState;
static void ast_sem_error (ASTSemState *st, cJSON *node, const char *fmt, ...) {
@@ -29834,6 +30030,7 @@ static void ast_sem_add_var (ASTSemScope *scope, const char *name, int is_const,
if (scope->var_count < AST_SEM_MAX_VARS) {
ASTSemVar *v = &scope->vars[scope->var_count];
v->name = name;
v->scope_name = NULL;
v->is_const = is_const;
v->make = make;
v->function_nr = function_nr;
@@ -29843,6 +30040,26 @@ static void ast_sem_add_var (ASTSemScope *scope, const char *name, int is_const,
}
}
/* Propagate block-scope vars to the function scope (parent) with disambiguated names */
/* Append a copy of every variable recorded in `block` to `parent`.
 *
 * Each copy is entered under the block variable's disambiguated scope_name
 * when it has one, otherwise under its plain name, and the copy's own
 * scope_name is cleared. Only the fields assigned below are written.
 * Variables that do not fit (parent already holds AST_SEM_MAX_VARS entries)
 * are dropped without any diagnostic. `st` is not used here. */
static void ast_sem_propagate_block_vars (ASTSemState *st, ASTSemScope *parent,
                                          ASTSemScope *block) {
  int idx = 0;

  while (idx < block->var_count) {
    ASTSemVar *from = &block->vars[idx];
    idx++;

    /* No room left in the parent: nothing to record for this variable. */
    if (parent->var_count >= AST_SEM_MAX_VARS)
      continue;

    ASTSemVar *to = &parent->vars[parent->var_count];

    if (from->scope_name)
      to->name = from->scope_name;
    else
      to->name = from->name;
    to->scope_name = NULL;

    to->is_const = from->is_const;
    to->make = from->make;
    to->function_nr = from->function_nr;
    to->nr_uses = from->nr_uses;
    to->closure = from->closure;

    parent->var_count += 1;
  }
}
typedef struct {
ASTSemVar *var;
int level;
@@ -29934,6 +30151,8 @@ static void ast_sem_check_assign_target (ASTSemState *st, ASTSemScope *scope, cJ
if (r.var) {
cJSON_AddNumberToObject (left, "level", r.level);
cJSON_AddNumberToObject (left, "function_nr", r.def_function_nr);
if (r.var->scope_name)
cJSON_AddStringToObject (left, "scope_name", r.var->scope_name);
} else {
cJSON_AddNumberToObject (left, "level", -1);
}
@@ -30132,6 +30351,8 @@ static void ast_sem_check_expr (ASTSemState *st, ASTSemScope *scope, cJSON *expr
cJSON_AddNumberToObject (expr, "function_nr", r.def_function_nr);
r.var->nr_uses++;
if (r.level > 0) r.var->closure = 1;
if (r.var->scope_name)
cJSON_AddStringToObject (expr, "scope_name", r.var->scope_name);
} else {
cJSON_AddNumberToObject (expr, "level", -1);
ast_sem_add_intrinsic (st, name);
@@ -30167,6 +30388,14 @@ static void ast_sem_check_stmt (ASTSemState *st, ASTSemScope *scope, cJSON *stmt
ast_sem_error (st, left, "cannot redeclare constant '%s'", name);
}
ast_sem_add_var (scope, name, 0, "var", scope->function_nr);
if (scope->block_depth > 0) {
char buf[128];
snprintf (buf, sizeof (buf), "_%s_%d", name, st->block_var_counter++);
char *sn = sys_malloc (strlen (buf) + 1);
strcpy (sn, buf);
scope->vars[scope->var_count - 1].scope_name = sn;
cJSON_AddStringToObject (left, "scope_name", sn);
}
}
ast_sem_check_expr (st, scope, cJSON_GetObjectItem (stmt, "right"));
return;
@@ -30184,6 +30413,14 @@ static void ast_sem_check_stmt (ASTSemState *st, ASTSemScope *scope, cJSON *stmt
ast_sem_error (st, left, "cannot redeclare '%s' as constant", name);
}
ast_sem_add_var (scope, name, 1, "def", scope->function_nr);
if (scope->block_depth > 0) {
char buf[128];
snprintf (buf, sizeof (buf), "_%s_%d", name, st->block_var_counter++);
char *sn = sys_malloc (strlen (buf) + 1);
strcpy (sn, buf);
scope->vars[scope->var_count - 1].scope_name = sn;
cJSON_AddStringToObject (left, "scope_name", sn);
}
}
ast_sem_check_expr (st, scope, cJSON_GetObjectItem (stmt, "right"));
return;
@@ -30197,14 +30434,35 @@ static void ast_sem_check_stmt (ASTSemState *st, ASTSemScope *scope, cJSON *stmt
if (strcmp (kind, "if") == 0) {
ast_sem_check_expr (st, scope, cJSON_GetObjectItem (stmt, "expression"));
cJSON *s2;
cJSON_ArrayForEach (s2, cJSON_GetObjectItem (stmt, "then")) {
ast_sem_check_stmt (st, scope, s2);
{
ASTSemScope then_scope = {0};
then_scope.parent = scope;
then_scope.function_nr = scope->function_nr;
then_scope.block_depth = scope->block_depth + 1;
cJSON_ArrayForEach (s2, cJSON_GetObjectItem (stmt, "then")) {
ast_sem_check_stmt (st, &then_scope, s2);
}
ast_sem_propagate_block_vars (st, scope, &then_scope);
}
cJSON_ArrayForEach (s2, cJSON_GetObjectItem (stmt, "list")) {
ast_sem_check_stmt (st, scope, s2);
{
ASTSemScope list_scope = {0};
list_scope.parent = scope;
list_scope.function_nr = scope->function_nr;
list_scope.block_depth = scope->block_depth + 1;
cJSON_ArrayForEach (s2, cJSON_GetObjectItem (stmt, "list")) {
ast_sem_check_stmt (st, &list_scope, s2);
}
ast_sem_propagate_block_vars (st, scope, &list_scope);
}
cJSON_ArrayForEach (s2, cJSON_GetObjectItem (stmt, "else")) {
ast_sem_check_stmt (st, scope, s2);
{
ASTSemScope else_scope = {0};
else_scope.parent = scope;
else_scope.function_nr = scope->function_nr;
else_scope.block_depth = scope->block_depth + 1;
cJSON_ArrayForEach (s2, cJSON_GetObjectItem (stmt, "else")) {
ast_sem_check_stmt (st, &else_scope, s2);
}
ast_sem_propagate_block_vars (st, scope, &else_scope);
}
return;
}
@@ -30215,10 +30473,12 @@ static void ast_sem_check_stmt (ASTSemState *st, ASTSemScope *scope, cJSON *stmt
loop_scope.parent = scope;
loop_scope.in_loop = 1;
loop_scope.function_nr = scope->function_nr;
loop_scope.block_depth = scope->block_depth + 1;
cJSON *s2;
cJSON_ArrayForEach (s2, cJSON_GetObjectItem (stmt, "statements")) {
ast_sem_check_stmt (st, &loop_scope, s2);
}
ast_sem_propagate_block_vars (st, scope, &loop_scope);
return;
}
@@ -30227,10 +30487,12 @@ static void ast_sem_check_stmt (ASTSemState *st, ASTSemScope *scope, cJSON *stmt
loop_scope.parent = scope;
loop_scope.in_loop = 1;
loop_scope.function_nr = scope->function_nr;
loop_scope.block_depth = scope->block_depth + 1;
cJSON *s2;
cJSON_ArrayForEach (s2, cJSON_GetObjectItem (stmt, "statements")) {
ast_sem_check_stmt (st, &loop_scope, s2);
}
ast_sem_propagate_block_vars (st, scope, &loop_scope);
ast_sem_check_expr (st, scope, cJSON_GetObjectItem (stmt, "expression"));
return;
}
@@ -30240,6 +30502,7 @@ static void ast_sem_check_stmt (ASTSemState *st, ASTSemScope *scope, cJSON *stmt
loop_scope.parent = scope;
loop_scope.in_loop = 1;
loop_scope.function_nr = scope->function_nr;
loop_scope.block_depth = scope->block_depth + 1;
/* init may be a var/def statement or expression */
cJSON *init = cJSON_GetObjectItem (stmt, "init");
if (init) {
@@ -30256,6 +30519,7 @@ static void ast_sem_check_stmt (ASTSemState *st, ASTSemScope *scope, cJSON *stmt
cJSON_ArrayForEach (s2, cJSON_GetObjectItem (stmt, "statements")) {
ast_sem_check_stmt (st, &loop_scope, s2);
}
ast_sem_propagate_block_vars (st, scope, &loop_scope);
return;
}
@@ -30286,10 +30550,12 @@ static void ast_sem_check_stmt (ASTSemState *st, ASTSemScope *scope, cJSON *stmt
ASTSemScope block_scope = {0};
block_scope.parent = scope;
block_scope.function_nr = scope->function_nr;
block_scope.block_depth = scope->block_depth + 1;
cJSON *s2;
cJSON_ArrayForEach (s2, cJSON_GetObjectItem (stmt, "statements")) {
ast_sem_check_stmt (st, &block_scope, s2);
}
ast_sem_propagate_block_vars (st, scope, &block_scope);
return;
}
@@ -33361,11 +33627,13 @@ static int mach_gen_compound_assign (MachGenState *s, cJSON *node, const char *o
if (strcmp (left_kind, "name") == 0) {
const char *name = cJSON_GetStringValue (cJSON_GetObjectItem (left, "name"));
const char *sn = cJSON_GetStringValue (cJSON_GetObjectItem (left, "scope_name"));
const char *ln = sn ? sn : name;
cJSON *level_node = cJSON_GetObjectItem (left, "level");
int level = level_node ? (int)cJSON_GetNumberValue (level_node) : -1;
int left_slot = mach_gen_alloc_slot (s);
if (level == 0 || level == -1) {
int local = mach_gen_find_var (s, name);
int local = mach_gen_find_var (s, ln);
if (local >= 0) {
mach_gen_emit_2 (s, "move", left_slot, local);
level = 0; /* treat as local for the store below */
@@ -33374,7 +33642,7 @@ static int mach_gen_compound_assign (MachGenState *s, cJSON *node, const char *o
if (level > 0) {
MachGenState *target = s;
for (int i = 0; i < level; i++) target = target->parent;
int slot = mach_gen_find_var (target, name);
int slot = mach_gen_find_var (target, ln);
mach_gen_emit_3 (s, "get", left_slot, slot, level);
} else if (level == -1) {
cJSON *instr = cJSON_CreateArray ();
@@ -33391,12 +33659,12 @@ static int mach_gen_compound_assign (MachGenState *s, cJSON *node, const char *o
int dest = mach_gen_alloc_slot (s);
mach_gen_emit_3 (s, op, dest, left_slot, right_slot);
if (level == 0) {
int local = mach_gen_find_var (s, name);
int local = mach_gen_find_var (s, ln);
if (local >= 0) mach_gen_emit_2 (s, "move", local, dest);
} else if (level > 0) {
MachGenState *target = s;
for (int i = 0; i < level; i++) target = target->parent;
int slot = mach_gen_find_var (target, name);
int slot = mach_gen_find_var (target, ln);
mach_gen_emit_3 (s, "put", dest, slot, level);
} else {
cJSON *instr = cJSON_CreateArray ();
@@ -33450,15 +33718,27 @@ static int mach_gen_assign (MachGenState *s, cJSON *node) {
if (strcmp (kind, ">>=") == 0) return mach_gen_compound_assign (s, node, "shr");
if (strcmp (kind, ">>>=") == 0) return mach_gen_compound_assign (s, node, "ushr");
/* Push: arr[] = val */
cJSON *push_flag = cJSON_GetObjectItem (node, "push");
if (push_flag && cJSON_IsTrue (push_flag)) {
cJSON *arr_expr = cJSON_GetObjectItem (left, "left");
int arr_slot = mach_gen_expr (s, arr_expr, -1);
int val_slot = mach_gen_expr (s, right, -1);
mach_gen_emit_2 (s, "push", arr_slot, val_slot);
return val_slot;
}
int val_slot = mach_gen_expr (s, right, -1);
const char *left_kind = cJSON_GetStringValue (cJSON_GetObjectItem (left, "kind"));
if (strcmp (left_kind, "name") == 0) {
const char *name = cJSON_GetStringValue (cJSON_GetObjectItem (left, "name"));
const char *sn = cJSON_GetStringValue (cJSON_GetObjectItem (left, "scope_name"));
const char *ln = sn ? sn : name;
cJSON *level_node = cJSON_GetObjectItem (left, "level");
int level = level_node ? (int)cJSON_GetNumberValue (level_node) : -1;
if (level == 0 || level == -1) {
int slot = mach_gen_find_var (s, name);
int slot = mach_gen_find_var (s, ln);
if (slot >= 0) mach_gen_emit_2 (s, "move", slot, val_slot);
else if (level == -1) {
/* No annotation and not local — set global */
@@ -33471,7 +33751,7 @@ static int mach_gen_assign (MachGenState *s, cJSON *node) {
} else if (level > 0) {
MachGenState *target = s;
for (int i = 0; i < level; i++) target = target->parent;
int slot = mach_gen_find_var (target, name);
int slot = mach_gen_find_var (target, ln);
mach_gen_emit_3 (s, "put", val_slot, slot, level);
} else {
mach_gen_error (s, node, "cannot assign to unbound variable '%s'", name);
@@ -33512,6 +33792,18 @@ static int mach_gen_expr (MachGenState *s, cJSON *expr, int target) {
mach_gen_emit_const_str (s, slot, val ? val : "");
return slot;
}
if (strcmp (kind, "regexp") == 0) {
int slot = target >= 0 ? target : mach_gen_alloc_slot (s);
const char *pattern = cJSON_GetStringValue (cJSON_GetObjectItem (expr, "pattern"));
const char *flags = cJSON_GetStringValue (cJSON_GetObjectItem (expr, "flags"));
cJSON *instr = cJSON_CreateArray ();
cJSON_AddItemToArray (instr, cJSON_CreateString ("regexp"));
cJSON_AddItemToArray (instr, cJSON_CreateNumber (slot));
cJSON_AddItemToArray (instr, cJSON_CreateString (pattern ? pattern : ""));
cJSON_AddItemToArray (instr, cJSON_CreateString (flags ? flags : ""));
mach_gen_add_instr (s, instr);
return slot;
}
if (strcmp (kind, "true") == 0) {
int slot = target >= 0 ? target : mach_gen_alloc_slot (s);
mach_gen_emit_const_bool (s, slot, 1);
@@ -33534,16 +33826,18 @@ static int mach_gen_expr (MachGenState *s, cJSON *expr, int target) {
/* Variable reference — uses parser-provided level annotation */
if (strcmp (kind, "name") == 0) {
const char *name = cJSON_GetStringValue (cJSON_GetObjectItem (expr, "name"));
const char *scope_name = cJSON_GetStringValue (cJSON_GetObjectItem (expr, "scope_name"));
const char *lookup_name = scope_name ? scope_name : name;
cJSON *level_node = cJSON_GetObjectItem (expr, "level");
int level = level_node ? (int)cJSON_GetNumberValue (level_node) : -1;
if (level == 0 || level == -1) {
/* level 0 = known local; level -1 = no annotation, try local first */
int slot = mach_gen_find_var (s, name);
int slot = mach_gen_find_var (s, lookup_name);
if (slot >= 0) return slot;
} else if (level > 0) {
MachGenState *target = s;
for (int i = 0; i < level; i++) target = target->parent;
int parent_slot = mach_gen_find_var (target, name);
int parent_slot = mach_gen_find_var (target, lookup_name);
int dest = mach_gen_alloc_slot (s);
mach_gen_emit_3 (s, "get", dest, parent_slot, level);
return dest;
@@ -33649,17 +33943,19 @@ static int mach_gen_expr (MachGenState *s, cJSON *expr, int target) {
if (strcmp (operand_kind, "name") == 0) {
const char *name = cJSON_GetStringValue (cJSON_GetObjectItem (operand, "name"));
const char *inc_sn = cJSON_GetStringValue (cJSON_GetObjectItem (operand, "scope_name"));
const char *inc_ln = inc_sn ? inc_sn : name;
cJSON *level_node = cJSON_GetObjectItem (operand, "level");
int level = level_node ? (int)cJSON_GetNumberValue (level_node) : -1;
int old_slot = mach_gen_alloc_slot (s);
/* Load current value */
if (level == 0) {
int local = mach_gen_find_var (s, name);
int local = mach_gen_find_var (s, inc_ln);
if (local >= 0) mach_gen_emit_2 (s, "move", old_slot, local);
} else if (level > 0) {
MachGenState *target = s;
for (int i = 0; i < level; i++) target = target->parent;
int slot = mach_gen_find_var (target, name);
int slot = mach_gen_find_var (target, inc_ln);
mach_gen_emit_3 (s, "get", old_slot, slot, level);
} else {
cJSON *instr = cJSON_CreateArray ();
@@ -33676,12 +33972,12 @@ static int mach_gen_expr (MachGenState *s, cJSON *expr, int target) {
mach_gen_emit_3 (s, arith_op, new_slot, old_slot, one_slot);
/* Store new value */
if (level == 0) {
int local = mach_gen_find_var (s, name);
int local = mach_gen_find_var (s, inc_ln);
if (local >= 0) mach_gen_emit_2 (s, "move", local, new_slot);
} else if (level > 0) {
MachGenState *target = s;
for (int i = 0; i < level; i++) target = target->parent;
int slot = mach_gen_find_var (target, name);
int slot = mach_gen_find_var (target, inc_ln);
mach_gen_emit_3 (s, "put", new_slot, slot, level);
}
return postfix ? old_slot : new_slot;
@@ -33848,7 +34144,18 @@ static void mach_gen_statement (MachGenState *s, cJSON *stmt) {
cJSON *left = cJSON_GetObjectItem (stmt, "left");
cJSON *right = cJSON_GetObjectItem (stmt, "right");
const char *name = cJSON_GetStringValue (cJSON_GetObjectItem (left, "name"));
int local_slot = mach_gen_find_var (s, name);
const char *scope_name = cJSON_GetStringValue (cJSON_GetObjectItem (left, "scope_name"));
const char *lookup_name = scope_name ? scope_name : name;
int local_slot = mach_gen_find_var (s, lookup_name);
/* Pop: var val = arr[] */
cJSON *pop_flag = cJSON_GetObjectItem (stmt, "pop");
if (pop_flag && cJSON_IsTrue (pop_flag) && right) {
cJSON *arr_expr = cJSON_GetObjectItem (right, "left");
int arr_slot = mach_gen_expr (s, arr_expr, -1);
if (local_slot >= 0)
mach_gen_emit_2 (s, "pop", local_slot, arr_slot);
return;
}
if (right) {
int val_slot = mach_gen_expr (s, right, local_slot);
if (local_slot >= 0 && val_slot != local_slot)
@@ -34325,6 +34632,7 @@ static cJSON *mach_gen_program (MachGenState *s, cJSON *ast) {
mach_gen_statement (s, stmt);
last_expr_slot = -1;
} else if (strcmp (kind, "var") == 0 || strcmp (kind, "def") == 0 ||
strcmp (kind, "var_list") == 0 || strcmp (kind, "def_list") == 0 ||
strcmp (kind, "function") == 0 || strcmp (kind, "block") == 0 ||
strcmp (kind, "if") == 0 || strcmp (kind, "while") == 0 ||
strcmp (kind, "do") == 0 || strcmp (kind, "for") == 0 ||
@@ -35673,6 +35981,27 @@ static JSValue mcode_exec(JSContext *ctx, JSMCode *code, JSValue this_obj,
frame->slots[dest] = stoned;
}
/* ---- Regexp literal ---- */
else if (strcmp(op, "regexp") == 0) {
int dest = (int)a1->valuedouble;
const char *pattern = a2 ? a2->valuestring : "";
cJSON *a3 = cJSON_GetArrayItem(instr, 3);
const char *flags_str = a3 ? a3->valuestring : "";
if (!pattern) pattern = "";
if (!flags_str) flags_str = "";
JSValue pat_val = JS_NewString(ctx, pattern);
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
JSValue flags_val = *flags_str ? JS_NewString(ctx, flags_str) : JS_NULL;
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
JSValue bc = js_compile_regexp(ctx, pat_val, flags_val);
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
if (JS_IsException(bc)) { goto disrupt; }
JSValue re_obj = js_regexp_constructor_internal(ctx, pat_val, bc);
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
if (JS_IsException(re_obj)) { goto disrupt; }
frame->slots[dest] = re_obj;
}
/* ---- Push (append to array) ---- */
else if (strcmp(op, "push") == 0) {
int arr_slot = (int)a1->valuedouble;