From a08ee50f8451edd549e4b96765f7bf2fb6729cbb Mon Sep 17 00:00:00 2001 From: John Alanbrook Date: Wed, 4 Feb 2026 20:57:44 -0600 Subject: [PATCH] serializable bytecode --- source/quickjs-opcode.h | 1 + source/quickjs.c | 1552 ++++++++++++++++++++++++++++++++++++++- source/quickjs.h | 123 ++++ source/suite.c | 139 ++++ 4 files changed, 1785 insertions(+), 30 deletions(-) diff --git a/source/quickjs-opcode.h b/source/quickjs-opcode.h index e31c91b6..3cc5b18d 100644 --- a/source/quickjs-opcode.h +++ b/source/quickjs-opcode.h @@ -205,6 +205,7 @@ DEF( set_up, 4, 1, 0, u8_u16) /* value, depth:u8, slot:u16 -> */ /* Name resolution with bytecode patching */ DEF( get_name, 5, 0, 1, const) /* cpool_idx -> value, patches itself */ DEF( get_env_slot, 3, 0, 1, u16) /* slot -> value (patched from get_name) */ +DEF( set_env_slot, 3, 1, 0, u16) /* value -> slot (patched from put_var) */ DEF(get_global_slot, 3, 0, 1, u16) /* slot -> value (patched from get_var) */ DEF(set_global_slot, 3, 1, 0, u16) /* value -> slot (patched from put_var) */ diff --git a/source/quickjs.c b/source/quickjs.c index 06ce877e..cd397c65 100644 --- a/source/quickjs.c +++ b/source/quickjs.c @@ -815,7 +815,6 @@ struct JSContext { JSValue throw_type_error; JSValue global_obj; /* global object (immutable intrinsics) */ - JSValue eval_env; /* environment record for eval (stone record) */ uint64_t random_state; @@ -1683,6 +1682,45 @@ typedef struct JSFunctionBytecode { } debug; } JSFunctionBytecode; +/* New simplified compiled unit structure for Phase 1+ simplification. 
+ Replaces JSFunctionBytecode with a simpler model: + - No closure machinery (uses outer_frame chain at runtime) + - Free variables resolved at link time against env + globals + - Nested functions stored as separate units in cpool */ +typedef struct JSCompiledUnit { + objhdr_t header; /* must come first */ + + /* Bytecode (self pointer) */ + uint8_t *byte_code_buf; + int byte_code_len; + + /* Constants - strings, numbers, nested unit refs (self pointer) */ + JSValue *cpool; + int cpool_count; + + /* Stack requirements */ + uint16_t local_count; /* total local slots (args + vars) */ + uint16_t stack_size; /* operand stack depth */ + + /* Flags */ + uint8_t has_debug : 1; + uint8_t read_only_bytecode : 1; + + /* Debug info (optional - only present if has_debug) */ + struct { + JSValue filename; + int source_len; + int pc2line_len; + uint8_t *pc2line_buf; + char *source; + } debug; +} JSCompiledUnit; + +/* ============================================================ + Context-Neutral Module Format (Phase 2+) + Struct definitions are in quickjs.h + ============================================================ */ + typedef struct JSBoundFunction { JSValue func_obj; JSValue this_val; @@ -2503,10 +2541,6 @@ static int ctx_gc (JSContext *ctx, int allow_grow) { #ifdef DUMP_GC_DETAIL printf(" after copy: global_obj = 0x%llx\n", (unsigned long long)ctx->global_obj); fflush(stdout); #endif -#ifdef DUMP_GC_DETAIL - printf(" roots: eval_env\n"); fflush(stdout); -#endif - ctx->eval_env = gc_copy_value (ctx, ctx->eval_env, from_base, from_end, to_base, &to_free, to_end); #ifdef DUMP_GC_DETAIL printf(" roots: regexp_ctor\n"); fflush(stdout); #endif @@ -8127,11 +8161,32 @@ restart: CASE (OP_get_env_slot) : { int slot = get_u16 (pc); pc += 2; - JSRecord *env = (JSRecord *)JS_VALUE_GET_OBJ (ctx->eval_env); + /* Get env_record from current function, not global ctx->eval_env */ + JSFunction *fn = JS_VALUE_GET_FUNCTION (sf->cur_func); + JSValue env_val = fn->u.func.env_record; + if 
(JS_IsNull (env_val)) { + JS_ThrowReferenceError (ctx, "no environment record"); + goto exception; + } + JSRecord *env = (JSRecord *)JS_VALUE_GET_OBJ (env_val); *sp++ = env->slots[slot].val; } BREAK; + CASE (OP_set_env_slot) : { + int slot = get_u16 (pc); pc += 2; + /* Get env_record from current function */ + JSFunction *fn = JS_VALUE_GET_FUNCTION (sf->cur_func); + JSValue env_val = fn->u.func.env_record; + if (JS_IsNull (env_val)) { + JS_ThrowReferenceError (ctx, "no environment record"); + goto exception; + } + JSRecord *env = (JSRecord *)JS_VALUE_GET_OBJ (env_val); + env->slots[slot].val = *--sp; + } + BREAK; + CASE (OP_get_global_slot) : { int slot = get_u16 (pc); pc += 2; JSRecord *global = (JSRecord *)JS_VALUE_GET_OBJ (ctx->global_obj); @@ -8598,13 +8653,14 @@ static const JSOpCode opcode_info[OP_COUNT + (OP_TEMP_END - OP_TEMP_START)] = { /* Clone bytecode and resolve OP_get_var to OP_get_global_slot. Returns new bytecode on success, NULL on link error. - The linked bytecode is a separate allocation that can be modified. */ + The linked bytecode is a separate allocation that can be modified. + Note: closure_var is not copied - closures use outer_frame chain at runtime. 
*/ static JSFunctionBytecode *js_link_bytecode (JSContext *ctx, JSFunctionBytecode *tpl, JSValue env) { /* Calculate total size of bytecode allocation */ int function_size; - int cpool_offset, vardefs_offset, closure_var_offset, byte_code_offset; + int cpool_offset, vardefs_offset, byte_code_offset; if (tpl->has_debug) { function_size = sizeof (JSFunctionBytecode); @@ -8620,8 +8676,7 @@ static JSFunctionBytecode *js_link_bytecode (JSContext *ctx, function_size += (tpl->arg_count + tpl->var_count) * sizeof (JSVarDef); } - closure_var_offset = function_size; - function_size += tpl->closure_var_count * sizeof (JSClosureVar); + /* closure_var not needed at runtime - closures use outer_frame chain */ byte_code_offset = function_size; function_size += tpl->byte_code_len; @@ -8637,6 +8692,10 @@ static JSFunctionBytecode *js_link_bytecode (JSContext *ctx, memcpy (linked, tpl, offsetof (JSFunctionBytecode, debug)); } + /* Clear closure_var - not needed at runtime */ + linked->closure_var = NULL; + linked->closure_var_count = 0; + /* Fix up self pointers */ if (tpl->cpool_count > 0) { linked->cpool = (JSValue *)((uint8_t *)linked + cpool_offset); @@ -8649,12 +8708,6 @@ static JSFunctionBytecode *js_link_bytecode (JSContext *ctx, (tpl->arg_count + tpl->var_count) * sizeof (JSVarDef)); } - if (tpl->closure_var_count > 0) { - linked->closure_var = (JSClosureVar *)((uint8_t *)linked + closure_var_offset); - memcpy (linked->closure_var, tpl->closure_var, - tpl->closure_var_count * sizeof (JSClosureVar)); - } - linked->byte_code_buf = (uint8_t *)linked + byte_code_offset; memcpy (linked->byte_code_buf, tpl->byte_code_buf, tpl->byte_code_len); @@ -8716,14 +8769,39 @@ static JSFunctionBytecode *js_link_bytecode (JSContext *ctx, /* OP_get_var_undef is ok - leaves as is for runtime check */ } - /* Patch OP_put_var family -> error (global is immutable) */ + /* Patch OP_put_var family -> OP_set_env_slot or error */ if (op == OP_put_var || op == OP_put_var_init || op == 
OP_put_var_strict) { uint32_t cpool_idx = get_u32 (bc + pos + 1); JSValue name = linked->cpool[cpool_idx]; + /* Try env first (if provided) - env is writable */ + if (env_rec) { + int slot = rec_find_slot (env_rec, name); + if (slot > 0) { + bc[pos] = OP_set_env_slot; + put_u16 (bc + pos + 1, (uint16_t)slot); + bc[pos + 3] = OP_nop; + bc[pos + 4] = OP_nop; + pos += len; + continue; + } + } + + /* Try global for set_global_slot */ + JSRecord *global = (JSRecord *)JS_VALUE_GET_OBJ (ctx->global_obj); + int slot = rec_find_slot (global, name); + if (slot > 0) { + bc[pos] = OP_set_global_slot; + put_u16 (bc + pos + 1, (uint16_t)slot); + bc[pos + 3] = OP_nop; + bc[pos + 4] = OP_nop; + pos += len; + continue; + } + /* Global object is immutable - can't write to intrinsics */ char buf[64]; - JS_ThrowReferenceError (ctx, "cannot assign to '%s' - global object is immutable", + JS_ThrowReferenceError (ctx, "cannot assign to '%s' - not found in environment", JS_KeyGetStr (ctx, buf, sizeof (buf), name)); pjs_free (linked); return NULL; @@ -8762,6 +8840,162 @@ static JSFunctionBytecode *js_link_bytecode (JSContext *ctx, return linked; } +/* New simplified linker producing JSCompiledUnit. 
+ Converts JSFunctionBytecode template to JSCompiledUnit: + - Copies bytecode, cpool (no vardefs, no closure_var) + - Patches OP_get_var -> OP_get_env_slot or OP_get_global_slot + - Returns standalone unit ready for execution */ +static JSCompiledUnit *js_link_unit (JSContext *ctx, + JSFunctionBytecode *tpl, + JSValue env) { + int function_size; + int cpool_offset, byte_code_offset; + + /* Calculate size: base struct + cpool + bytecode */ + if (tpl->has_debug) { + function_size = sizeof (JSCompiledUnit); + } else { + function_size = offsetof (JSCompiledUnit, debug); + } + + cpool_offset = function_size; + function_size += tpl->cpool_count * sizeof (JSValue); + + byte_code_offset = function_size; + function_size += tpl->byte_code_len; + + /* Allocate */ + JSCompiledUnit *unit = pjs_malloc (function_size); + if (!unit) return NULL; + + /* Initialize header */ + unit->header = objhdr_make (0, OBJ_CODE, false, false, false, false); + unit->has_debug = tpl->has_debug; + unit->read_only_bytecode = 0; + + /* Copy stack requirements */ + unit->local_count = tpl->arg_count + tpl->var_count; + unit->stack_size = tpl->stack_size; + + /* Setup cpool */ + unit->cpool_count = tpl->cpool_count; + if (tpl->cpool_count > 0) { + unit->cpool = (JSValue *)((uint8_t *)unit + cpool_offset); + memcpy (unit->cpool, tpl->cpool, tpl->cpool_count * sizeof (JSValue)); + } else { + unit->cpool = NULL; + } + + /* Copy bytecode */ + unit->byte_code_buf = (uint8_t *)unit + byte_code_offset; + unit->byte_code_len = tpl->byte_code_len; + memcpy (unit->byte_code_buf, tpl->byte_code_buf, tpl->byte_code_len); + + /* Copy debug info if present */ + if (tpl->has_debug) { + unit->debug.filename = tpl->debug.filename; + unit->debug.source_len = tpl->debug.source_len; + unit->debug.pc2line_len = tpl->debug.pc2line_len; + unit->debug.pc2line_buf = tpl->debug.pc2line_buf; + unit->debug.source = tpl->debug.source; + } + + /* Walk bytecode and patch global variable access opcodes */ + uint8_t *bc = 
unit->byte_code_buf; + int pos = 0; + + /* Get env record if provided */ + JSRecord *env_rec = NULL; + if (!JS_IsNull (env) && JS_IsRecord (env)) { + env_rec = (JSRecord *)JS_VALUE_GET_OBJ (env); + } + + while (pos < unit->byte_code_len) { + uint8_t op = bc[pos]; + int len = short_opcode_info (op).size; + + /* Patch OP_get_var -> OP_get_global_slot or OP_get_env_slot */ + if (op == OP_get_var || op == OP_get_var_undef) { + uint32_t cpool_idx = get_u32 (bc + pos + 1); + JSValue name = unit->cpool[cpool_idx]; + + /* Try env first (if provided) */ + if (env_rec) { + int slot = rec_find_slot (env_rec, name); + if (slot > 0) { + bc[pos] = OP_get_env_slot; + put_u16 (bc + pos + 1, (uint16_t)slot); + bc[pos + 3] = OP_nop; + bc[pos + 4] = OP_nop; + pos += len; + continue; + } + } + + /* Try global_obj (intrinsics like 'print') */ + JSRecord *global = (JSRecord *)JS_VALUE_GET_OBJ (ctx->global_obj); + int slot = rec_find_slot (global, name); + if (slot > 0) { + bc[pos] = OP_get_global_slot; + put_u16 (bc + pos + 1, (uint16_t)slot); + bc[pos + 3] = OP_nop; + bc[pos + 4] = OP_nop; + pos += len; + continue; + } + + /* Link error: variable not found */ + if (op == OP_get_var) { + char buf[64]; + JS_ThrowReferenceError (ctx, "'%s' is not defined", + JS_KeyGetStr (ctx, buf, sizeof (buf), name)); + pjs_free (unit); + return NULL; + } + } + + /* Patch OP_put_var family -> error (global is immutable) */ + if (op == OP_put_var || op == OP_put_var_init || op == OP_put_var_strict) { + uint32_t cpool_idx = get_u32 (bc + pos + 1); + JSValue name = unit->cpool[cpool_idx]; + char buf[64]; + JS_ThrowReferenceError (ctx, "cannot assign to '%s' - global object is immutable", + JS_KeyGetStr (ctx, buf, sizeof (buf), name)); + pjs_free (unit); + return NULL; + } + + /* Patch OP_check_var -> OP_nop (if variable exists) */ + if (op == OP_check_var) { + uint32_t cpool_idx = get_u32 (bc + pos + 1); + JSValue name = unit->cpool[cpool_idx]; + + BOOL found = FALSE; + if (env_rec && rec_find_slot 
(env_rec, name) > 0) { + found = TRUE; + } + if (!found) { + JSRecord *global = (JSRecord *)JS_VALUE_GET_OBJ (ctx->global_obj); + if (rec_find_slot (global, name) > 0) { + found = TRUE; + } + } + + if (found) { + bc[pos] = OP_nop; + bc[pos + 1] = OP_nop; + bc[pos + 2] = OP_nop; + bc[pos + 3] = OP_nop; + bc[pos + 4] = OP_nop; + } + } + + pos += len; + } + + return unit; +} + static __exception int next_token (JSParseState *s); static void free_token (JSParseState *s, JSToken *token) { @@ -16596,8 +16830,16 @@ static __exception int js_parse_program (JSParseState *s) { if (js_parse_source_element (s)) return -1; } - /* Implicit return null - use explicit 'return value' to return a value */ - emit_return (s, FALSE); + /* For eval-like semantics: if the last statement was an expression, + return its value instead of null. Expression statements emit OP_drop + to discard the value - remove that and emit OP_return instead. */ + if (get_prev_opcode (fd) == OP_drop) { + fd->byte_code.size = fd->last_opcode_pos; + fd->last_opcode_pos = -1; + emit_return (s, TRUE); + } else { + emit_return (s, FALSE); + } return 0; } @@ -16633,7 +16875,6 @@ JSValue JS_Compile (JSContext *ctx, const char *input, size_t input_len, Variables resolve: env first, then global intrinsics. 
*/ JSValue JS_Integrate (JSContext *ctx, JSValue fun_obj, JSValue env) { JSValue ret_val; - JSValue saved_env; uint32_t tag; JSGCRef env_ref, fun_ref; JSFunctionBytecode *tpl, *linked_bc; @@ -16665,16 +16906,17 @@ JSValue JS_Integrate (JSContext *ctx, JSValue fun_obj, JSValue env) { JS_DeleteGCRef (ctx, &fun_ref); JS_DeleteGCRef (ctx, &env_ref); - /* Save and set eval environment for OP_get_env_slot */ - saved_env = ctx->eval_env; - ctx->eval_env = env; - - /* Create closure and execute */ + /* Create closure and set env_record on the function object */ linked = js_closure (ctx, linked, NULL); - ret_val = JS_Call (ctx, linked, ctx->global_obj, 0, NULL); + if (JS_IsException (linked)) { + return JS_EXCEPTION; + } - /* Restore env */ - ctx->eval_env = saved_env; + /* Store env_record on the function for OP_get_env_slot access */ + JSFunction *f = JS_VALUE_GET_FUNCTION (linked); + f->u.func.env_record = env; + + ret_val = JS_Call (ctx, linked, ctx->global_obj, 0, NULL); return ret_val; } @@ -17842,6 +18084,1257 @@ JSValue JS_ReadObject (JSContext *ctx, const uint8_t *buf, size_t buf_len, int f return obj; } +/*******************************************************************/ +/* JSCompiledUnit Serialization */ + +/* Magic number for compiled unit files */ +#define COMPILED_UNIT_MAGIC 0x43454C4C /* "CELL" */ +#define COMPILED_UNIT_VERSION 1 + +/* Write a JSCompiledUnit to a byte buffer. + Returns allocated buffer (caller must free), or NULL on error. 
+ The format is: + - magic (4 bytes): "CELL" + - version (1 byte) + - flags (1 byte): has_debug + - local_count (2 bytes) + - stack_size (2 bytes) + - cpool_count (4 bytes) + - byte_code_len (4 bytes) + - cpool values (variable) + - bytecode (byte_code_len bytes) + - debug section (if has_debug) +*/ +uint8_t *JS_WriteCompiledUnit (JSContext *ctx, JSCompiledUnit *unit, size_t *out_len) { + DynBuf dbuf; + dbuf_init (&dbuf); + + /* Magic */ + dbuf_put_u32 (&dbuf, COMPILED_UNIT_MAGIC); + + /* Version */ + dbuf_putc (&dbuf, COMPILED_UNIT_VERSION); + + /* Flags */ + uint8_t flags = 0; + if (unit->has_debug) flags |= 1; + dbuf_putc (&dbuf, flags); + + /* Stack requirements */ + dbuf_put_u16 (&dbuf, unit->local_count); + dbuf_put_u16 (&dbuf, unit->stack_size); + + /* Counts */ + dbuf_put_u32 (&dbuf, unit->cpool_count); + dbuf_put_u32 (&dbuf, unit->byte_code_len); + + /* Write constant pool (simplified - just strings for now) */ + for (int i = 0; i < unit->cpool_count; i++) { + JSValue val = unit->cpool[i]; + uint32_t tag = JS_VALUE_GET_TAG (val); + + if (tag == JS_TAG_INT) { + dbuf_putc (&dbuf, 1); /* type: int */ + int32_t v = JS_VALUE_GET_INT (val); + dbuf_put_u32 (&dbuf, (uint32_t)v); + } else if (tag == JS_TAG_FLOAT64) { + dbuf_putc (&dbuf, 2); /* type: float */ + double d = JS_VALUE_GET_FLOAT64 (val); + dbuf_put (&dbuf, (uint8_t *)&d, sizeof (d)); + } else if (JS_IsText (val)) { + dbuf_putc (&dbuf, 3); /* type: string */ + const char *str = JS_ToCString (ctx, val); + if (str) { + size_t len = strlen (str); + dbuf_put_u32 (&dbuf, (uint32_t)len); + dbuf_put (&dbuf, (uint8_t *)str, len); + JS_FreeCString (ctx, str); + } else { + dbuf_put_u32 (&dbuf, 0); + } + } else { + dbuf_putc (&dbuf, 0); /* type: null/unsupported */ + } + } + + /* Write bytecode */ + dbuf_put (&dbuf, unit->byte_code_buf, unit->byte_code_len); + + /* Write debug section if present */ + if (unit->has_debug) { + /* Filename */ + const char *fname = JS_ToCString (ctx, unit->debug.filename); + if (fname) 
{ + size_t len = strlen (fname); + dbuf_put_u32 (&dbuf, (uint32_t)len); + dbuf_put (&dbuf, (uint8_t *)fname, len); + JS_FreeCString (ctx, fname); + } else { + dbuf_put_u32 (&dbuf, 0); + } + + /* source_len, pc2line_len */ + dbuf_put_u32 (&dbuf, unit->debug.source_len); + dbuf_put_u32 (&dbuf, unit->debug.pc2line_len); + + /* pc2line_buf */ + if (unit->debug.pc2line_len > 0 && unit->debug.pc2line_buf) { + dbuf_put (&dbuf, unit->debug.pc2line_buf, unit->debug.pc2line_len); + } + + /* source */ + if (unit->debug.source_len > 0 && unit->debug.source) { + dbuf_put (&dbuf, (uint8_t *)unit->debug.source, unit->debug.source_len); + } + } + + *out_len = dbuf.size; + return dbuf.buf; +} + +/* Read a JSCompiledUnit from a byte buffer. + Returns unit on success, NULL on error. */ +JSCompiledUnit *JS_ReadCompiledUnit (JSContext *ctx, const uint8_t *buf, size_t buf_len) { + const uint8_t *p = buf; + const uint8_t *end = buf + buf_len; + + if (buf_len < 18) return NULL; /* Minimum header size */ + + /* Check magic */ + uint32_t magic = get_u32 (p); p += 4; + if (magic != COMPILED_UNIT_MAGIC) return NULL; + + /* Version */ + uint8_t version = *p++; + if (version != COMPILED_UNIT_VERSION) return NULL; + + /* Flags */ + uint8_t flags = *p++; + BOOL has_debug = (flags & 1) != 0; + + /* Stack requirements */ + uint16_t local_count = get_u16 (p); p += 2; + uint16_t stack_size = get_u16 (p); p += 2; + + /* Counts */ + uint32_t cpool_count = get_u32 (p); p += 4; + uint32_t byte_code_len = get_u32 (p); p += 4; + + /* Calculate allocation size */ + size_t unit_size; + if (has_debug) { + unit_size = sizeof (JSCompiledUnit); + } else { + unit_size = offsetof (JSCompiledUnit, debug); + } + size_t cpool_offset = unit_size; + unit_size += cpool_count * sizeof (JSValue); + size_t bc_offset = unit_size; + unit_size += byte_code_len; + + /* Allocate unit */ + JSCompiledUnit *unit = pjs_mallocz (unit_size); + if (!unit) return NULL; + + /* Initialize header */ + unit->header = objhdr_make (0, 
OBJ_CODE, false, false, false, false); + unit->has_debug = has_debug; + unit->read_only_bytecode = 0; + unit->local_count = local_count; + unit->stack_size = stack_size; + unit->cpool_count = cpool_count; + unit->byte_code_len = byte_code_len; + + /* Setup pointers */ + if (cpool_count > 0) { + unit->cpool = (JSValue *)((uint8_t *)unit + cpool_offset); + } else { + unit->cpool = NULL; + } + unit->byte_code_buf = (uint8_t *)unit + bc_offset; + + /* Read constant pool */ + for (uint32_t i = 0; i < cpool_count; i++) { + if (p >= end) goto fail; + uint8_t type = *p++; + + switch (type) { + case 0: /* null */ + unit->cpool[i] = JS_NULL; + break; + case 1: /* int */ + if (p + 4 > end) goto fail; + unit->cpool[i] = JS_NewInt32 (ctx, (int32_t)get_u32 (p)); + p += 4; + break; + case 2: /* float */ + if (p + 8 > end) goto fail; + { + double d; + memcpy (&d, p, sizeof (d)); + unit->cpool[i] = JS_NewFloat64 (ctx, d); + p += 8; + } + break; + case 3: /* string */ + if (p + 4 > end) goto fail; + { + uint32_t len = get_u32 (p); p += 4; + if (p + len > end) goto fail; + unit->cpool[i] = JS_NewStringLen (ctx, (const char *)p, len); + p += len; + } + break; + default: + unit->cpool[i] = JS_NULL; + break; + } + } + + /* Read bytecode */ + if (p + byte_code_len > end) goto fail; + memcpy (unit->byte_code_buf, p, byte_code_len); + p += byte_code_len; + + /* Read debug section if present */ + if (has_debug) { + /* Filename */ + if (p + 4 > end) goto fail; + uint32_t fname_len = get_u32 (p); p += 4; + if (p + fname_len > end) goto fail; + if (fname_len > 0) { + unit->debug.filename = JS_NewStringLen (ctx, (const char *)p, fname_len); + } else { + unit->debug.filename = JS_NULL; + } + p += fname_len; + + /* source_len, pc2line_len */ + if (p + 8 > end) goto fail; + unit->debug.source_len = get_u32 (p); p += 4; + unit->debug.pc2line_len = get_u32 (p); p += 4; + + /* pc2line_buf */ + if (unit->debug.pc2line_len > 0) { + if (p + unit->debug.pc2line_len > end) goto fail; + 
unit->debug.pc2line_buf = js_malloc (ctx, unit->debug.pc2line_len); + if (!unit->debug.pc2line_buf) goto fail; + memcpy (unit->debug.pc2line_buf, p, unit->debug.pc2line_len); + p += unit->debug.pc2line_len; + } else { + unit->debug.pc2line_buf = NULL; + } + + /* source */ + if (unit->debug.source_len > 0) { + if (p + unit->debug.source_len > end) goto fail; + unit->debug.source = js_malloc (ctx, unit->debug.source_len + 1); + if (!unit->debug.source) goto fail; + memcpy (unit->debug.source, p, unit->debug.source_len); + unit->debug.source[unit->debug.source_len] = '\0'; + p += unit->debug.source_len; + } else { + unit->debug.source = NULL; + } + } + + return unit; + +fail: + pjs_free (unit); + return NULL; +} + +/*******************************************************************/ +/* CellModule Serialization (context-neutral) */ + +/* Free a CellModule and all its contents */ +void cell_module_free (CellModule *mod) { + if (!mod) return; + + /* Free string table */ + if (mod->string_data) pjs_free (mod->string_data); + if (mod->string_offsets) pjs_free (mod->string_offsets); + + /* Free units */ + if (mod->units) { + for (uint32_t i = 0; i < mod->unit_count; i++) { + CellUnit *u = &mod->units[i]; + if (u->constants) pjs_free (u->constants); + if (u->bytecode) pjs_free (u->bytecode); + if (u->upvalues) pjs_free (u->upvalues); + if (u->externals) pjs_free (u->externals); + if (u->pc2line) pjs_free (u->pc2line); + } + pjs_free (mod->units); + } + + /* Free source */ + if (mod->source) pjs_free (mod->source); + + pjs_free (mod); +} + +/* Write a CellModule to a byte buffer (context-neutral format). + Returns allocated buffer (caller must free with pjs_free), or NULL on error. 
+ Format: + - magic (4 bytes): 0x4C4C4543 "CELL" + - version (1 byte) + - flags (1 byte) + - string_count (4 bytes) + - string_data_size (4 bytes) + - string_data (string_data_size bytes) + - string_offsets (string_count * 4 bytes) + - unit_count (4 bytes) + - for each unit: + - const_count (4 bytes) + - for each const: type (1 byte), value (4-8 bytes) + - bytecode_len (4 bytes) + - bytecode (bytecode_len bytes) + - arg_count (2 bytes) + - var_count (2 bytes) + - stack_size (2 bytes) + - upvalue_count (2 bytes) + - for each upvalue: kind (1 byte), index (2 bytes) + - external_count (4 bytes) + - for each external: pc_offset (4), name_sid (4), kind (1) + - pc2line_len (4 bytes) + - pc2line (pc2line_len bytes) + - name_sid (4 bytes) + - source_len (4 bytes) + - source (source_len bytes) +*/ +uint8_t *cell_module_write (CellModule *mod, size_t *out_len) { + DynBuf buf; + dbuf_init (&buf); + + /* Header */ + dbuf_put_u32 (&buf, mod->magic); + dbuf_putc (&buf, mod->version); + dbuf_putc (&buf, mod->flags); + + /* String table */ + dbuf_put_u32 (&buf, mod->string_count); + dbuf_put_u32 (&buf, mod->string_data_size); + if (mod->string_data_size > 0 && mod->string_data) { + dbuf_put (&buf, mod->string_data, mod->string_data_size); + } + if (mod->string_count > 0 && mod->string_offsets) { + for (uint32_t i = 0; i < mod->string_count; i++) { + dbuf_put_u32 (&buf, mod->string_offsets[i]); + } + } + + /* Units */ + dbuf_put_u32 (&buf, mod->unit_count); + for (uint32_t u = 0; u < mod->unit_count; u++) { + CellUnit *unit = &mod->units[u]; + + /* Constants */ + dbuf_put_u32 (&buf, unit->const_count); + for (uint32_t c = 0; c < unit->const_count; c++) { + CellConst *cc = &unit->constants[c]; + dbuf_putc (&buf, cc->type); + switch (cc->type) { + case CELL_CONST_NULL: + break; + case CELL_CONST_INT: + dbuf_put_u32 (&buf, (uint32_t)cc->i32); + break; + case CELL_CONST_FLOAT: { + JSFloat64Union fu; + fu.d = cc->f64; + dbuf_put_u32 (&buf, (uint32_t)(fu.u64 & 0xFFFFFFFF)); + 
dbuf_put_u32 (&buf, (uint32_t)(fu.u64 >> 32)); + break; + } + case CELL_CONST_STRING: + case CELL_CONST_UNIT: + dbuf_put_u32 (&buf, cc->string_sid); + break; + } + } + + /* Bytecode */ + dbuf_put_u32 (&buf, unit->bytecode_len); + if (unit->bytecode_len > 0 && unit->bytecode) { + dbuf_put (&buf, unit->bytecode, unit->bytecode_len); + } + + /* Stack requirements */ + dbuf_put_u16 (&buf, unit->arg_count); + dbuf_put_u16 (&buf, unit->var_count); + dbuf_put_u16 (&buf, unit->stack_size); + + /* Upvalues */ + dbuf_put_u16 (&buf, unit->upvalue_count); + for (uint16_t i = 0; i < unit->upvalue_count; i++) { + dbuf_putc (&buf, unit->upvalues[i].kind); + dbuf_put_u16 (&buf, unit->upvalues[i].index); + } + + /* Externals */ + dbuf_put_u32 (&buf, unit->external_count); + for (uint32_t i = 0; i < unit->external_count; i++) { + dbuf_put_u32 (&buf, unit->externals[i].pc_offset); + dbuf_put_u32 (&buf, unit->externals[i].name_sid); + dbuf_putc (&buf, unit->externals[i].kind); + } + + /* Debug */ + dbuf_put_u32 (&buf, unit->pc2line_len); + if (unit->pc2line_len > 0 && unit->pc2line) { + dbuf_put (&buf, unit->pc2line, unit->pc2line_len); + } + dbuf_put_u32 (&buf, unit->name_sid); + } + + /* Source */ + dbuf_put_u32 (&buf, mod->source_len); + if (mod->source_len > 0 && mod->source) { + dbuf_put (&buf, (uint8_t *)mod->source, mod->source_len); + } + + if (buf.error) { + dbuf_free (&buf); + *out_len = 0; + return NULL; + } + + *out_len = buf.size; + return buf.buf; +} + +/* Read a CellModule from a byte buffer. + Returns allocated CellModule (caller must free with cell_module_free), or NULL on error. 
*/
+CellModule *cell_module_read (const uint8_t *buf, size_t buf_len) {
+  /* The input is untrusted.  Every bounds check below compares a length
+     against the number of unread bytes; forming `p + n` first can wrap for
+     hostile lengths, and `count * 4` evaluated in 32 bits can wrap to a
+     small value and let an oversized count through. */
+#define CELL_LEFT() ((size_t)(end - p))
+#define CELL_NEED(n) \
+  do { if (CELL_LEFT () < (size_t)(n)) goto fail; } while (0)
+  /* `count` items of at least `wire` encoded bytes and `mem` in-memory
+     bytes each: reject counts the remaining input cannot back, and counts
+     whose allocation size would overflow size_t. */
+#define CELL_NEED_ITEMS(count, wire, mem) \
+  do { \
+    if ((size_t)(count) > CELL_LEFT () / (size_t)(wire)) goto fail; \
+    if ((size_t)(count) > SIZE_MAX / (size_t)(mem)) goto fail; \
+  } while (0)
+
+  /* Smallest possible encoded unit: const_count(4) + bytecode_len(4) +
+     arg/var/stack(6) + upvalue_count(2) + external_count(4) +
+     pc2line_len(4) + name_sid(4). */
+  enum { CELL_UNIT_MIN_WIRE = 28 };
+
+  const uint8_t *p = buf;
+  const uint8_t *end;
+  CellModule *mod;
+
+  /* 6-byte header + 8-byte string table header */
+  if (!buf || buf_len < 14) return NULL;
+  end = buf + buf_len;
+
+  mod = pjs_mallocz (sizeof (CellModule));
+  if (!mod) return NULL;
+
+  /* Header */
+  mod->magic = get_u32 (p); p += 4;
+  if (mod->magic != CELL_MODULE_MAGIC) goto fail;
+  mod->version = *p++;
+  if (mod->version != CELL_MODULE_VERSION) goto fail;
+  mod->flags = *p++;
+
+  /* String table */
+  CELL_NEED (8);
+  mod->string_count = get_u32 (p); p += 4;
+  mod->string_data_size = get_u32 (p); p += 4;
+
+  if (mod->string_data_size > 0) {
+    CELL_NEED (mod->string_data_size);
+    mod->string_data = pjs_malloc (mod->string_data_size);
+    if (!mod->string_data) goto fail;
+    memcpy (mod->string_data, p, mod->string_data_size);
+    p += mod->string_data_size;
+  }
+
+  if (mod->string_count > 0) {
+    uint32_t prev = 0;
+
+    CELL_NEED_ITEMS (mod->string_count, 4, sizeof (uint32_t));
+    mod->string_offsets = pjs_malloc ((size_t)mod->string_count * sizeof (uint32_t));
+    if (!mod->string_offsets) goto fail;
+    for (uint32_t i = 0; i < mod->string_count; i++) {
+      uint32_t off = get_u32 (p); p += 4;
+      /* A string's length is derived from the next offset, so offsets
+         must be non-decreasing and stay inside string_data. */
+      if (off < prev || off > mod->string_data_size) goto fail;
+      mod->string_offsets[i] = off;
+      prev = off;
+    }
+  }
+
+  /* Units */
+  CELL_NEED (4);
+  mod->unit_count = get_u32 (p); p += 4;
+
+  if (mod->unit_count > 0) {
+    CELL_NEED_ITEMS (mod->unit_count, CELL_UNIT_MIN_WIRE, sizeof (CellUnit));
+    mod->units = pjs_mallocz ((size_t)mod->unit_count * sizeof (CellUnit));
+    if (!mod->units) goto fail;
+
+    for (uint32_t u = 0; u < mod->unit_count; u++) {
+      CellUnit *unit = &mod->units[u];
+
+      /* Constants: one tag byte each, followed by a tag-specific payload */
+      CELL_NEED (4);
+      unit->const_count = get_u32 (p); p += 4;
+
+      if (unit->const_count > 0) {
+        CELL_NEED_ITEMS (unit->const_count, 1, sizeof (CellConst));
+        unit->constants = pjs_mallocz ((size_t)unit->const_count * sizeof (CellConst));
+        if (!unit->constants) goto fail;
+
+        for (uint32_t c = 0; c < unit->const_count; c++) {
+          CellConst *cc = &unit->constants[c];
+
+          CELL_NEED (1);
+          cc->type = *p++;
+          switch (cc->type) {
+          case CELL_CONST_NULL:
+            break;
+          case CELL_CONST_INT:
+            CELL_NEED (4);
+            cc->i32 = (int32_t)get_u32 (p); p += 4;
+            break;
+          case CELL_CONST_FLOAT: {
+            JSFloat64Union fu;
+            CELL_NEED (8);
+            /* Stored as two 32-bit halves, low word first */
+            fu.u64 = get_u32 (p);
+            fu.u64 |= ((uint64_t)get_u32 (p + 4)) << 32;
+            p += 8;
+            cc->f64 = fu.d;
+            break;
+          }
+          case CELL_CONST_STRING:
+          case CELL_CONST_UNIT:
+            CELL_NEED (4);
+            cc->string_sid = get_u32 (p); p += 4;
+            break;
+          default:
+            /* Unknown tag: its payload size is unknown, so the rest of
+               the stream cannot be parsed reliably. */
+            goto fail;
+          }
+        }
+      }
+
+      /* Bytecode */
+      CELL_NEED (4);
+      unit->bytecode_len = get_u32 (p); p += 4;
+
+      if (unit->bytecode_len > 0) {
+        CELL_NEED (unit->bytecode_len);
+        unit->bytecode = pjs_malloc (unit->bytecode_len);
+        if (!unit->bytecode) goto fail;
+        memcpy (unit->bytecode, p, unit->bytecode_len);
+        p += unit->bytecode_len;
+      }
+
+      /* Stack requirements */
+      CELL_NEED (6);
+      unit->arg_count = get_u16 (p); p += 2;
+      unit->var_count = get_u16 (p); p += 2;
+      unit->stack_size = get_u16 (p); p += 2;
+
+      /* Upvalues: kind (1 byte) + index (2 bytes) each */
+      CELL_NEED (2);
+      unit->upvalue_count = get_u16 (p); p += 2;
+
+      if (unit->upvalue_count > 0) {
+        CELL_NEED_ITEMS (unit->upvalue_count, 3, sizeof (CellCapDesc));
+        unit->upvalues = pjs_malloc ((size_t)unit->upvalue_count * sizeof (CellCapDesc));
+        if (!unit->upvalues) goto fail;
+        for (uint16_t i = 0; i < unit->upvalue_count; i++) {
+          unit->upvalues[i].kind = *p++;
+          unit->upvalues[i].index = get_u16 (p); p += 2;
+        }
+      }
+
+      /* Externals: pc_offset (4) + name_sid (4) + kind (1) each */
+      CELL_NEED (4);
+      unit->external_count = get_u32 (p); p += 4;
+
+      if (unit->external_count > 0) {
+        CELL_NEED_ITEMS (unit->external_count, 9, sizeof (CellExternalReloc));
+        unit->externals = pjs_malloc ((size_t)unit->external_count * sizeof (CellExternalReloc));
+        if (!unit->externals) goto fail;
+        for (uint32_t i = 0; i < unit->external_count; i++) {
+          unit->externals[i].pc_offset = get_u32 (p); p += 4;
+          unit->externals[i].name_sid = get_u32 (p); p += 4;
+          unit->externals[i].kind = *p++;
+        }
+      }
+
+      /* Debug */
+      CELL_NEED (4);
+      unit->pc2line_len = get_u32 (p); p += 4;
+
+      if (unit->pc2line_len > 0) {
+        CELL_NEED (unit->pc2line_len);
+        unit->pc2line = pjs_malloc (unit->pc2line_len);
+        if (!unit->pc2line) goto fail;
+        memcpy (unit->pc2line, p, unit->pc2line_len);
+        p += unit->pc2line_len;
+      }
+
+      CELL_NEED (4);
+      unit->name_sid = get_u32 (p); p += 4;
+    }
+  }
+
+  /* Source */
+  CELL_NEED (4);
+  mod->source_len = get_u32 (p); p += 4;
+
+  if (mod->source_len > 0) {
+    CELL_NEED (mod->source_len);
+    /* Widen before adding the terminator so the size cannot wrap to 0 */
+    mod->source = pjs_malloc ((size_t)mod->source_len + 1);
+    if (!mod->source) goto fail;
+    memcpy (mod->source, p, mod->source_len);
+    mod->source[mod->source_len] = '\0';
+    p += mod->source_len;
+  }
+
+  return mod;
+
+fail:
+  /* cell_module_free copes with a partially populated module: every
+     array pointer starts out zeroed by pjs_mallocz. */
+  cell_module_free (mod);
+  return NULL;
+
+#undef CELL_NEED_ITEMS
+#undef CELL_NEED
+#undef CELL_LEFT
+}
+
+/* Helper: get string `sid` from a CellModule string table.
+   On success returns a pointer into mod->string_data (not NUL-terminated)
+   and stores the byte length in *out_len.  Returns NULL, with *out_len set
+   to 0, when sid is out of range, the table is missing, or the recorded
+   offsets do not describe a range inside string_data. */
+static const char *cell_module_get_string (CellModule *mod, uint32_t sid, uint32_t *out_len) {
+  *out_len = 0;
+  if (sid >= mod->string_count) return NULL;
+  if (!mod->string_offsets || !mod->string_data) return NULL;
+
+  uint32_t start = mod->string_offsets[sid];
+  /* The last string runs to the end of the data blob */
+  uint32_t stop = (sid + 1 < mod->string_count)
+                      ? mod->string_offsets[sid + 1]
+                      : mod->string_data_size;
+  /* Guard against modules built by hand with inconsistent offsets:
+     `stop - start` would otherwise underflow to a huge length. */
+  if (start > stop || stop > mod->string_data_size) return NULL;
+
+  *out_len = stop - start;
+  return (const char *)(mod->string_data + start);
+}
+
+/* Link a CellModule against an environment (the module is not executed).
+   This materializes the string table into the target context's stone arena,
+   creates runtime bytecode, patches external relocations, and returns the
+   main unit wrapped as a callable function.
+
+   Parameters:
+   - ctx: target context
+   - mod: context-neutral module (ownership NOT transferred)
+   - env: stoned record for environment (or JS_NULL)
+
+   Returns: callable function value, or JS_EXCEPTION on error.
+*/
+JSValue cell_module_integrate (JSContext *ctx, CellModule *mod, JSValue env) {
+  JSValue *string_table = NULL;
+  JSFunctionBytecode **units = NULL;
+  uint32_t i, j;
+
+  if (!mod || mod->unit_count == 0 || !mod->units) {
+    JS_ThrowTypeError (ctx, "module has no units");
+    return JS_EXCEPTION;
+  }
+
+  /* Step 1: Materialize string table into context's stone arena.
+     NOTE(review): these values live in a plain C array while further
+     allocations happen; confirm that interned keys cannot be moved or
+     collected by the GC in the meantime. */
+  if (mod->string_count > 0) {
+    string_table = pjs_mallocz ((size_t)mod->string_count * sizeof (JSValue));
+    if (!string_table) goto fail;
+
+    for (i = 0; i < mod->string_count; i++) {
+      uint32_t len;
+      const char *str = cell_module_get_string (mod, i, &len);
+      if (!str) {
+        string_table[i] = JS_NULL;
+        continue;
+      }
+      /* Intern as a stoned key */
+      string_table[i] = js_key_new_len (ctx, str, len);
+      if (JS_IsException (string_table[i])) goto fail;
+    }
+  }
+
+  /* Step 2: Create JSFunctionBytecode for each unit */
+  units = pjs_mallocz ((size_t)mod->unit_count * sizeof (JSFunctionBytecode *));
+  if (!units) goto fail;
+
+  for (i = 0; i < mod->unit_count; i++) {
+    CellUnit *cu = &mod->units[i];
+    size_t cpool_offset, byte_code_offset, function_size;
+
+    /* The counts come from a serialized module.  Reject values that do not
+       fit the runtime structure's count fields (presumably `int`, as in
+       JSCompiledUnit - TODO confirm) or that lack backing arrays. */
+    if (cu->const_count > (uint32_t)INT32_MAX
+        || cu->bytecode_len > (uint32_t)INT32_MAX
+        || (cu->const_count > 0 && !cu->constants)
+        || (cu->bytecode_len > 0 && !cu->bytecode)) {
+      JS_ThrowTypeError (ctx, "malformed module unit");
+      goto fail;
+    }
+
+    /* Layout: [JSFunctionBytecode][cpool][bytecode], sized in size_t so
+       large counts cannot overflow an int and under-allocate. */
+    cpool_offset = sizeof (JSFunctionBytecode);
+    if ((size_t)cu->const_count > (SIZE_MAX - cpool_offset) / sizeof (JSValue)) {
+      JS_ThrowTypeError (ctx, "malformed module unit");
+      goto fail;
+    }
+    byte_code_offset = cpool_offset + (size_t)cu->const_count * sizeof (JSValue);
+    if ((size_t)cu->bytecode_len > SIZE_MAX - byte_code_offset) {
+      JS_ThrowTypeError (ctx, "malformed module unit");
+      goto fail;
+    }
+    function_size = byte_code_offset + (size_t)cu->bytecode_len;
+
+    JSFunctionBytecode *b = pjs_mallocz (function_size);
+    if (!b) goto fail;
+    units[i] = b;
+
+    /* Initialize header */
+    b->header = objhdr_make (0, OBJ_CODE, false, false, false, false);
+    b->arg_count = cu->arg_count;
+    b->var_count = cu->var_count;
+    b->defined_arg_count = cu->arg_count; /* Same as arg_count for simple functions */
+    b->has_simple_parameter_list = 1;     /* Assume simple parameter list */
+    b->stack_size = cu->stack_size;
+    b->cpool_count = cu->const_count;
+    b->byte_code_len = cu->bytecode_len;
+    b->realm = ctx;
+
+    /* Set up self pointers into the single allocation */
+    b->cpool = (JSValue *)((uint8_t *)b + cpool_offset);
+    b->byte_code_buf = (uint8_t *)b + byte_code_offset;
+
+    /* Materialize constants */
+    for (j = 0; j < cu->const_count; j++) {
+      CellConst *cc = &cu->constants[j];
+      switch (cc->type) {
+      case CELL_CONST_NULL:
+        b->cpool[j] = JS_NULL;
+        break;
+      case CELL_CONST_INT:
+        b->cpool[j] = JS_NewInt32 (ctx, cc->i32);
+        break;
+      case CELL_CONST_FLOAT:
+        b->cpool[j] = JS_NewFloat64 (ctx, cc->f64);
+        break;
+      case CELL_CONST_STRING:
+        if (cc->string_sid < mod->string_count) {
+          b->cpool[j] = string_table[cc->string_sid];
+        } else {
+          b->cpool[j] = JS_NULL;
+        }
+        break;
+      case CELL_CONST_UNIT:
+        /* Will be patched after all units are created */
+        b->cpool[j] = JS_NULL;
+        break;
+      }
+    }
+
+    /* Copy bytecode (skip the call entirely for an empty unit so memcpy
+       never sees a NULL source) */
+    if (cu->bytecode_len > 0) {
+      memcpy (b->byte_code_buf, cu->bytecode, cu->bytecode_len);
+    }
+
+    /* Set function name from string table */
+    if (cu->name_sid < mod->string_count) {
+      b->func_name = string_table[cu->name_sid];
+    } else {
+      b->func_name = JS_KEY_empty;
+    }
+  }
+
+  /* Step 3: Patch unit references in cpool.  Out-of-range unit ids are
+     left as JS_NULL. */
+  for (i = 0; i < mod->unit_count; i++) {
+    CellUnit *cu = &mod->units[i];
+    JSFunctionBytecode *b = units[i];
+
+    for (j = 0; j < cu->const_count; j++) {
+      if (cu->constants[j].type == CELL_CONST_UNIT) {
+        uint32_t uid = cu->constants[j].unit_id;
+        if (uid < mod->unit_count) {
+          b->cpool[j] = JS_MKPTR (units[uid]);
+        }
+      }
+    }
+  }
+
+  /* Step 4: Patch external relocations for main unit (unit 0).
+     NOTE(review): relocations of the other units are not applied here;
+     confirm nested units never carry externals. */
+  {
+    CellUnit *cu = &mod->units[0];
+    uint8_t *bc = units[0]->byte_code_buf;
+    JSRecord *global = (JSRecord *)JS_VALUE_GET_OBJ (ctx->global_obj);
+
+    /* Get env record if provided */
+    JSRecord *env_rec = NULL;
+    if (!JS_IsNull (env) && JS_IsRecord (env)) {
+      env_rec = (JSRecord *)JS_VALUE_GET_OBJ (env);
+    }
+
+    if (cu->external_count > 0 && !cu->externals) {
+      JS_ThrowTypeError (ctx, "malformed module unit");
+      goto fail;
+    }
+
+    for (j = 0; j < cu->external_count; j++) {
+      CellExternalReloc *rel = &cu->externals[j];
+      uint32_t pc = rel->pc_offset;
+      uint8_t env_op, global_op;
+
+      if (rel->name_sid >= mod->string_count) continue;
+
+      if (rel->kind == EXT_GET) {
+        env_op = OP_get_env_slot;
+        global_op = OP_get_global_slot;
+      } else if (rel->kind == EXT_SET) {
+        env_op = OP_set_env_slot;
+        global_op = OP_set_global_slot;
+      } else {
+        continue; /* unknown relocation kinds are ignored */
+      }
+
+      /* The site is rewritten as a 3-byte slot opcode followed by two
+         nops, i.e. 5 bytes starting at pc.  pc_offset comes from the
+         serialized module, so make sure the write stays in the buffer. */
+      if (cu->bytecode_len < 5 || pc > cu->bytecode_len - 5) {
+        JS_ThrowTypeError (ctx, "relocation offset out of range");
+        goto fail;
+      }
+
+      JSValue name = string_table[rel->name_sid];
+
+      /* Resolution order: env first, then global */
+      uint8_t op = env_op;
+      int slot = env_rec ? rec_find_slot (env_rec, name) : 0;
+      if (slot <= 0) {
+        op = global_op;
+        slot = rec_find_slot (global, name);
+      }
+
+      if (slot <= 0) {
+        /* Link error */
+        char buf[64];
+        if (rel->kind == EXT_GET) {
+          JS_ThrowReferenceError (ctx, "'%s' is not defined",
+                                  JS_KeyGetStr (ctx, buf, sizeof (buf), name));
+        } else {
+          JS_ThrowReferenceError (ctx, "cannot assign to '%s' - not found",
+                                  JS_KeyGetStr (ctx, buf, sizeof (buf), name));
+        }
+        goto fail;
+      }
+
+      bc[pc] = op;
+      put_u16 (bc + pc + 1, (uint16_t)slot);
+      bc[pc + 3] = OP_nop;
+      bc[pc + 4] = OP_nop;
+    }
+  }
+
+  /* Step 5: Create closure from main unit and set env_record */
+  {
+    JSValue linked = js_closure (ctx, JS_MKPTR (units[0]), NULL);
+    if (JS_IsException (linked)) goto fail;
+
+    /* OP_get_env_slot / OP_set_env_slot read the record from the
+       function object */
+    JSFunction *f = JS_VALUE_GET_FUNCTION (linked);
+    f->u.func.env_record = env;
+
+    /* Success - don't free units (now owned by result closure) */
+    pjs_free (string_table);
+    pjs_free (units);
+    return linked;
+  }
+
+fail:
+  /* Free allocated units on failure */
+  if (units) {
+    for (i = 0; i < mod->unit_count; i++) {
+      if (units[i]) pjs_free (units[i]);
+    }
+    pjs_free (units);
+  }
+  if (string_table) pjs_free (string_table);
+  return JS_EXCEPTION;
+}
+
+/*******************************************************************/
+/* JSFunctionBytecode to CellModule conversion */
+
+/* Helper structure for building string table.
+   A growable array of distinct JSValue strings; the position of a string
+   in the array is its string id. */
+typedef struct {
+  JSValue *strings; /* array of JSValue strings */
+  uint32_t count;
+  uint32_t capacity;
+} StringTableBuilder;
+
+/* Put the builder into the empty state (no storage allocated). */
+static void stb_init (StringTableBuilder *stb) {
+  stb->strings = NULL;
+  stb->count = 0;
+  stb->capacity = 0;
+}
+
+/* Release the builder's array and reset it to the empty state. */
+static void stb_free (StringTableBuilder *stb) {
+  if (stb->strings) pjs_free (stb->strings);
+  stb->strings = NULL;
+  stb->count = 0;
+  stb->capacity = 0;
+}
+
+/* Add a string to the builder, return its string_id.
+   If string already exists, return existing id.
+   Strings are matched by JSValue equality (==), not by content.
+   Returns UINT32_MAX if the array cannot be grown. */
+static uint32_t stb_add (StringTableBuilder *stb, JSValue str) {
+  /* Check if already present */
+  for (uint32_t i = 0; i < stb->count; i++) {
+    if (stb->strings[i] == str) return i;
+  }
+
+  /* Add new entry, doubling the capacity (16 slots initially) when full */
+  if (stb->count >= stb->capacity) {
+    uint32_t new_cap = stb->capacity ? stb->capacity * 2 : 16;
+    JSValue *new_arr = pjs_realloc (stb->strings, new_cap * sizeof (JSValue));
+    if (!new_arr) return UINT32_MAX;
+    stb->strings = new_arr;
+    stb->capacity = new_cap;
+  }
+
+  stb->strings[stb->count] = str;
+  return stb->count++;
+}
+
+/* Helper structure for collecting bytecodes.
+   A growable array of distinct function pointers; the position of a
+   function in the array is its unit id. */
+typedef struct {
+  JSFunctionBytecode **funcs;
+  uint32_t count;
+  uint32_t capacity;
+} FuncCollector;
+
+/* Put the collector into the empty state (no storage allocated). */
+static void fc_init (FuncCollector *fc) {
+  fc->funcs = NULL;
+  fc->count = 0;
+  fc->capacity = 0;
+}
+
+/* Release the collector's array (not the functions it points to) and
+   reset it to the empty state. */
+static void fc_free (FuncCollector *fc) {
+  if (fc->funcs) pjs_free (fc->funcs);
+  fc->funcs = NULL;
+  fc->count = 0;
+  fc->capacity = 0;
+}
+
+/* Add a function to collector, return its unit_id.
+   A function that is already present keeps its existing id.
+   Returns UINT32_MAX if the array cannot be grown. */
+static uint32_t fc_add (FuncCollector *fc, JSFunctionBytecode *b) {
+  /* Check if already present */
+  for (uint32_t i = 0; i < fc->count; i++) {
+    if (fc->funcs[i] == b) return i;
+  }
+
+  /* Add new entry, doubling the capacity (8 slots initially) when full */
+  if (fc->count >= fc->capacity) {
+    uint32_t new_cap = fc->capacity ? fc->capacity * 2 : 8;
+    JSFunctionBytecode **new_arr = pjs_realloc (fc->funcs, new_cap * sizeof (JSFunctionBytecode *));
+    if (!new_arr) return UINT32_MAX;
+    fc->funcs = new_arr;
+    fc->capacity = new_cap;
+  }
+
+  fc->funcs[fc->count] = b;
+  return fc->count++;
+}
+
+/* Recursively collect all functions in bytecode tree.
+   b is added first, then every OBJ_CODE object found in its constant pool
+   is collected depth-first, so the root function always gets unit id 0.
+   Returns 0 on success, -1 if the collector could not grow. */
+static int collect_functions (FuncCollector *fc, JSFunctionBytecode *b) {
+  uint32_t count_before = fc->count;
+
+  if (fc_add (fc, b) == UINT32_MAX) return -1;
+
+  /* fc_add did not grow the list, so b was collected earlier and its cpool
+     has been (or is being) scanned. Stopping here avoids rescanning shared
+     functions and cannot recurse forever on a cyclic reference. */
+  if (fc->count == count_before) return 0;
+
+  /* Scan cpool for nested functions */
+  for (int i = 0; i < b->cpool_count; i++) {
+    JSValue v = b->cpool[i];
+    if (JS_VALUE_GET_TAG (v) != JS_TAG_PTR) continue;
+
+    void *ptr = JS_VALUE_GET_PTR (v);
+    objhdr_t hdr = *(objhdr_t *)ptr;
+    if (objhdr_type (hdr) != OBJ_CODE) continue;
+
+    /* This is a nested function */
+    if (collect_functions (fc, (JSFunctionBytecode *)ptr) < 0) return -1;
+  }
+  return 0;
+}
+
+/* Collect all strings from a function's cpool and name.
+   Also gathers the debug filename, the argument/variable names and the
+   closure variable names when they are text values.
+   Returns 0 on success, -1 if the string table could not grow. */
+static int collect_strings (StringTableBuilder *stb, JSFunctionBytecode *b) {
+  /* Function name */
+  if (JS_IsText (b->func_name)) {
+    if (stb_add (stb, b->func_name) == UINT32_MAX) return -1;
+  }
+
+  /* Filename */
+  if (b->has_debug && JS_IsText (b->debug.filename)) {
+    if (stb_add (stb, b->debug.filename) == UINT32_MAX) return -1;
+  }
+
+  /* Cpool strings */
+  for (int i = 0; i < b->cpool_count; i++) {
+    JSValue v = b->cpool[i];
+    if (JS_IsText (v)) {
+      if (stb_add (stb, v) == UINT32_MAX) return -1;
+    }
+  }
+
+  /* Variable names (for debugging) */
+  if (b->vardefs) {
+    for (int i = 0; i < b->arg_count + b->var_count; i++) {
+      if (JS_IsText (b->vardefs[i].var_name)) {
+        if (stb_add (stb, b->vardefs[i].var_name) == UINT32_MAX) return -1;
+      }
+    }
+  }
+
+  /* Closure variable names */
+  if (b->closure_var) {
+    for (int i = 0; i < b->closure_var_count; i++) {
+      if (JS_IsText (b->closure_var[i].var_name)) {
+        if (stb_add (stb, b->closure_var[i].var_name) == UINT32_MAX) return -1;
+      }
+    }
+  }
+
+  return 0;
+}
+
+/* Find string_id for a JSValue string */
+static uint32_t find_string_id (StringTableBuilder *stb, JSValue str) {
+  /* Linear search by JSValue equality; UINT32_MAX means "not in table" */
+  for (uint32_t i = 0; i < stb->count; i++) {
+    if (stb->strings[i] == str) return i;
+  }
+  return UINT32_MAX;
+}
+
+/* Find unit_id for a JSFunctionBytecode (UINT32_MAX if it was not collected) */
+static uint32_t find_unit_id (FuncCollector *fc, JSFunctionBytecode *b) {
+  for (uint32_t i = 0; i < fc->count; i++) {
+    if (fc->funcs[i] == b) return i;
+  }
+  return UINT32_MAX;
+}
+
+/* Convert JSFunctionBytecode tree to CellModule.
+   This extracts all functions, builds a shared string table,
+   and creates external relocations for unresolved variables.
+
+   Parameters:
+   - ctx: context (for string conversion)
+   - main_func: compiled main function bytecode
+
+   Returns: allocated CellModule (caller must free with cell_module_free), or NULL on error.
+   Any failure (allocation, string conversion, relocation scan) returns NULL;
+   a partially built module is never handed back.
+*/
+CellModule *cell_module_from_bytecode (JSContext *ctx, JSFunctionBytecode *main_func) {
+  CellModule *mod = NULL;
+  StringTableBuilder stb;
+  FuncCollector fc;
+  DynBuf string_data;
+  uint32_t i, j;
+
+  stb_init (&stb);
+  fc_init (&fc);
+  dbuf_init (&string_data);
+
+  /* Step 1: Collect all functions */
+  if (collect_functions (&fc, main_func) < 0) goto fail;
+
+  /* Step 2: Collect all strings from all functions */
+  for (i = 0; i < fc.count; i++) {
+    if (collect_strings (&stb, fc.funcs[i]) < 0) goto fail;
+  }
+
+  /* Step 3: Allocate module */
+  mod = pjs_mallocz (sizeof (CellModule));
+  if (!mod) goto fail;
+
+  mod->magic = CELL_MODULE_MAGIC;
+  mod->version = CELL_MODULE_VERSION;
+  mod->flags = 0;
+
+  /* Step 4: Build string table data */
+  mod->string_count = stb.count;
+  if (stb.count > 0) {
+    mod->string_offsets = pjs_malloc (stb.count * sizeof (uint32_t));
+    if (!mod->string_offsets) goto fail;
+
+    for (i = 0; i < stb.count; i++) {
+      mod->string_offsets[i] = string_data.size;
+
+      /* Convert JSValue string to UTF-8. A failed conversion aborts the
+         build: storing an empty string instead would silently change the
+         names that constants and relocations refer to. */
+      const char *cstr = JS_ToCString (ctx, stb.strings[i]);
+      if (!cstr) goto fail;
+
+      size_t len = strlen (cstr);
+      dbuf_put (&string_data, (uint8_t *)cstr, len);
+      JS_FreeCString (ctx, cstr);
+    }
+
+    if (string_data.error) goto fail;
+
+    mod->string_data_size = string_data.size;
+    mod->string_data = string_data.buf;
+    string_data.buf = NULL; /* Transfer ownership */
+  }
+
+  /* Step 5: Create units. unit_count is published only once the array
+     exists, so the failure path never sees a count without storage. */
+  mod->units = pjs_mallocz (fc.count * sizeof (CellUnit));
+  if (!mod->units) goto fail;
+  mod->unit_count = fc.count;
+
+  for (i = 0; i < fc.count; i++) {
+    JSFunctionBytecode *b = fc.funcs[i];
+    CellUnit *cu = &mod->units[i];
+
+    /* Function name */
+    if (JS_IsText (b->func_name)) {
+      cu->name_sid = find_string_id (&stb, b->func_name);
+    } else {
+      cu->name_sid = UINT32_MAX;
+    }
+
+    /* Stack requirements */
+    cu->arg_count = b->arg_count;
+    cu->var_count = b->var_count;
+    cu->stack_size = b->stack_size;
+
+    /* Copy bytecode */
+    cu->bytecode_len = b->byte_code_len;
+    if (b->byte_code_len > 0) {
+      cu->bytecode = pjs_malloc (b->byte_code_len);
+      if (!cu->bytecode) goto fail;
+      memcpy (cu->bytecode, b->byte_code_buf, b->byte_code_len);
+    }
+
+    /* Build constants */
+    cu->const_count = b->cpool_count;
+    if (b->cpool_count > 0) {
+      cu->constants = pjs_mallocz (b->cpool_count * sizeof (CellConst));
+      if (!cu->constants) goto fail;
+
+      for (j = 0; j < (uint32_t)b->cpool_count; j++) {
+        JSValue v = b->cpool[j];
+        CellConst *cc = &cu->constants[j];
+
+        if (JS_IsNull (v)) {
+          cc->type = CELL_CONST_NULL;
+        } else if (JS_VALUE_GET_TAG (v) == JS_TAG_INT) {
+          cc->type = CELL_CONST_INT;
+          cc->i32 = JS_VALUE_GET_INT (v);
+        } else if (JS_VALUE_GET_TAG (v) == JS_TAG_FLOAT64) {
+          cc->type = CELL_CONST_FLOAT;
+          cc->f64 = JS_VALUE_GET_FLOAT64 (v);
+        } else if (JS_IsText (v)) {
+          cc->type = CELL_CONST_STRING;
+          cc->string_sid = find_string_id (&stb, v);
+        } else if (JS_VALUE_GET_TAG (v) == JS_TAG_PTR) {
+          void *ptr = JS_VALUE_GET_PTR (v);
+          objhdr_t hdr = *(objhdr_t *)ptr;
+          if (objhdr_type (hdr) == OBJ_CODE) {
+            /* Nested function reference */
+            cc->type = CELL_CONST_UNIT;
+            cc->unit_id = find_unit_id (&fc, (JSFunctionBytecode *)ptr);
+          } else {
+            /* Other heap objects cannot be represented: stored as null */
+            cc->type = CELL_CONST_NULL;
+          }
+        } else {
+          cc->type = CELL_CONST_NULL;
+        }
+      }
+    }
+
+    /* Build upvalue descriptors from closure_var. The count is set only
+       when the array is actually present, so count and pointer agree. */
+    if (b->closure_var_count > 0 && b->closure_var) {
+      /* zero-filled so struct padding never holds uninitialized bytes */
+      cu->upvalues = pjs_mallocz (b->closure_var_count * sizeof (CellCapDesc));
+      if (!cu->upvalues) goto fail;
+      cu->upvalue_count = b->closure_var_count;
+
+      for (j = 0; j < (uint32_t)b->closure_var_count; j++) {
+        JSClosureVar *cv = &b->closure_var[j];
+        cu->upvalues[j].kind = cv->is_local ? CAP_FROM_PARENT_LOCAL : CAP_FROM_PARENT_UPVALUE;
+        cu->upvalues[j].index = cv->var_idx;
+      }
+    }
+
+    /* Scan bytecode for external relocations (OP_get_var, OP_put_var, etc.) */
+    {
+      DynBuf relocs;
+      int scan_ok = 1;
+      dbuf_init (&relocs);
+
+      uint8_t *bc = cu->bytecode;
+      int pos = 0;
+      while (pos < (int)cu->bytecode_len) {
+        uint8_t op = bc[pos];
+        int len = short_opcode_info (op).size;
+        int kind = 0; /* 0 = not an external access */
+
+        /* An opcode without a positive size would never advance pos */
+        if (len <= 0) {
+          scan_ok = 0;
+          break;
+        }
+
+        if (op == OP_get_var || op == OP_get_var_undef) {
+          kind = EXT_GET;
+        } else if (op == OP_put_var || op == OP_put_var_init || op == OP_put_var_strict) {
+          kind = EXT_SET;
+        }
+
+        if (kind != 0) {
+          CellExternalReloc rel;
+          memset (&rel, 0, sizeof (rel)); /* also clears padding bytes */
+          rel.pc_offset = pos;            /* offset of the opcode byte */
+          uint32_t cpool_idx = get_u32 (bc + pos + 1);
+          if (cpool_idx < (uint32_t)b->cpool_count && JS_IsText (b->cpool[cpool_idx])) {
+            rel.name_sid = find_string_id (&stb, b->cpool[cpool_idx]);
+          } else {
+            rel.name_sid = UINT32_MAX;
+          }
+          rel.kind = kind;
+          dbuf_put (&relocs, (uint8_t *)&rel, sizeof (rel));
+        }
+
+        pos += len;
+      }
+
+      /* A failed scan or a failed append must not produce a module with
+         relocations missing. */
+      if (!scan_ok || relocs.error) {
+        dbuf_free (&relocs);
+        goto fail;
+      }
+
+      if (relocs.size > 0) {
+        cu->external_count = relocs.size / sizeof (CellExternalReloc);
+        cu->externals = (CellExternalReloc *)relocs.buf;
+        relocs.buf = NULL; /* Transfer ownership */
+      }
+      dbuf_free (&relocs);
+    }
+
+    /* Copy debug info (length is set only together with the buffer) */
+    if (b->has_debug) {
+      if (b->debug.pc2line_len > 0 && b->debug.pc2line_buf) {
+        cu->pc2line = pjs_malloc (b->debug.pc2line_len);
+        if (!cu->pc2line) goto fail;
+        memcpy (cu->pc2line, b->debug.pc2line_buf, b->debug.pc2line_len);
+        cu->pc2line_len = b->debug.pc2line_len;
+      }
+    }
+  }
+
+  /* Step 6: Copy source from main function */
+  if (main_func->has_debug && main_func->debug.source_len > 0 && main_func->debug.source) {
+    mod->source_len = main_func->debug.source_len;
+    mod->source = pjs_malloc (mod->source_len + 1);
+    if (!mod->source) goto fail;
+    memcpy (mod->source, main_func->debug.source, mod->source_len);
+    mod->source[mod->source_len] = '\0';
+  }
+
+  /* Success */
+  stb_free (&stb);
+  fc_free (&fc);
+  dbuf_free (&string_data);
+  return mod;
+
+fail:
+  stb_free (&stb);
+  fc_free (&fc);
+  dbuf_free (&string_data);
+  if (mod) cell_module_free (mod);
+  return NULL;
+}
+
+/* Compile source code directly to CellModule (context-neutral format).
+   This is a convenience function that combines JS_Compile + cell_module_from_bytecode.
+
+   Parameters:
+   - ctx: context for compilation
+   - input: source code (must be null-terminated)
+   - input_len: length of source code
+   - filename: source filename for debug info
+
+   Returns: allocated CellModule (caller must free with cell_module_free), or NULL on error.
+*/
+CellModule *JS_CompileModule (JSContext *ctx, const char *input, size_t input_len, const char *filename) {
+  JSValue bytecode = JS_Compile (ctx, input, input_len, filename);
+  if (JS_IsException (bytecode)) {
+    return NULL;
+  }
+
+  JSFunctionBytecode *b = JS_VALUE_GET_PTR (bytecode);
+  CellModule *mod = cell_module_from_bytecode (ctx, b);
+
+  /* Note: the intermediate bytecode value is not freed here and is not
+     returned either, so the caller has no handle to release it.
+     NOTE(review): presumably it is reclaimed by the context GC - confirm. */
+
+  return mod;
+}
+
 /*******************************************************************/
 /* runtime functions & objects */
 
@@ -24220,7 +25713,6 @@ static void JS_AddIntrinsicBaseObjects (JSContext *ctx) {
   ctx->throw_type_error = JS_NewCFunction (ctx, js_throw_type_error, NULL, 0);
 
   ctx->global_obj = JS_NewObject (ctx);
-  ctx->eval_env = JS_NULL; /* no eval environment by default */
 
   /* Error */
   obj1 = JS_NewCFunctionMagic (ctx, js_error_constructor, "Error", 1, JS_CFUNC_generic_magic, -1);
diff --git a/source/quickjs.h b/source/quickjs.h
index a5f35bd2..c54eab17 100644
--- a/source/quickjs.h
+++ b/source/quickjs.h
@@ -92,6 +92,7 @@ static inline int objhdr_s (objhdr_t h) { return (h & OBJHDR_S_MASK) != 0; }
 typedef struct JSRuntime JSRuntime; // the entire VM
 typedef struct JSContext JSContext; // Each actor
 typedef struct JSClass JSClass;
+typedef struct JSFunctionBytecode JSFunctionBytecode;
 typedef uint32_t JSClassID;
 
 /* Forward declaration - JSGCRef moved after JSValue definition */
@@ -1099,6 +1100,128 @@ void *js_malloc_rt (size_t size);
 void *js_mallocz_rt (size_t size);
 void js_free_rt (void *ptr);
 
+/* ============================================================================
+   Context-Neutral Module Format (CellModule)
+   ============================================================================ */
+
+/* Capture descriptor - what a nested function closes over */
+typedef enum {
+  CAP_FROM_PARENT_LOCAL = 1,  /* capture local from parent function */
+  CAP_FROM_PARENT_UPVALUE = 2 /* forward upvalue from parent's upvalues */
+} CellCapKind;
+
+typedef struct CellCapDesc {
+  uint8_t kind;   /* CAP_FROM_PARENT_LOCAL or CAP_FROM_PARENT_UPVALUE */
+  uint16_t index; /* local index in parent, or upvalue index in parent */
+} CellCapDesc;
+
+/* External relocation - for integrate-time patching */
+typedef enum {
+  EXT_GET = 1, /* OP_get_var -> OP_get_env_slot or OP_get_global_slot */
+  EXT_SET = 2  /* OP_put_var -> OP_set_env_slot or OP_set_global_slot */
+} CellExtKind;
+
+typedef struct CellExternalReloc {
+  uint32_t pc_offset; /* bytecode offset of the opcode byte to patch */
+  uint32_t name_sid;  /* string id of the external name */
+  uint8_t kind;       /* EXT_GET or EXT_SET */
+} CellExternalReloc;
+
+/* Constant types in cpool */
+typedef enum {
+  CELL_CONST_NULL = 0,
+  CELL_CONST_INT = 1,
+  CELL_CONST_FLOAT = 2,
+  CELL_CONST_STRING = 3, /* string_sid into module string table */
+  CELL_CONST_UNIT = 4    /* unit_id for nested function */
+} CellConstType;
+
+typedef struct CellConst {
+  uint8_t type; /* CellConstType */
+  union {
+    int32_t i32;
+    double f64;
+    uint32_t string_sid;
+    uint32_t unit_id;
+  };
+} CellConst;
+
+/* Per-unit structure (context-neutral, flattened) */
+typedef struct CellUnit {
+  /* Constant pool */
+  uint32_t const_count;
+  CellConst *constants;
+
+  /* Bytecode */
+  uint32_t bytecode_len;
+  uint8_t *bytecode;
+
+  /* Stack requirements */
+  uint16_t arg_count;
+  uint16_t var_count;
+  uint16_t stack_size;
+
+  /* Upvalue (capture) descriptors */
+  uint16_t upvalue_count;
+  CellCapDesc *upvalues;
+
+  /* External relocations */
+  uint32_t external_count;
+  CellExternalReloc *externals;
+
+  /* Debug info (optional) */
+  uint32_t pc2line_len;
+  uint8_t *pc2line;
+  uint32_t name_sid; /* unit name for stack traces */
+} CellUnit;
+
+/* Module-level structure (context-neutral) */
+#define CELL_MODULE_MAGIC 0x4C4C4543 /* "CELL" */
+#define CELL_MODULE_VERSION 1
+
+typedef struct CellModule {
+  uint32_t magic;  /* CELL_MODULE_MAGIC */
+  uint8_t version; /* CELL_MODULE_VERSION */
+  uint8_t flags;
+
+  /* Shared string table (module-global) */
+  uint32_t string_count;
+  uint32_t string_data_size;
+  uint8_t *string_data;     /* concatenated UTF-8 strings */
+  uint32_t *string_offsets; /* offset for each string */
+
+  /* Unit table (entry 0 is the main/entry unit) */
+  uint32_t unit_count;
+  CellUnit *units;
+
+  /* Debug: source stored once at module level */
+  uint32_t source_len;
+  char *source;
+} CellModule;
+
+/* Free a CellModule and all its contents */
+void cell_module_free (CellModule *mod);
+
+/* Write a CellModule to a byte buffer.
+   Returns allocated buffer (caller must free with pjs_free), or NULL on error. */
+uint8_t *cell_module_write (CellModule *mod, size_t *out_len);
+
+/* Read a CellModule from a byte buffer.
+   Returns allocated CellModule (caller must free with cell_module_free), or NULL on error. */
+CellModule *cell_module_read (const uint8_t *buf, size_t buf_len);
+
+/* Convert compiled JSFunctionBytecode to CellModule.
+   Returns allocated CellModule (caller must free with cell_module_free), or NULL on error. */
+CellModule *cell_module_from_bytecode (JSContext *ctx, JSFunctionBytecode *main_func);
+
+/* Compile source code directly to CellModule.
+   Returns allocated CellModule (caller must free with cell_module_free), or NULL on error. */
+CellModule *JS_CompileModule (JSContext *ctx, const char *input, size_t input_len, const char *filename);
+
+/* Integrate (link) a CellModule with an environment; the module is not run.
+   Returns callable function value, or JS_EXCEPTION on error. */
+JSValue cell_module_integrate (JSContext *ctx, CellModule *mod, JSValue env);
+
 #undef js_unlikely
 #undef inline
 
diff --git a/source/suite.c b/source/suite.c
index 555aa195..7d246e3c 100644
--- a/source/suite.c
+++ b/source/suite.c
@@ -2000,6 +2000,138 @@ TEST(wota_encode_blob) {
   return 1;
 }
 
+/* ============================================================================
+   CELL MODULE TESTS - Serialize/Deserialize bytecode
+   ============================================================================ */
+
+TEST(cell_module_compile_basic) {
+  /* Compile simple source to CellModule */
+  const char *source = "1 + 2";
+  CellModule *mod = JS_CompileModule(ctx, source, strlen(source), "");
+  ASSERT_MSG(mod != NULL, "JS_CompileModule returned NULL");
+
+  /* Check module has units */
+  ASSERT_MSG(mod->unit_count > 0, "Module has no units");
+  ASSERT_MSG(mod->units[0].bytecode_len > 0, "Unit has no bytecode");
+
+  cell_module_free(mod);
+  return 1;
+}
+
+TEST(cell_module_write_read) {
+  /* Compile, serialize, deserialize */
+  const char *source = "var x = 10; x * 2";
+  CellModule *mod = JS_CompileModule(ctx, source, strlen(source), "");
+  ASSERT_MSG(mod != NULL, "JS_CompileModule returned NULL");
+
+  /* Serialize */
+  size_t len;
+  uint8_t *buf = cell_module_write(mod, &len);
+  ASSERT_MSG(buf != NULL, "cell_module_write returned NULL");
+  ASSERT_MSG(len > 0, "cell_module_write produced empty buffer");
+
+  /* Deserialize */
+  CellModule *mod2 = cell_module_read(buf, len);
+  free(buf);
+  ASSERT_MSG(mod2 != NULL, "cell_module_read returned NULL");
+
+  /* Verify structure matches */
+  ASSERT_MSG(mod2->unit_count == mod->unit_count, "unit_count mismatch");
+  ASSERT_MSG(mod2->string_count == mod->string_count, "string_count mismatch");
+
+  cell_module_free(mod);
+  cell_module_free(mod2);
+  return 1;
+}
+
+TEST(cell_module_integrate_basic) {
+  /* Compile, then integrate and execute */
+  const char *source = "3 + 4";
+  CellModule *mod = JS_CompileModule(ctx, source, strlen(source), "");
+  ASSERT_MSG(mod != NULL, "JS_CompileModule returned NULL");
+
+  /* Integrate into context */
+  JSValue func = cell_module_integrate(ctx, mod, JS_NULL);
+  if (JS_IsException(func)) {
+    cell_module_free(mod);
+    ASSERT_MSG(0, "cell_module_integrate threw exception");
+  }
+
+  /* Execute */
+  JSValue result = JS_Call(ctx, func, JS_NULL, 0, NULL);
+  JS_FreeValue(ctx, func);
+  cell_module_free(mod);
+
+  if (JS_IsException(result)) {
+    ASSERT_MSG(0, "JS_Call threw exception");
+  }
+
+  ASSERT_INT(result, 7);
+  return 1;
+}
+
+TEST(cell_module_roundtrip_execute) {
+  /* Full round-trip: compile -> write -> read -> integrate -> execute */
+  const char *source = "var a = 5; var b = 3; a * b";
+  CellModule *mod = JS_CompileModule(ctx, source, strlen(source), "");
+  ASSERT_MSG(mod != NULL, "JS_CompileModule returned NULL");
+
+  /* Serialize */
+  size_t len;
+  uint8_t *buf = cell_module_write(mod, &len);
+  cell_module_free(mod);
+  ASSERT_MSG(buf != NULL, "cell_module_write returned NULL");
+
+  /* Deserialize */
+  CellModule *mod2 = cell_module_read(buf, len);
+  free(buf);
+  ASSERT_MSG(mod2 != NULL, "cell_module_read returned NULL");
+
+  /* Integrate and execute */
+  JSValue func = cell_module_integrate(ctx, mod2, JS_NULL);
+  cell_module_free(mod2);
+  if (JS_IsException(func)) {
+    ASSERT_MSG(0, "cell_module_integrate threw exception");
+  }
+
+  JSValue result = JS_Call(ctx, func, JS_NULL, 0, NULL);
+  JS_FreeValue(ctx, func);
+
+  if (JS_IsException(result)) {
+    ASSERT_MSG(0, "JS_Call threw exception");
+  }
+
+  ASSERT_INT(result, 15);
+  return 1;
+}
+
+TEST(cell_module_string_constant) {
+  /* Test string constant handling */
+  const char *source = "'hello' + ' world'";
+  CellModule *mod = JS_CompileModule(ctx, source, strlen(source), "");
+  ASSERT_MSG(mod != NULL, "JS_CompileModule returned NULL");
+
+  /* Verify string table has entries */
+  ASSERT_MSG(mod->string_count > 0, "Module has no strings");
+
+  /* Integrate and execute */
+  JSValue func = cell_module_integrate(ctx, mod, JS_NULL);
+  cell_module_free(mod);
+  if (JS_IsException(func)) {
+    ASSERT_MSG(0, "cell_module_integrate threw exception");
+  }
+
+  JSValue result = JS_Call(ctx, func, JS_NULL, 0, NULL);
+  JS_FreeValue(ctx, func);
+
+  if (JS_IsException(result)) {
+    ASSERT_MSG(0, "JS_Call threw exception");
+  }
+
+  ASSERT_STR(result, "hello world");
+  return 1;
+}
+
 /* ============================================================================
    MAIN TEST RUNNER
    ============================================================================ */
@@ -2209,6 +2341,13 @@ int run_c_test_suite(JSContext *ctx)
   RUN_TEST(wota_encode_nested_array);
   RUN_TEST(wota_encode_blob);
 
+  // CellModule tests
+  RUN_TEST(cell_module_compile_basic);
+  RUN_TEST(cell_module_write_read);
+  RUN_TEST(cell_module_integrate_basic);
+  RUN_TEST(cell_module_roundtrip_execute);
+  RUN_TEST(cell_module_string_constant);
+
   printf("\n=================================\n");
   printf("Results: %d passed, %d failed\n", tests_passed, tests_failed);
   printf("=================================\n\n");