diff --git a/docs/managed_stack_frames.md b/docs/managed_stack_frames.md new file mode 100644 index 00000000..50d51fcb --- /dev/null +++ b/docs/managed_stack_frames.md @@ -0,0 +1,231 @@ +# Managed Stack Frames Implementation Plan + +This document outlines the requirements and invariants for implementing fully managed stack frames in QuickJS, eliminating recursion through the C stack for JS->JS calls. + +## Overview + +The goal is to maintain interpreter state entirely on managed stacks (value stack + frame stack) rather than relying on C stack frames. This enables: +- **Call IC fast path**: Direct dispatch to C functions without js_call_c_function overhead +- **Proper stack traces**: Error().stack works correctly even through optimized paths +- **Tail call optimization**: Possible without C stack growth +- **Debugging/profiling**: Full interpreter state always inspectable + +## Current State + +- Property IC: Implemented with per-function polymorphic IC (up to 4 shapes per site) +- Call IC: Infrastructure exists but disabled (`CALL_IC_ENABLED 0`) because it bypasses stack frame setup required for Error().stack + +## Golden Invariant + +**At any time, the entire live interpreter state must be reconstructible from:** +``` +(ctx->value_stack, value_top) + (ctx->frame_stack, frame_top) +``` + +No critical state may live only in C locals. + +## Implementation Requirements + +### 1. Offset Semantics (use `size_t` / `uint32_t`) + +Replace pointer-based addressing with offset-based addressing: + +```c +typedef struct JSStackFrame { + uint32_t sp_offset; // Offset into ctx->value_stack + uint32_t var_offset; // Start of local variables + uint32_t arg_offset; // Start of arguments + // ... continuation info below +} JSStackFrame; +``` + +**Rationale**: Offsets survive stack reallocation, pointers don't. + +### 2. 
Consistent `sp_offset` Semantics + +Define clearly and consistently: +- `sp_offset` = current stack pointer offset from `ctx->value_stack` +- On function entry: `sp_offset` points to first free slot after arguments +- On function exit: `sp_offset` is restored to the caller's expected position + +### 3. Continuation Info (Caller State Restoration) + +Each frame must store enough to restore caller state on return: + +```c +typedef struct JSStackFrame { + // ... other fields + + // Continuation info + const uint8_t *caller_pc; // Return address in caller's bytecode + uint32_t caller_sp_offset; // Caller's stack pointer + JSFunctionBytecode *caller_b; // Caller's bytecode (for IC cache) + + // Current function info + JSFunctionBytecode *b; // Current function's bytecode + JSValue *var_buf; // Pointer for now; becomes var_offset (see section 1) + JSValue *arg_buf; // Pointer for now; becomes arg_offset (see section 1) + JSValue this_val; +} JSStackFrame; +``` + +### 4. Exception Handler Stack Depth Restoration + +Exception handlers must record the `sp_offset` at handler entry so `throw` can restore the correct stack depth: + +```c +typedef struct JSExceptionHandler { + uint32_t sp_offset; // Stack depth to restore on throw + const uint8_t *catch_pc; // Where to jump on exception + // ... +} JSExceptionHandler; +``` + +On `throw`: +1. Unwind frame stack to find appropriate handler +2. Restore `sp_offset` to handler's recorded value +3. Push exception value +4. Jump to `catch_pc` + +### 5. Aliased `argv` Handling + +When an `arguments` object exists, `argv` may be aliased by it. The frame must track this: + +```c +typedef struct JSStackFrame { + // ... + uint16_t flags; + #define JS_FRAME_ALIASED_ARGV (1 << 0) + #define JS_FRAME_STRICT (1 << 1) + // ... + JSObject *arguments_obj; // Non-NULL if arguments object created +} JSStackFrame; +``` + +When `JS_FRAME_ALIASED_ARGV` is set, writes to `arguments[i]` must also update the corresponding argument slot in the frame. + +### 6. 
Stack Trace Accuracy (`sf->cur_pc`) + +**Critical**: `sf->cur_pc` must be updated before any operation that could: +- Throw an exception +- Call into another function +- Trigger GC + +Currently the interpreter does: +```c +sf->cur_pc = pc; // Before potentially-throwing ops +``` + +With managed frames, ensure this is done consistently or use a different mechanism (e.g., store pc in frame on every call). + +### 7. GC Integration + +The GC must be able to mark all live values on the managed stacks: + +```c +void js_gc_mark_value_stack(JSRuntime *rt) { + for (JSContext *ctx = rt->context_list; ctx; ctx = ctx->link) { + JSValue *p = ctx->value_stack; + JSValue *end = ctx->value_stack + ctx->value_top; + while (p < end) { + JS_MarkValue(rt, *p); + p++; + } + } +} + +void js_gc_mark_frame_stack(JSRuntime *rt) { + for (JSContext *ctx = rt->context_list; ctx; ctx = ctx->link) { + JSStackFrame *sf = ctx->frame_stack; + JSStackFrame *end = ctx->frame_stack + ctx->frame_top; + while (sf < end) { + JS_MarkValue(rt, sf->this_val); + // Mark any other JSValue fields in frame + sf++; + } + } +} +``` + +### 8. Main Interpreter Loop Changes + +Transform from recursive to iterative: + +```c +// Current (recursive): +JSValue JS_CallInternal(...) { + // ... + CASE(OP_call): + // Recursive call to JS_CallInternal + ret = JS_CallInternal(ctx, func, ...); + // ... +} + +// Target (iterative): +JSValue JS_CallInternal(...) { + // ... + CASE(OP_call): + // Push new frame, update pc to callee entry + push_frame(ctx, ...); + pc = new_func->byte_code_buf; + BREAK; // Same C frame: dispatch resumes at the callee's first opcode + + CASE(OP_return): + // Pop frame, restore caller state + ret_val = sp[-1]; + pop_frame(ctx, &pc, &sp, &b); + sp[0] = ret_val; + BREAK; // Continue executing caller + // ... +} +``` + +## Call IC Integration (After Managed Frames) + +Once managed frames are complete, Call IC becomes safe: + +```c +CASE(OP_call_method): + // ... resolve method ... 
+ + if (JS_VALUE_GET_TAG(method) == JS_TAG_OBJECT) { + JSObject *p = JS_VALUE_GET_OBJ(method); + + // Check Call IC (keyed by call site and function object identity, not shape) + CallICEntry *entry = call_ic_lookup(cache, pc_offset, p); + if (entry && entry->cfunc) { + // Direct C call - safe because frame is on managed stack + push_minimal_frame(ctx, pc, sp_offset); + ret = entry->cfunc(ctx, this_val, argc, argv); + pop_minimal_frame(ctx); + // Handle return... + } + } + + // Slow path: full call +``` + +## Testing Strategy + +1. **Stack trace tests**: Verify Error().stack works through all call patterns +2. **Exception tests**: Verify throw/catch restores correct stack depth +3. **GC stress tests**: Verify all values are properly marked during GC +4. **Benchmark**: Compare performance before/after + +## Migration Steps + +1. [ ] Add offset fields to JSStackFrame alongside existing pointers +2. [ ] Create push_frame/pop_frame helper functions +3. [ ] Convert OP_call to use push_frame instead of recursion (JS->JS calls) +4. [ ] Convert OP_return to use pop_frame +5. [ ] Update exception handling to use offset-based stack restoration +6. [ ] Update GC to walk managed stacks +7. [ ] Remove/deprecate recursive JS_CallInternal calls for JS functions +8. [ ] Enable Call IC for C functions +9. 
[ ] Benchmark and optimize + +## References + +- Current IC implementation: `source/quickjs.c` lines 12567-12722 (ICCache, prop_ic_*) +- Current stack frame: `source/quickjs.c` JSStackFrame definition +- OP_call_method: `source/quickjs.c` lines 13654-13718 diff --git a/internal/engine.cm b/internal/engine.cm index 11e0da36..1697d172 100644 --- a/internal/engine.cm +++ b/internal/engine.cm @@ -140,12 +140,19 @@ globalThis.isa = function(value, master) { var ENETSERVICE = 0.1 var REPLYTIMEOUT = 60 // seconds before replies are ignored +var nullguard = false function caller_data(depth = 0) { var file = "nofile" var line = 0 var caller = new Error().stack.split("\n")[1+depth] + if (!nullguard && is_null(caller)) { + os.print(`caller_data now getting null`) + os.print("\n") + nullguard = true + } + if (caller) { var md = caller.match(/\((.*)\:/) var m = md ? md[1] : "SCRIPT" @@ -809,19 +816,28 @@ stone(globalThis) var rads = use_core("math/radians") log.console(rads) +log.console("now, should be nofile:0") $_.clock(_ => { + log.console("in clock") // Get capabilities for the main program var file_info = shop.file_info ? shop.file_info(locator.path) : null var inject = shop.script_inject_for ? 
shop.script_inject_for(file_info) : [] + log.console("injection") + // Build values array for injection var vals = [] + log.console(`number to inject is ${inject.length}`) + log.console('when the log.console statements are in the loop, with backticks, it runs but with errors on the injectables especially substring not seeming to work; without them, it totally fails') for (var i = 0; i < inject.length; i++) { var key = inject[i] + log.console(`injecting ${i}, which is ${key}`) // when this line is present, works; when not present, does not work + if (key && key[0] == '$') key = key.substring(1) if (key == 'fd') vals.push(fd) else vals.push($_[key]) + log.console(`split at 1 was ${key}`) } // Create use function bound to the program's package diff --git a/meson.build b/meson.build index c102f144..31ff2435 100644 --- a/meson.build +++ b/meson.build @@ -74,7 +74,7 @@ foreach file: scripts endforeach srceng = 'source' -includes = [srceng, 'internal', 'debug', 'net', 'archive', 'math'] +includes = [srceng, 'internal', 'debug', 'net', 'archive'] foreach file : src full_path = join_paths(srceng, file) diff --git a/source/quickjs.c b/source/quickjs.c index a7c433de..535947e3 100644 --- a/source/quickjs.c +++ b/source/quickjs.c @@ -642,30 +642,54 @@ typedef enum { IC_STATE_MEGA, } ic_state; -/* Property lookup IC (monomorphic case) */ +/* Max entries for polymorphic IC */ +#define IC_POLY_SIZE 4 + +/* Property lookup IC entry (for mono and poly) */ typedef struct { JSShape *shape; /* expected shape */ uint32_t offset; /* property offset in prop array */ +} PropICEntry; + +/* Property lookup IC with polymorphic support */ +typedef struct { + PropICEntry entries[IC_POLY_SIZE]; + uint8_t count; /* number of valid entries (0-4) */ } GetPropIC; typedef struct { - JSShape *shape; - uint32_t offset; + PropICEntry entries[IC_POLY_SIZE]; + uint8_t count; } SetPropIC; -/* Call IC (monomorphic case) */ +/* Call IC entry - stores enough to dispatch directly to C functions */ typedef 
struct { - JSObject *func_obj; /* expected function object */ - JSFunctionBytecode *b; /* direct pointer to bytecode */ - uint8_t is_bytecode_func; /* 1 if bytecode function, 0 if native */ - uint8_t expected_argc; /* expected argument count */ + JSObject *func_obj; /* expected function object (for identity check) */ + union { + struct { + JSFunctionBytecode *bytecode; + } js; + struct { + JSCFunctionType c_function; /* direct C function pointer */ + JSContext *realm; /* function's realm */ + uint8_t cproto; /* calling convention */ + int16_t magic; /* magic value for *_magic variants */ + } native; + } u; + uint8_t is_native; /* 1 if C function, 0 if bytecode */ +} CallICEntry; + +/* Call IC with polymorphic support */ +typedef struct { + CallICEntry entries[IC_POLY_SIZE]; + uint8_t count; /* number of valid entries */ } CallIC; /* Unified IC slot with tagged union */ typedef struct ICSlot { uint8_t kind; /* ic_kind */ uint8_t state; /* ic_state */ - uint16_t aux; /* auxiliary flags/data */ + uint16_t aux; /* auxiliary flags/data (e.g., atom for prop IC) */ union { GetPropIC get_prop; SetPropIC set_prop; @@ -12540,6 +12564,231 @@ static void close_lexical_var(JSContext *ctx, JSStackFrame *sf, int var_idx) } } +/* ============================================================================ + * Per-function Inline Cache (IC) System + * + * We use a per-function hash table keyed by pc_offset to avoid TLS collisions. + * The hash table is allocated lazily on first IC miss. This approach: + * - Eliminates the 256-entry TLS cache collision problem + * - Supports polymorphic caching (up to IC_POLY_SIZE shapes per site) + * - Keeps IC state with the function for better locality + * + * The IC cache uses a unified slot structure that can hold property or call ICs. 
+ * ============================================================================ */ + +#define IC_HASH_BITS 10 +#define IC_HASH_SIZE (1 << IC_HASH_BITS) + +/* Property IC slot - supports polymorphic shapes */ +typedef struct PropICSlot { + uint32_t pc_offset; /* bytecode offset of access site */ + struct { + JSShape *shape; /* expected shape */ + uint32_t prop_idx; /* property index in shape */ + } entries[IC_POLY_SIZE]; + uint8_t count; /* number of valid entries (0 = uninit, 1-4 = poly) */ +} PropICSlot; + +/* Call IC slot - for future use with managed stack frames */ +typedef struct CallICSlot { + uint32_t pc_offset; /* bytecode offset of call site */ + JSObject *func_obj; /* expected function object (for identity check) */ + union { + struct { + JSFunctionBytecode *bytecode; + } js; + struct { + JSCFunctionType c_function; /* direct C function pointer */ + JSContext *realm; /* function's realm */ + uint8_t cproto; /* calling convention */ + int16_t magic; /* magic value for *_magic variants */ + } native; + } u; + uint8_t is_native; /* 1 if C function, 0 if bytecode */ + uint8_t valid; /* 1 if entry is valid */ +} CallICSlot; + +/* Unified IC cache - one per function, lazily allocated */ +typedef struct ICCache { + PropICSlot prop_slots[IC_HASH_SIZE]; + /* CallICSlot call_slots[IC_HASH_SIZE]; - for future call IC */ +} ICCache; + +static force_inline uint32_t ic_hash(uint32_t pc_offset) +{ + /* Knuth multiplicative hash - pc offsets are well-distributed */ + return (pc_offset * 2654435761u) >> (32 - IC_HASH_BITS); +} + +/* Ensure IC cache is allocated for a function */ +static force_inline ICCache *ic_ensure_cache(JSRuntime *rt, JSFunctionBytecode *b) +{ + ICCache *cache = (ICCache *)b->ic_slots; + if (likely(cache)) + return cache; + + cache = js_mallocz_rt(rt, sizeof(ICCache)); + if (!cache) + return NULL; + b->ic_slots = (ICSlot *)cache; + b->ic_count = IC_HASH_SIZE; + return cache; +} + +/* + * Property IC lookup - returns the property index if found, or -1 
for miss. + * Supports polymorphic lookup across multiple shapes. + */ +static force_inline int prop_ic_lookup(ICCache *cache, uint32_t pc_offset, JSShape *shape) +{ + if (!cache) + return -1; + + uint32_t idx = ic_hash(pc_offset); + PropICSlot *slot = &cache->prop_slots[idx]; + + /* Check if this slot is for our pc_offset */ + if (slot->count == 0 || slot->pc_offset != pc_offset) + return -1; + + /* Search through cached shapes (polymorphic) */ + for (int i = 0; i < slot->count; i++) { + if (slot->entries[i].shape == shape) { + return (int)slot->entries[i].prop_idx; + } + } + + return -1; /* Shape not in cache */ +} + +/* + * Property IC update - add or update a shape->prop_idx mapping. + * If the slot is full (IC_POLY_SIZE entries), we go megamorphic and stop caching. + */ +static void prop_ic_update(JSRuntime *rt, JSFunctionBytecode *b, + uint32_t pc_offset, JSShape *shape, uint32_t prop_idx) +{ + ICCache *cache = ic_ensure_cache(rt, b); + if (!cache) + return; + + uint32_t idx = ic_hash(pc_offset); + PropICSlot *slot = &cache->prop_slots[idx]; + + /* If this is a different pc_offset (hash collision), reset the slot */ + if (slot->count > 0 && slot->pc_offset != pc_offset) { + slot->count = 0; + } + + slot->pc_offset = pc_offset; + + /* Check if shape is already cached */ + for (int i = 0; i < slot->count; i++) { + if (slot->entries[i].shape == shape) { + /* Update existing entry (prop_idx might have changed) */ + slot->entries[i].prop_idx = prop_idx; + return; + } + } + + /* Add new entry if we have room */ + if (slot->count < IC_POLY_SIZE) { + slot->entries[slot->count].shape = shape; + slot->entries[slot->count].prop_idx = prop_idx; + slot->count++; + } + /* else: megamorphic - stop caching this site */ +} + +/* Legacy call IC functions - kept for future use with managed stack frames */ +static force_inline uint32_t call_ic_hash(uint32_t pc_offset) +{ + return ic_hash(pc_offset); +} + +static force_inline CallICSlot *call_ic_lookup(void *cache, + uint32_t 
pc_offset, + JSObject *func_obj) +{ + (void)cache; (void)pc_offset; (void)func_obj; + return NULL; /* Disabled until managed stack frames are implemented */ +} + +static void call_ic_update(JSRuntime *rt, JSFunctionBytecode *b, + uint32_t pc_offset, JSObject *func_obj) +{ + (void)rt; (void)b; (void)pc_offset; (void)func_obj; + /* Disabled until managed stack frames are implemented */ +} + +/* + * Fast path for calling a C function with cached info. + * + * This sets up a minimal stack frame so that things like new Error().stack work. + * We avoid the overhead of js_call_c_function by: + * - Skipping the class_id lookup and cproto extraction (cached) + * - Skipping stack overflow check for common cases (caller already checked) + * - Inlining the most common calling conventions + * + * Returns JS_TAG_UNINITIALIZED to signal that the caller should use the slow path. + */ +static force_inline JSValue call_ic_invoke_native(JSContext *ctx, + JSRuntime *rt, + CallICSlot *slot, + JSValueConst func_obj, + JSValueConst this_obj, + int argc, JSValueConst *argv, + JSStackFrame *caller_sf) +{ + JSCFunctionType func = slot->u.native.c_function; + JSContext *realm = slot->u.native.realm; + int cproto = slot->u.native.cproto; + int16_t magic = slot->u.native.magic; + JSValue ret_val; + + /* Set up minimal stack frame for stack traces */ + JSStackFrame ic_sf; + ic_sf.prev_frame = caller_sf; + ic_sf.cur_func = func_obj; + ic_sf.arg_count = argc; + ic_sf.arg_buf = (JSValue *)argv; + ic_sf.var_buf = NULL; + ic_sf.cur_pc = NULL; + ic_sf.js_mode = 0; + init_list_head(&ic_sf.var_ref_list); + + rt->current_stack_frame = &ic_sf; + + /* Fast path for the most common calling conventions */ + switch (cproto) { + case JS_CFUNC_generic: + ret_val = func.generic(realm, this_obj, argc, argv); + break; + case JS_CFUNC_generic_magic: + ret_val = func.generic_magic(realm, this_obj, argc, argv, magic); + break; + case JS_CFUNC_getter: + ret_val = func.getter(realm, this_obj); + break; + case 
JS_CFUNC_getter_magic: + ret_val = func.getter_magic(realm, this_obj, magic); + break; + case JS_CFUNC_setter: + ret_val = func.setter(realm, this_obj, argc > 0 ? argv[0] : JS_NULL); + break; + case JS_CFUNC_setter_magic: + ret_val = func.setter_magic(realm, this_obj, argc > 0 ? argv[0] : JS_NULL, magic); + break; + default: + /* Fall back to full path for less common calling conventions */ + rt->current_stack_frame = caller_sf; + return (JSValue){ .u.int32 = 0, .tag = JS_TAG_UNINITIALIZED }; + } + + rt->current_stack_frame = caller_sf; + return ret_val; +} + #define JS_CALL_FLAG_COPY_ARGV (1 << 1) static JSValue js_call_c_function(JSContext *ctx, JSValueConst func_obj, @@ -13468,6 +13717,10 @@ static JSValue JS_CallInternal_OLD(JSContext *caller_ctx, JSValueConst func_obj, CASE(OP_call_method): CASE(OP_tail_call_method): { +#define CALL_IC_ENABLED 0 /* Set to 1 to enable Call IC */ +#if CALL_IC_ENABLED + const uint8_t *call_pc = pc - 1; /* PC of opcode for IC */ +#endif call_argc = get_u16(pc); pc += 2; call_argv = sp - call_argc; @@ -13476,6 +13729,44 @@ static JSValue JS_CallInternal_OLD(JSContext *caller_ctx, JSValueConst func_obj, /* Record call site */ profile_record_call_site(rt, b, (uint32_t)(pc - b->byte_code_buf)); #endif +#if CALL_IC_ENABLED + /* Call IC fast path for C functions */ + { + JSValue func_val = call_argv[-1]; + JSValue this_val = call_argv[-2]; + + if (likely(JS_VALUE_GET_TAG(func_val) == JS_TAG_OBJECT)) { + JSObject *func_obj = JS_VALUE_GET_OBJ(func_val); + uint32_t pc_offset = (uint32_t)(call_pc - b->byte_code_buf); + CallICCache *cache = (CallICCache *)b->ic_slots; + CallICSlot *ic_slot = call_ic_lookup(cache, pc_offset, func_obj); + + if (ic_slot && ic_slot->is_native) { + /* IC hit for native function - use fast path with proper stack frame */ + ret_val = call_ic_invoke_native(ctx, rt, ic_slot, func_val, this_val, + call_argc, (JSValueConst *)call_argv, sf); + if (likely(JS_VALUE_GET_TAG(ret_val) != JS_TAG_UNINITIALIZED)) { + /* Fast 
path succeeded */ + if (unlikely(JS_IsException(ret_val))) + goto exception; + if (opcode == OP_tail_call_method) + goto done; + for(i = -2; i < call_argc; i++) + JS_FreeValue(ctx, call_argv[i]); + sp -= call_argc + 2; + *sp++ = ret_val; + BREAK; + } + /* JS_TAG_UNINITIALIZED means fall through to slow path */ + } else if (!ic_slot && (func_obj->class_id == JS_CLASS_C_FUNCTION || + func_obj->class_id == JS_CLASS_BYTECODE_FUNCTION)) { + /* IC miss - populate cache for next time */ + call_ic_update(rt, b, pc_offset, func_obj); + } + } + } +#endif + /* Slow path: use regular call machinery */ ret_val = JS_CallInternal_OLD(ctx, call_argv[-1], call_argv[-2], JS_NULL, call_argc, call_argv, 0); if (unlikely(JS_IsException(ret_val))) @@ -14300,9 +14591,9 @@ static JSValue JS_CallInternal_OLD(JSContext *caller_ctx, JSValueConst func_obj, JSValue val; JSAtom atom; JSValue obj; - const uint8_t *ic_pc; + uint32_t pc_offset; - ic_pc = pc - 1; /* PC of opcode, before consuming operands */ + pc_offset = (uint32_t)((pc - 1) - b->byte_code_buf); atom = get_u32(pc); pc += 4; @@ -14313,53 +14604,41 @@ static JSValue JS_CallInternal_OLD(JSContext *caller_ctx, JSValueConst func_obj, #endif obj = sp[-1]; - /* Monomorphic IC fast path: shape-guarded own-property lookup */ + /* Per-function polymorphic IC fast path */ if (likely(JS_VALUE_GET_TAG(obj) == JS_TAG_OBJECT)) { JSObject *p = JS_VALUE_GET_OBJ(obj); JSShape *sh = p->shape; + ICCache *cache = (ICCache *)b->ic_slots; - /* Simple thread-local IC cache using PC as key */ - static __thread struct { - const uint8_t *pc; - JSShape *shape; - uint32_t prop_idx; - JSAtom atom; - } ic_cache[256]; + /* IC lookup - supports multiple shapes per site */ + int prop_idx = prop_ic_lookup(cache, pc_offset, sh); + if (likely(prop_idx >= 0)) { + JSShapeProperty *prs = &get_shape_prop(sh)[prop_idx]; - uint32_t cache_idx = ((uintptr_t)ic_pc >> 3) & 255; - struct { const uint8_t *pc; JSShape *shape; uint32_t prop_idx; JSAtom atom; } *slot = 
&ic_cache[cache_idx]; - - /* IC hit: shape guard passed */ - if (likely(slot->pc == ic_pc && slot->shape == sh && slot->atom == atom)) { - JSProperty *pr = &p->prop[slot->prop_idx]; - JSShapeProperty *prs = &get_shape_prop(sh)[slot->prop_idx]; - - /* Double-check it's still a normal data property */ + /* Verify it's still a normal data property */ if (likely((prs->flags & JS_PROP_TMASK) == JS_PROP_NORMAL)) { + JSProperty *pr = &p->prop[prop_idx]; val = JS_DupValue(ctx, pr->u.value); JS_FreeValue(ctx, obj); sp[-1] = val; - goto get_field_done; + BREAK; } } - /* IC miss: do lookup and populate cache if it's an own data property */ + /* IC miss: do lookup and update cache if it's an own data property */ { JSProperty *pr; JSShapeProperty *prs = find_own_property(&pr, p, atom); if (prs && (prs->flags & JS_PROP_TMASK) == JS_PROP_NORMAL) { - /* Cache this for next time */ - uint32_t prop_idx = prs - get_shape_prop(sh); - slot->pc = ic_pc; - slot->shape = sh; - slot->prop_idx = prop_idx; - slot->atom = atom; + /* Update cache for next time */ + uint32_t idx = prs - get_shape_prop(sh); + prop_ic_update(rt, b, pc_offset, sh, idx); val = JS_DupValue(ctx, pr->u.value); JS_FreeValue(ctx, obj); sp[-1] = val; - goto get_field_done; + BREAK; } } } @@ -14370,8 +14649,6 @@ static JSValue JS_CallInternal_OLD(JSContext *caller_ctx, JSValueConst func_obj, goto exception; JS_FreeValue(ctx, sp[-1]); sp[-1] = val; - get_field_done: - ; } BREAK; @@ -14380,9 +14657,9 @@ static JSValue JS_CallInternal_OLD(JSContext *caller_ctx, JSValueConst func_obj, JSValue val; JSAtom atom; JSValue obj; - const uint8_t *ic_pc; + uint32_t pc_offset; - ic_pc = pc - 1; + pc_offset = (uint32_t)((pc - 1) - b->byte_code_buf); atom = get_u32(pc); pc += 4; @@ -14393,57 +14670,48 @@ static JSValue JS_CallInternal_OLD(JSContext *caller_ctx, JSValueConst func_obj, #endif obj = sp[-1]; - /* Monomorphic IC fast path */ + /* Per-function polymorphic IC fast path */ if (likely(JS_VALUE_GET_TAG(obj) == JS_TAG_OBJECT)) { 
JSObject *p = JS_VALUE_GET_OBJ(obj); JSShape *sh = p->shape; + ICCache *cache = (ICCache *)b->ic_slots; - static __thread struct { - const uint8_t *pc; - JSShape *shape; - uint32_t prop_idx; - JSAtom atom; - } ic_cache2[256]; - - uint32_t cache_idx = ((uintptr_t)ic_pc >> 3) & 255; - struct { const uint8_t *pc; JSShape *shape; uint32_t prop_idx; JSAtom atom; } *slot = &ic_cache2[cache_idx]; - - if (likely(slot->pc == ic_pc && slot->shape == sh && slot->atom == atom)) { - JSProperty *pr = &p->prop[slot->prop_idx]; - JSShapeProperty *prs = &get_shape_prop(sh)[slot->prop_idx]; + /* IC lookup - supports multiple shapes per site */ + int prop_idx = prop_ic_lookup(cache, pc_offset, sh); + if (likely(prop_idx >= 0)) { + JSShapeProperty *prs = &get_shape_prop(sh)[prop_idx]; + /* Verify it's still a normal data property */ if (likely((prs->flags & JS_PROP_TMASK) == JS_PROP_NORMAL)) { + JSProperty *pr = &p->prop[prop_idx]; val = JS_DupValue(ctx, pr->u.value); *sp++ = val; - goto get_field2_done; + BREAK; } } + /* IC miss: do lookup and update cache if it's an own data property */ { JSProperty *pr; JSShapeProperty *prs = find_own_property(&pr, p, atom); if (prs && (prs->flags & JS_PROP_TMASK) == JS_PROP_NORMAL) { - uint32_t prop_idx = prs - get_shape_prop(sh); - slot->pc = ic_pc; - slot->shape = sh; - slot->prop_idx = prop_idx; - slot->atom = atom; + /* Update cache for next time */ + uint32_t idx = prs - get_shape_prop(sh); + prop_ic_update(rt, b, pc_offset, sh, idx); val = JS_DupValue(ctx, pr->u.value); *sp++ = val; - goto get_field2_done; + BREAK; } } } - /* Slow path */ + /* Slow path: proto chain, getters, non-objects, etc. 
*/ val = JS_GetProperty(ctx, obj, atom); if (unlikely(JS_IsException(val))) goto exception; *sp++ = val; - get_field2_done: - ; } BREAK; @@ -25522,35 +25790,6 @@ static JSValue js_create_function(JSContext *ctx, JSFunctionDef *fd) return JS_EXCEPTION; } -/* IC helper functions */ -static ICSlot *ic_get_slot(JSFunctionBytecode *b, uint32_t ic_index) -{ - if (ic_index >= b->ic_count) - return NULL; - return &b->ic_slots[ic_index]; -} - -static void ic_init_call(ICSlot *slot) -{ - memset(slot, 0, sizeof(*slot)); - slot->kind = IC_CALL; - slot->state = IC_STATE_UNINIT; -} - -static void ic_init_get_prop(ICSlot *slot) -{ - memset(slot, 0, sizeof(*slot)); - slot->kind = IC_GET_PROP; - slot->state = IC_STATE_UNINIT; -} - -static void ic_init_set_prop(ICSlot *slot) -{ - memset(slot, 0, sizeof(*slot)); - slot->kind = IC_SET_PROP; - slot->state = IC_STATE_UNINIT; -} - static void free_function_bytecode(JSRuntime *rt, JSFunctionBytecode *b) { int i; diff --git a/test.ce b/test.ce index 8b3f4415..c0cf5560 100644 --- a/test.ce +++ b/test.ce @@ -5,6 +5,8 @@ var time = use('time') var json = use('json') var blob = use('blob') +log.console("here") + if (!args) args = [] var target_pkg = null // null = current package diff --git a/tests/suite.cm b/tests/suite.cm index a4b2e6d3..2df489a5 100644 --- a/tests/suite.cm +++ b/tests/suite.cm @@ -2,6 +2,7 @@ // Tests all core features before implementing performance optimizations // (bytecode passes, ICs, quickening, tail call optimization) // + return { // ============================================================================ // ARITHMETIC OPERATORS - Numbers