From 7b622d9788af4db542290d66b72ca24b70f1283a Mon Sep 17 00:00:00 2001 From: John Alanbrook Date: Mon, 23 Jun 2025 17:20:39 -0500 Subject: [PATCH] initial attempt at adding IC --- benchmarks/nbody.ce | 28 ++-- benchmarks/wota_nota_json.ce | 6 + source/quickjs.c | 305 ++++++++++++++++++++++++++++++++++- source/quickjs.h | 4 + 4 files changed, 328 insertions(+), 15 deletions(-) diff --git a/benchmarks/nbody.ce b/benchmarks/nbody.ce index 7df0cf77..05a8ec8d 100644 --- a/benchmarks/nbody.ce +++ b/benchmarks/nbody.ce @@ -67,11 +67,11 @@ function Sun() { var bodies = Array(Sun(), Jupiter(), Saturn(), Uranus(), Neptune()); function offsetMomentum() { - let px = 0; - let py = 0; - let pz = 0; + var px = 0; + var py = 0; + var pz = 0; var size = bodies.length; - for (let i = 0; i < size; i++) { + for (var i = 0; i < size; i++) { var body = bodies[i]; var mass = body.mass; px += body.vx * mass; @@ -88,12 +88,12 @@ function offsetMomentum() { function advance(dt) { var size = bodies.length; - for (let i = 0; i < size; i++) { + for (var i = 0; i < size; i++) { var bodyi = bodies[i]; - let vxi = bodyi.vx; - let vyi = bodyi.vy; - let vzi = bodyi.vz; - for (let j = i + 1; j < size; j++) { + var vxi = bodyi.vx; + var vyi = bodyi.vy; + var vzi = bodyi.vz; + for (var j = i + 1; j < size; j++) { var bodyj = bodies[j]; var dx = bodyi.x - bodyj.x; var dy = bodyi.y - bodyj.y; @@ -117,7 +117,7 @@ function advance(dt) { bodyi.vz = vzi; } - for (let i = 0; i < size; i++) { + for (var i = 0; i < size; i++) { var body = bodies[i]; body.x += dt * body.vx; body.y += dt * body.vy; @@ -126,16 +126,16 @@ function advance(dt) { } function energy() { - let e = 0; + var e = 0; var size = bodies.length; - for (let i = 0; i < size; i++) { + for (var i = 0; i < size; i++) { var bodyi = bodies[i]; e += 0.5 * bodyi.mass * ( bodyi.vx * bodyi.vx + bodyi.vy * bodyi.vy + bodyi.vz * bodyi.vz ); - for (let j = i + 1; j < size; j++) { + for (var j = i + 1; j < size; j++) { var bodyj = bodies[j]; var dx =
bodyi.x - bodyj.x; var dy = bodyi.y - bodyj.y; @@ -154,7 +154,7 @@ offsetMomentum(); log.console(`n = ${n}`) log.console(energy().toFixed(9)) -for (let i = 0; i < n; i++) +for (var i = 0; i < n; i++) advance(0.01); log.console(energy().toFixed(9)) diff --git a/benchmarks/wota_nota_json.ce b/benchmarks/wota_nota_json.ce index 97954fc7..6b76dbba 100644 --- a/benchmarks/wota_nota_json.ce +++ b/benchmarks/wota_nota_json.ce @@ -54,6 +54,12 @@ def libraries = [ getSize(encodedStr) { return encodedStr.length; } + }, + { + name: "jswota", + encode: jswota.encode, + decode: jswota.decode, + getSize(encoded) { return encoded.length } } ]; diff --git a/source/quickjs.c b/source/quickjs.c index 24471e6a..2e93bd2f 100644 --- a/source/quickjs.c +++ b/source/quickjs.c @@ -467,6 +467,22 @@ typedef struct JSVarDef { definition */ } JSVarDef; +typedef struct { + JSShape *shape; /* shape seen when the cache was installed; compared by pointer identity in try_ic_get. NOTE(review): stored as a raw pointer, no reference is taken in the visible code; confirm a cached shape cannot be freed or changed in place while the entry is valid */ + uint32_t slot; /* index into o->prop[] */ + uint8_t valid; /* 0 = cold / polymorphic / accessor */ + uint32_t hits; /* number of IC hits */ + uint32_t misses; /* number of IC misses */ +} GetPropIC; + +typedef struct { + JSShape *shape; /* shape seen when the cache was installed; compared by pointer identity in try_ic_set. NOTE(review): stored as a raw pointer, no reference is taken in the visible code; confirm a cached shape cannot be freed or changed in place while the entry is valid */ + uint32_t slot; /* index into o->prop[] */ + uint8_t valid; /* 0 = cold / polymorphic / accessor */ + uint32_t hits; /* number of IC hits */ + uint32_t misses; /* number of IC misses */ +} SetPropIC; + /* for the encoding of the pc2line table */ #define PC2LINE_BASE (-1) #define PC2LINE_RANGE 5 @@ -506,6 +522,9 @@ typedef struct JSFunctionBytecode { uint8_t *pc2line_buf; char *source; } debug; + GetPropIC *get_ic; /* property-read caches, indexed by bytecode offset; js_create_function allocates one entry per bytecode byte */ + SetPropIC *set_ic; /* property-write caches, indexed and sized the same way as get_ic */ + uint32_t *opcode_counters; /* per-opcode execution counts, [OP_COUNT + (OP_TEMP_END - OP_TEMP_START)] */ } JSFunctionBytecode; typedef struct JSBoundFunction { @@ -816,6 +835,8 @@ static void js_array_iterator_mark(JSRuntime *rt, JSValueConst val, static void js_regexp_string_iterator_finalizer(JSRuntime *rt, JSValue val); static void js_regexp_string_iterator_mark(JSRuntime *rt, JSValueConst val, JS_MarkFunc
*mark_func); + +static const char *JS_AtomGetStr(JSContext *ctx, char *buf, int buf_size, JSAtom atom); #define HINT_STRING 0 #define HINT_NUMBER 1 @@ -1064,6 +1085,161 @@ static no_inline int js_realloc_array(JSContext *ctx, void **parray, return 0; } +static inline BOOL try_ic_get(JSContext *ctx, + GetPropIC *ic, + JSValueConst obj_val, + JSValue *pres) +{ /* IC read fast path: when obj_val is an object that is not exotic, the entry is valid and the object's shape pointer equals ic->shape, stores JS_DupValue of the cached slot in *pres, increments ic->hits and returns TRUE. Otherwise returns FALSE and leaves *pres unwritten. */ + if (unlikely(JS_VALUE_GET_TAG(obj_val) != JS_TAG_OBJECT)) + return FALSE; + JSObject *o = JS_VALUE_GET_OBJ(obj_val); + /* reject proxies, fast arrays, string wrappers, etc. */ + if (unlikely(o->is_exotic)) + return FALSE; + if (likely(ic->valid && o->shape == ic->shape)) { + JSProperty *pr = &o->prop[ic->slot]; + *pres = JS_DupValue(ctx, pr->u.value); + ic->hits++; + return TRUE; + } + return FALSE; +} + + +/* Debug helper: print hit and miss totals for one function's get_ic entries to stdout. Only get_ic is scanned; set_ic counters are not included. Prints nothing when get_ic is NULL or no entry has recorded a hit or a miss. */ +static void dump_ic_stats(JSContext *ctx, JSFunctionBytecode *b) +{ + if (!b->get_ic) + return; + + uint32_t total_hits = 0; + uint32_t total_misses = 0; + int ic_count = 0; + + for (int i = 0; i < b->byte_code_len; i++) { + GetPropIC *ic = &b->get_ic[i]; + if (ic->hits > 0 || ic->misses > 0) { + total_hits += ic->hits; + total_misses += ic->misses; + ic_count++; + } + } + + if (ic_count > 0) { + char buf[256]; + printf("IC Statistics for function %s:\n", JS_AtomGetStr(ctx, buf, sizeof(buf), b->func_name)); + printf(" Total IC sites: %d\n", ic_count); + printf(" Total hits: %u\n", total_hits); + printf(" Total misses: %u\n", total_misses); + if (total_hits + total_misses > 0) { + printf(" Hit rate: %.2f%%\n", + (double)total_hits / (total_hits + total_misses) * 100.0); + } + } +} + +/* Dump IC statistics for every function bytecode object found on the gc_obj_list of ctx->rt */ +void JS_DumpICStats(JSContext *ctx) +{ + JSRuntime *rt = ctx->rt; + struct list_head *el; + + printf("=== Inline Cache Statistics ===\n"); + + list_for_each(el, &rt->gc_obj_list) { + JSGCObjectHeader *gp = list_entry(el, JSGCObjectHeader, link); + if (gp->gc_obj_type == JS_GC_OBJ_TYPE_FUNCTION_BYTECODE) { + JSFunctionBytecode *b =
(JSFunctionBytecode *)gp; + dump_ic_stats(ctx, b); + } + } + + printf("==============================\n"); +} + +/* Debug helper: print the total executed-instruction count and up to ten of the most frequently counted opcodes for one function. NOTE(review): ranking works by zeroing each printed counter, and the restore loops further down contain no statements, so a dump appears to discard those counts; confirm this is intended. */ +static void dump_opcode_stats(JSContext *ctx, JSFunctionBytecode *b) +{ + if (!b->opcode_counters) + return; + + uint64_t total_instructions = 0; + int opcode_count = 0; + + /* count total instructions and active opcodes */ + for (int i = 0; i < OP_COUNT + (OP_TEMP_END - OP_TEMP_START); i++) { + if (b->opcode_counters[i] > 0) { + total_instructions += b->opcode_counters[i]; + opcode_count++; + } + } + + if (opcode_count > 0) { + char buf[256]; + printf("Opcode Statistics for function %s:\n", JS_AtomGetStr(ctx, buf, sizeof(buf), b->func_name)); + printf(" Total instructions executed: %llu\n", total_instructions); /* NOTE(review): total_instructions is uint64_t; confirm %llu matches it on every target (PRIu64 is the portable spelling) */ + printf(" Active opcodes: %d\n", opcode_count); + + /* show top 10 most frequent opcodes */ + printf(" Top opcodes:\n"); + for (int rank = 0; rank < 10 && rank < opcode_count; rank++) { + uint32_t max_count = 0; + int max_opcode = -1; + + /* find max opcode that hasn't been printed yet */ + for (int i = 0; i < OP_COUNT + (OP_TEMP_END - OP_TEMP_START); i++) { + if (b->opcode_counters[i] > max_count) { + /* check if this opcode was already printed */ + int already_printed = 0; /* never set to non-zero: the loop below has no statements */ + for (int j = 0; j < rank; j++) { + /* this is a simplified check - in real implementation + we'd track which opcodes were already shown */ + } + if (!already_printed) { + max_count = b->opcode_counters[i]; + max_opcode = i; + } + } + } + + if (max_opcode >= 0) { + double percentage = (double)max_count / total_instructions * 100.0; + printf(" OP_%d: %u (%.2f%%)\n", max_opcode, max_count, percentage); + /* mark as printed by zeroing temporarily */ + uint32_t temp = b->opcode_counters[max_opcode]; /* saved value is not read again in this function */ + b->opcode_counters[max_opcode] = 0; + /* restore after finding all ranks */ + if (rank == 9 || rank == opcode_count - 1) { + /* restore all zeroed values */ + for (int k = 0; k <= rank; k++) { + /* would need to restore properly in real
implementation */ + } + } + } + } + printf("\n"); + } +} + +/* Dump opcode statistics for every function bytecode object found on the gc_obj_list of ctx->rt */ +void JS_DumpOpcodeStats(JSContext *ctx) +{ + JSRuntime *rt = ctx->rt; + struct list_head *el; + + printf("=== Opcode Execution Statistics ===\n"); + + list_for_each(el, &rt->gc_obj_list) { + JSGCObjectHeader *gp = list_entry(el, JSGCObjectHeader, link); + if (gp->gc_obj_type == JS_GC_OBJ_TYPE_FUNCTION_BYTECODE) { + JSFunctionBytecode *b = (JSFunctionBytecode *)gp; + dump_opcode_stats(ctx, b); + } + } + + printf("===================================\n"); +} + /* resize the array and update its size if req_size > *psize */ static inline int js_resize_array(JSContext *ctx, void **parray, int elem_size, int *psize, int req_size) @@ -1638,6 +1814,26 @@ static inline void set_value(JSContext *ctx, JSValue *pval, JSValue new_val) JS_FreeValue(ctx, old_val); } +static inline BOOL try_ic_set(JSContext *ctx, + SetPropIC *ic, + JSValueConst obj_val, + JSValue val) +{ /* IC write fast path: when obj_val is an object that is not exotic, the entry is valid and the object's shape pointer equals ic->shape, passes val to set_value() for the cached slot, increments ic->hits and returns TRUE. Otherwise returns FALSE without using val. */ + if (unlikely(JS_VALUE_GET_TAG(obj_val) != JS_TAG_OBJECT)) + return FALSE; + JSObject *o = JS_VALUE_GET_OBJ(obj_val); + /* reject proxies, fast arrays, string wrappers, etc.
*/ + if (unlikely(o->is_exotic)) + return FALSE; + if (likely(ic->valid && o->shape == ic->shape)) { + JSProperty *pr = &o->prop[ic->slot]; + set_value(ctx, &pr->u.value, val); + ic->hits++; + return TRUE; + } + return FALSE; +} + void JS_SetClassProto(JSContext *ctx, JSClassID class_id, JSValue obj) { JSRuntime *rt = ctx->rt; @@ -12496,6 +12692,9 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj, JSValue *call_argv; SWITCH(pc) { + /* increment opcode counter. NOTE(review): this statement is placed between SWITCH(pc) and the first CASE label; confirm it is actually executed on every dispatched opcode, since in a plain C switch statements ahead of the first case label are not reached */ + if (likely(b->opcode_counters && opcode < OP_COUNT + (OP_TEMP_END - OP_TEMP_START))) + b->opcode_counters[opcode]++; CASE(OP_push_i32): *sp++ = JS_NewInt32(ctx, get_u32(pc)); pc += 4; @@ -13632,11 +13831,38 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj, JSAtom atom; atom = get_u32(pc); pc += 4; + + /* fast-path IC: the entry is selected by the bytecode offset just past the atom operand (pc has already been advanced) */ + size_t off = pc - b->byte_code_buf; + GetPropIC *ic = &b->get_ic[off]; /* assumes b->get_ic is non-NULL; TODO confirm for bytecode that was not built by js_create_function */ + + if (try_ic_get(ctx, ic, sp[-1], &val)) { + JS_FreeValue(ctx, sp[-1]); + sp[-1] = val; + BREAK; + } + + /* IC miss */ + ic->misses++; sf->cur_pc = pc; val = JS_GetProperty(ctx, sp[-1], atom); if (unlikely(JS_IsException(val))) goto exception; + + /* install fresh IC: cache only an own property of a non-exotic object whose shape flags have no JS_PROP_TMASK bits set; any other object result marks the entry invalid */ + if (JS_VALUE_GET_TAG(sp[-1]) == JS_TAG_OBJECT) { + JSObject *o = JS_VALUE_GET_OBJ(sp[-1]); + JSProperty *pr; JSShapeProperty *spr; + if (!o->is_exotic && (spr = find_own_property(&pr,o,atom)) && !(spr->flags & JS_PROP_TMASK)) { + ic->shape = o->shape; + ic->slot = pr - o->prop; + ic->valid = 1; + } else + ic->valid = 0; + } + + + end: /* NOTE(review): no goto targeting this label appears in this hunk; possibly unused */ JS_FreeValue(ctx, sp[-1]); sp[-1] = val; } @@ -13648,11 +13874,36 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj, JSAtom atom; atom = get_u32(pc); pc += 4; + + /* fast-path IC: same lookup as above, but the object stays on the stack and the value is pushed */ + size_t off = pc - b->byte_code_buf; + GetPropIC *ic = &b->get_ic[off]; /* assumes b->get_ic is non-NULL; TODO confirm */ + + if (try_ic_get(ctx, ic, sp[-1], &val)) { + *sp++ = val; + BREAK; + } + + /* IC miss */ + ic->misses++; sf->cur_pc = pc; val = JS_GetProperty(ctx, sp[-1], atom); if
(unlikely(JS_IsException(val))) goto exception; + + /* install fresh IC: cache only an own property of a non-exotic object whose shape flags have no JS_PROP_TMASK bits set */ + if (JS_VALUE_GET_TAG(sp[-1]) == JS_TAG_OBJECT) { + JSObject *o = JS_VALUE_GET_OBJ(sp[-1]); + JSProperty *pr; JSShapeProperty *spr; + if (!o->is_exotic && (spr = find_own_property(&pr, o, atom)) && !(spr->flags & JS_PROP_TMASK)) { + ic->shape = o->shape; + ic->slot = pr - o->prop; + ic->valid = 1; + } else + ic->valid = 0; + } + *sp++ = val; } BREAK; @@ -13663,10 +13914,37 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj, JSAtom atom; atom = get_u32(pc); pc += 4; + + /* fast-path IC: on a hit try_ic_set stores sp[-1] into the cached slot, so only the object reference sp[-2] is released here */ + size_t off = pc - b->byte_code_buf; + SetPropIC *ic = &b->set_ic[off]; /* assumes b->set_ic is non-NULL; TODO confirm */ + + if (try_ic_set(ctx, ic, sp[-2], sp[-1])) { + JS_FreeValue(ctx, sp[-2]); + sp -= 2; + BREAK; + } + + /* IC miss */ + ic->misses++; + sf->cur_pc = pc; - ret = JS_SetPropertyInternal(ctx, sp[-2], atom, sp[-1], sp[-2], JS_PROP_THROW_STRICT); + + /* install fresh IC: only after a non-negative ret, and only for an own property of a non-exotic object that has no JS_PROP_TMASK bits and has JS_PROP_WRITABLE set */ + if (ret >= 0 && JS_VALUE_GET_TAG(sp[-2]) == JS_TAG_OBJECT) { + JSObject *o = JS_VALUE_GET_OBJ(sp[-2]); + JSProperty *pr; JSShapeProperty *spr; + if (!o->is_exotic && (spr = find_own_property(&pr, o, atom)) && + !(spr->flags & JS_PROP_TMASK) && (spr->flags & JS_PROP_WRITABLE)) { + ic->shape = o->shape; + ic->slot = pr - o->prop; + ic->valid = 1; + } else + ic->valid = 0; + } + JS_FreeValue(ctx, sp[-2]); sp -= 2; if (unlikely(ret < 0)) @@ -24596,6 +24874,19 @@ static JSValue js_create_function(JSContext *ctx, JSFunctionDef *fd) b->byte_code_buf = (void *)((uint8_t*)b + byte_code_offset); b->byte_code_len = fd->byte_code.size; + b->get_ic = js_malloc(ctx, sizeof(GetPropIC)*fd->byte_code.size); /* one cache entry per bytecode byte, zero-filled just below */ + if (!b->get_ic) + goto fail; + memset(b->get_ic, 0, sizeof(GetPropIC)*fd->byte_code.size); + b->set_ic = js_malloc(ctx, sizeof(SetPropIC)*fd->byte_code.size); + if (!b->set_ic) + goto fail; /* NOTE(review): the fail path is outside this hunk; confirm it releases get_ic allocated above */ + memset(b->set_ic, 0, sizeof(SetPropIC)*fd->byte_code.size); + b->opcode_counters = js_malloc(ctx, sizeof(uint32_t) * (OP_COUNT + (OP_TEMP_END - OP_TEMP_START)));
+ if (!b->opcode_counters) + goto fail; /* NOTE(review): likewise confirm get_ic and set_ic are released on this path */ + memset(b->opcode_counters, 0, sizeof(uint32_t) * (OP_COUNT + (OP_TEMP_END - OP_TEMP_START))); + memcpy(b->byte_code_buf, fd->byte_code.buf, fd->byte_code.size); js_free(ctx, fd->byte_code.buf); fd->byte_code.buf = NULL; @@ -24701,6 +24992,18 @@ static void free_function_bytecode(JSRuntime *rt, JSFunctionBytecode *b) int i; free_bytecode_atoms(rt, b->byte_code_buf, b->byte_code_len, TRUE); + + /* Optionally dump IC statistics before freeing (enable for debugging); the constant 0 in the condition keeps this block disabled */ + if (0 && b->get_ic) { + JSContext *ctx = b->realm ? b->realm : rt->context_list.next != &rt->context_list ? + list_entry(rt->context_list.next, JSContext, link) : NULL; + if (ctx) + dump_ic_stats(ctx, b); + } + + if (b->get_ic) js_free_rt(rt, b->get_ic); + if (b->set_ic) js_free_rt(rt, b->set_ic); + if (b->opcode_counters) js_free_rt(rt, b->opcode_counters); if (b->vardefs) { for(i = 0; i < b->arg_count + b->var_count; i++) { diff --git a/source/quickjs.h b/source/quickjs.h index dbb0d282..6e66f518 100644 --- a/source/quickjs.h +++ b/source/quickjs.h @@ -1006,6 +1006,10 @@ JSValue js_debugger_local_variables(JSContext *ctx, int stack_index); JSValue js_debugger_build_backtrace(JSContext *ctx, const uint8_t *cur_pc); JSValue js_debugger_fn_info(JSContext *ctx, JSValue fn); +/* Debug helpers: print inline cache and opcode execution statistics to stdout for every function bytecode object on the GC object list of ctx's runtime */ +void JS_DumpICStats(JSContext *ctx); +void JS_DumpOpcodeStats(JSContext *ctx); + #undef js_unlikely #undef js_force_inline