From 4201be47ee7383a505f8957c967610cf73890e33 Mon Sep 17 00:00:00 2001 From: John Alanbrook Date: Sun, 28 Dec 2025 21:53:12 -0600 Subject: [PATCH] dump_profile --- source/quickjs.c | 310 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 308 insertions(+), 2 deletions(-) diff --git a/source/quickjs.c b/source/quickjs.c index fa05366f..98f6a95e 100644 --- a/source/quickjs.c +++ b/source/quickjs.c @@ -97,10 +97,58 @@ //#define DUMP_SHAPES /* dump shapes in JS_FreeContext */ //#define DUMP_READ_OBJECT //#define DUMP_ROPE_REBALANCE +/* dump profiling statistics (counters and sampling) when freeing the runtime */ +//#define DUMP_PROFILE /* test the GC by forcing it before each object allocation */ //#define FORCE_GC_AT_MALLOC +#ifdef DUMP_PROFILE +/* Profiling data structures (defined early since they're embedded in JSRuntime/JSFunctionBytecode) */ + +/* Per-function profiling counters (cheap, always-on under DUMP_PROFILE) */ +typedef struct JSFunctionProfile { + uint64_t entry_count; /* how many times this function was called */ + uint64_t self_ticks; /* bytecode instructions executed in this function */ +} JSFunctionProfile; + +/* Per-callsite profiling counters */ +typedef struct JSCallSite { + uint32_t pc_offset; /* bytecode offset of the call instruction */ + uint64_t hit_count; /* how many times this callsite was executed */ +} JSCallSite; + +/* Per-property-access-site profiling counters */ +typedef struct JSPropSite { + uint32_t pc_offset; /* bytecode offset of the get/put instruction */ + JSAtom atom; /* property name/atom */ + uint64_t hit_count; /* how many times this prop site was executed */ +} JSPropSite; + +/* Forward declare JSFunctionBytecode for JSProfileSample */ +typedef struct JSFunctionBytecode JSFunctionBytecode; + +/* Sampling profiler sample */ +typedef struct JSProfileSample { + JSFunctionBytecode *func; /* function being executed */ + uint32_t pc_offset; /* bytecode PC offset */ + uint32_t callsite_id; /* callsite id if available */ +} JSProfileSample; + +#define MAX_PROFILE_SAMPLES 100000 + +/* Runtime-wide profiling state */ +typedef struct JSProfileState { + /* Sampling profiler */ + JSProfileSample *samples; + uint32_t sample_count; + uint32_t sample_capacity; + struct timeval last_sample_time; + uint32_t sample_interval_us; /* microseconds between samples (1000-5000) */ +} JSProfileState; + +#endif /* DUMP_PROFILE */ + enum { /* classid tag */ /* union usage | properties */ JS_CLASS_OBJECT = 1, /* must be first */ @@ -228,6 +276,10 @@ struct JSRuntime { JSShape **shape_hash; void *user_opaque; JSContext *js; + +#ifdef DUMP_PROFILE + JSProfileState profile; +#endif }; struct JSClass { @@ -487,11 +539,22 @@ typedef struct JSFunctionBytecode { struct { /* debug info, move to separate structure to save memory? */ JSAtom filename; - int source_len; + int source_len; int pc2line_len; uint8_t *pc2line_buf; char *source; } debug; + +#ifdef DUMP_PROFILE + /* Profiling data */ + JSFunctionProfile profile; + JSCallSite *call_sites; + uint32_t call_site_count; + uint32_t call_site_capacity; + JSPropSite *prop_sites; + uint32_t prop_site_count; + uint32_t prop_site_capacity; +#endif } JSFunctionBytecode; typedef struct JSBoundFunction { @@ -1195,6 +1258,19 @@ JSRuntime *JS_NewRuntime2(const JSMallocFunctions *mf, void *opaque) rt->current_exception = JS_UNINITIALIZED; +#ifdef DUMP_PROFILE + /* Initialize profiling state */ + memset(&rt->profile, 0, sizeof(rt->profile)); + rt->profile.sample_capacity = MAX_PROFILE_SAMPLES; + rt->profile.samples = mf->js_malloc(&rt->malloc_state, + sizeof(JSProfileSample) * rt->profile.sample_capacity); + if (!rt->profile.samples) { + rt->profile.sample_capacity = 0; + } + rt->profile.sample_interval_us = 2000; /* 2ms default */ + gettimeofday(&rt->profile.last_sample_time, NULL); +#endif + return rt; fail: JS_FreeRuntime(rt); @@ -1554,6 +1630,109 @@ void JS_FreeRuntime(JSRuntime *rt) } #endif +#ifdef DUMP_PROFILE + /* Dump profiling statistics */ + { + struct list_head *el; + JSGCObjectHeader *p; + uint32_t func_count = 0; + uint32_t total_funcs = 0; + + printf("\n=== PROFILING STATISTICS ===\n\n"); + + /* Dump sampling profiler results */ + if (rt->profile.sample_count > 0) { + printf("Sampling Profile (%u samples, interval=%uus):\n", + rt->profile.sample_count, rt->profile.sample_interval_us); + printf(" %-50s %10s %10s\n", "Function", "Samples", "Percent"); + printf(" %s\n", "------------------------------------------------------------------------"); + + /* Count samples per function */ + list_for_each(el, &rt->gc_obj_list) { + p = list_entry(el, JSGCObjectHeader, link); + if (p->gc_obj_type == JS_GC_OBJ_TYPE_FUNCTION_BYTECODE) { + JSFunctionBytecode *b = (JSFunctionBytecode *)p; + uint32_t sample_hits = 0; + uint32_t i; + + for (i = 0; i < rt->profile.sample_count; i++) { + if (rt->profile.samples[i].func == b) { + sample_hits++; + } + } + + if (sample_hits > 0) { + JSContext *ctx = b->realm ? b->realm : rt->js; + const char *fname = ctx ? JS_AtomToCString(ctx, b->func_name) : NULL; + double percent = (100.0 * sample_hits) / rt->profile.sample_count; + printf(" %-50s %10u %9.2f%%\n", + fname ? fname : "", sample_hits, percent); + if (fname && ctx) JS_FreeCString(ctx, fname); + } + } + } + printf("\n"); + } + + /* Dump function counters */ + printf("Function Statistics:\n"); + printf(" %-50s %12s %15s\n", "Function", "Calls", "Instructions"); + printf(" %s\n", "------------------------------------------------------------------------"); + + list_for_each(el, &rt->gc_obj_list) { + p = list_entry(el, JSGCObjectHeader, link); + if (p->gc_obj_type == JS_GC_OBJ_TYPE_FUNCTION_BYTECODE) { + JSFunctionBytecode *b = (JSFunctionBytecode *)p; + total_funcs++; + if (b->profile.entry_count > 0 || b->profile.self_ticks > 0) { + JSContext *ctx = b->realm ? b->realm : rt->js; + const char *fname = ctx ? JS_AtomToCString(ctx, b->func_name) : NULL; + printf(" %-50s %12llu %15llu\n", + fname ? fname : "", + (unsigned long long)b->profile.entry_count, + (unsigned long long)b->profile.self_ticks); + if (fname && ctx) JS_FreeCString(ctx, fname); + func_count++; + + /* Dump hot callsites */ + if (b->call_site_count > 0) { + uint32_t i; + printf(" Call sites:\n"); + for (i = 0; i < b->call_site_count && i < 5; i++) { + printf(" PC %6u: %10llu calls\n", + b->call_sites[i].pc_offset, + (unsigned long long)b->call_sites[i].hit_count); + } + } + + /* Dump hot property sites */ + if (b->prop_site_count > 0) { + uint32_t i; + printf(" Property sites:\n"); + for (i = 0; i < b->prop_site_count && i < 5; i++) { + JSContext *pctx = b->realm ? b->realm : rt->js; + const char *pname = pctx ? JS_AtomToCString(pctx, b->prop_sites[i].atom) : NULL; + printf(" PC %6u %-20s: %10llu accesses\n", + b->prop_sites[i].pc_offset, + pname ? pname : "", + (unsigned long long)b->prop_sites[i].hit_count); + if (pname && pctx) JS_FreeCString(pctx, pname); + } + } + } + } + } + + if (func_count == 0) { + printf(" (no functions executed) [%u total functions found]\n", total_funcs); + } + printf("\n"); + + /* Free profiling data */ + js_free_rt(rt, rt->profile.samples); + } +#endif + { JSMallocState ms = rt->malloc_state; rt->mf.js_free(&ms, rt); @@ -12398,6 +12577,79 @@ typedef enum { } OPSpecialObjectEnum; /* argv[] is modified if (flags & JS_CALL_FLAG_COPY_ARGV) = 0. */ +#ifdef DUMP_PROFILE +/* Sampling profiler: try to take a sample */ +static void profile_try_sample(JSRuntime *rt, JSFunctionBytecode *b, const uint8_t *pc) +{ + struct timeval now; + long elapsed_us; + + gettimeofday(&now, NULL); + elapsed_us = (now.tv_sec - rt->profile.last_sample_time.tv_sec) * 1000000L + + (now.tv_usec - rt->profile.last_sample_time.tv_usec); + + /* Only sample if enough time has elapsed */ + if (elapsed_us >= rt->profile.sample_interval_us) { + if (rt->profile.sample_count < rt->profile.sample_capacity) { + JSProfileSample *sample = &rt->profile.samples[rt->profile.sample_count++]; + sample->func = b; + sample->pc_offset = (uint32_t)(pc - b->byte_code_buf); + sample->callsite_id = 0; /* Could be enhanced to track callsite */ + } + rt->profile.last_sample_time = now; + } +} + +/* Helper function to record a call site */ +static void profile_record_call_site(JSRuntime *rt, JSFunctionBytecode *b, uint32_t pc_offset) +{ + uint32_t i; + /* Check if we already have this callsite */ + for (i = 0; i < b->call_site_count; i++) { + if (b->call_sites[i].pc_offset == pc_offset) { + b->call_sites[i].hit_count++; + return; + } + } + /* Add new callsite */ + if (b->call_site_count >= b->call_site_capacity) { + uint32_t new_cap = b->call_site_capacity ? b->call_site_capacity * 2 : 16; + JSCallSite *new_sites = js_realloc_rt(rt, b->call_sites, new_cap * sizeof(JSCallSite)); + if (!new_sites) return; + b->call_sites = new_sites; + b->call_site_capacity = new_cap; + } + b->call_sites[b->call_site_count].pc_offset = pc_offset; + b->call_sites[b->call_site_count].hit_count = 1; + b->call_site_count++; +} + +/* Helper function to record a property access site */ +static void profile_record_prop_site(JSRuntime *rt, JSFunctionBytecode *b, uint32_t pc_offset, JSAtom atom) +{ + uint32_t i; + /* Check if we already have this prop site */ + for (i = 0; i < b->prop_site_count; i++) { + if (b->prop_sites[i].pc_offset == pc_offset && b->prop_sites[i].atom == atom) { + b->prop_sites[i].hit_count++; + return; + } + } + /* Add new prop site */ + if (b->prop_site_count >= b->prop_site_capacity) { + uint32_t new_cap = b->prop_site_capacity ? b->prop_site_capacity * 2 : 16; + JSPropSite *new_sites = js_realloc_rt(rt, b->prop_sites, new_cap * sizeof(JSPropSite)); + if (!new_sites) return; + b->prop_sites = new_sites; + b->prop_site_capacity = new_cap; + } + b->prop_sites[b->prop_site_count].pc_offset = pc_offset; + b->prop_sites[b->prop_site_count].atom = atom; + b->prop_sites[b->prop_site_count].hit_count = 1; + b->prop_site_count++; +} +#endif + static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj, JSValueConst this_obj, JSValueConst new_target, int argc, JSValue *argv, int flags) @@ -12452,7 +12704,12 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj, (JSValueConst *)argv, flags); } b = p->u.func.function_bytecode; - + +#ifdef DUMP_PROFILE + /* Increment function entry count */ + b->profile.entry_count++; +#endif + if (unlikely(caller_ctx->trace_hook) && (caller_ctx->trace_type & JS_HOOK_CALL)) { js_debug dbg = {0}; js_debug_info(caller_ctx, func_obj, &dbg); @@ -12507,6 +12764,14 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj, int call_argc; JSValue *call_argv; +#ifdef DUMP_PROFILE + /* Count bytecode instructions executed (self_ticks) */ + b->profile.self_ticks++; + + /* Try to take a profiling sample */ + profile_try_sample(rt, b, pc); +#endif + SWITCH(pc) { CASE(OP_push_i32): *sp++ = JS_NewInt32(ctx, get_u32(pc)); @@ -12783,6 +13048,10 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj, has_call_argc: call_argv = sp - call_argc; sf->cur_pc = pc; +#ifdef DUMP_PROFILE + /* Record call site */ + profile_record_call_site(rt, b, (uint32_t)(pc - b->byte_code_buf)); +#endif ret_val = JS_CallInternal(ctx, call_argv[-1], JS_NULL, JS_NULL, call_argc, call_argv, 0); if (unlikely(JS_IsException(ret_val))) @@ -12801,6 +13070,10 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj, pc += 2; call_argv = sp - call_argc; sf->cur_pc = pc; +#ifdef DUMP_PROFILE + /* Record call site */ + profile_record_call_site(rt, b, (uint32_t)(pc - b->byte_code_buf)); +#endif ret_val = JS_CallConstructorInternal(ctx, call_argv[-2], call_argv[-1], call_argc, call_argv, 0); @@ -12819,6 +13092,10 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj, pc += 2; call_argv = sp - call_argc; sf->cur_pc = pc; +#ifdef DUMP_PROFILE + /* Record call site */ + profile_record_call_site(rt, b, (uint32_t)(pc - b->byte_code_buf)); +#endif ret_val = JS_CallInternal(ctx, call_argv[-1], call_argv[-2], JS_NULL, call_argc, call_argv, 0); if (unlikely(JS_IsException(ret_val))) @@ -13646,6 +13923,10 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj, pc += 4; sf->cur_pc = pc; +#ifdef DUMP_PROFILE + /* Record property access site */ + profile_record_prop_site(rt, b, (uint32_t)(pc - b->byte_code_buf), atom); +#endif val = JS_GetProperty(ctx, sp[-1], atom); if (unlikely(JS_IsException(val))) goto exception; @@ -13662,6 +13943,10 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj, pc += 4; sf->cur_pc = pc; +#ifdef DUMP_PROFILE + /* Record property access site */ + profile_record_prop_site(rt, b, (uint32_t)(pc - b->byte_code_buf), atom); +#endif val = JS_GetProperty(ctx, sp[-1], atom); if (unlikely(JS_IsException(val))) goto exception; @@ -13677,6 +13962,10 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj, pc += 4; sf->cur_pc = pc; +#ifdef DUMP_PROFILE + /* Record property access site */ + profile_record_prop_site(rt, b, (uint32_t)(pc - b->byte_code_buf), atom); +#endif ret = JS_SetPropertyInternal(ctx, sp[-2], atom, sp[-1], sp[-2], JS_PROP_THROW_STRICT); JS_FreeValue(ctx, sp[-2]); @@ -24730,6 +25019,17 @@ static JSValue js_create_function(JSContext *ctx, JSFunctionDef *fd) b->has_simple_parameter_list = fd->has_simple_parameter_list; b->js_mode = fd->js_mode; b->is_derived_class_constructor = fd->is_derived_class_constructor; + +#ifdef DUMP_PROFILE + /* Initialize profiling data */ + memset(&b->profile, 0, sizeof(b->profile)); + b->call_sites = NULL; + b->call_site_count = 0; + b->call_site_capacity = 0; + b->prop_sites = NULL; + b->prop_site_count = 0; + b->prop_site_capacity = 0; +#endif b->new_target_allowed = fd->new_target_allowed; b->is_direct_or_indirect_eval = (fd->eval_type == JS_EVAL_TYPE_DIRECT || fd->eval_type == JS_EVAL_TYPE_INDIRECT); @@ -24783,6 +25083,12 @@ static void free_function_bytecode(JSRuntime *rt, JSFunctionBytecode *b) js_free_rt(rt, b->debug.source); } +#ifdef DUMP_PROFILE + /* Free profiling data */ + js_free_rt(rt, b->call_sites); + js_free_rt(rt, b->prop_sites); +#endif + remove_gc_object(&b->header); if (rt->gc_phase == JS_GC_PHASE_REMOVE_CYCLES && b->header.ref_count != 0) { list_add_tail(&b->header.link, &rt->gc_zero_ref_count_list);