dump_profile
This commit is contained in:
306
source/quickjs.c
306
source/quickjs.c
@@ -97,10 +97,58 @@
|
||||
//#define DUMP_SHAPES /* dump shapes in JS_FreeContext */
|
||||
//#define DUMP_READ_OBJECT
|
||||
//#define DUMP_ROPE_REBALANCE
|
||||
/* dump profiling statistics (counters and sampling) when freeing the runtime */
|
||||
//#define DUMP_PROFILE
|
||||
|
||||
/* test the GC by forcing it before each object allocation */
|
||||
//#define FORCE_GC_AT_MALLOC
|
||||
|
||||
#ifdef DUMP_PROFILE
|
||||
/* Profiling data structures (defined early since they're embedded in JSRuntime/JSFunctionBytecode) */
|
||||
|
||||
/* Per-function profiling counters (cheap, always-on under DUMP_PROFILE) */
|
||||
typedef struct JSFunctionProfile {
|
||||
uint64_t entry_count; /* how many times this function was called */
|
||||
uint64_t self_ticks; /* bytecode instructions executed in this function */
|
||||
} JSFunctionProfile;
|
||||
|
||||
/* Per-callsite profiling counters */
|
||||
typedef struct JSCallSite {
|
||||
uint32_t pc_offset; /* bytecode offset of the call instruction */
|
||||
uint64_t hit_count; /* how many times this callsite was executed */
|
||||
} JSCallSite;
|
||||
|
||||
/* Per-property-access-site profiling counters */
|
||||
typedef struct JSPropSite {
|
||||
uint32_t pc_offset; /* bytecode offset of the get/put instruction */
|
||||
JSAtom atom; /* property name/atom */
|
||||
uint64_t hit_count; /* how many times this prop site was executed */
|
||||
} JSPropSite;
|
||||
|
||||
/* Forward declare JSFunctionBytecode for JSProfileSample */
|
||||
typedef struct JSFunctionBytecode JSFunctionBytecode;
|
||||
|
||||
/* Sampling profiler sample */
|
||||
typedef struct JSProfileSample {
|
||||
JSFunctionBytecode *func; /* function being executed */
|
||||
uint32_t pc_offset; /* bytecode PC offset */
|
||||
uint32_t callsite_id; /* callsite id if available */
|
||||
} JSProfileSample;
|
||||
|
||||
#define MAX_PROFILE_SAMPLES 100000
|
||||
|
||||
/* Runtime-wide profiling state */
|
||||
typedef struct JSProfileState {
|
||||
/* Sampling profiler */
|
||||
JSProfileSample *samples;
|
||||
uint32_t sample_count;
|
||||
uint32_t sample_capacity;
|
||||
struct timeval last_sample_time;
|
||||
uint32_t sample_interval_us; /* microseconds between samples (1000-5000) */
|
||||
} JSProfileState;
|
||||
|
||||
#endif /* DUMP_PROFILE */
|
||||
|
||||
enum {
|
||||
/* classid tag */ /* union usage | properties */
|
||||
JS_CLASS_OBJECT = 1, /* must be first */
|
||||
@@ -228,6 +276,10 @@ struct JSRuntime {
|
||||
JSShape **shape_hash;
|
||||
void *user_opaque;
|
||||
JSContext *js;
|
||||
|
||||
#ifdef DUMP_PROFILE
|
||||
JSProfileState profile;
|
||||
#endif
|
||||
};
|
||||
|
||||
struct JSClass {
|
||||
@@ -492,6 +544,17 @@ typedef struct JSFunctionBytecode {
|
||||
uint8_t *pc2line_buf;
|
||||
char *source;
|
||||
} debug;
|
||||
|
||||
#ifdef DUMP_PROFILE
|
||||
/* Profiling data */
|
||||
JSFunctionProfile profile;
|
||||
JSCallSite *call_sites;
|
||||
uint32_t call_site_count;
|
||||
uint32_t call_site_capacity;
|
||||
JSPropSite *prop_sites;
|
||||
uint32_t prop_site_count;
|
||||
uint32_t prop_site_capacity;
|
||||
#endif
|
||||
} JSFunctionBytecode;
|
||||
|
||||
typedef struct JSBoundFunction {
|
||||
@@ -1195,6 +1258,19 @@ JSRuntime *JS_NewRuntime2(const JSMallocFunctions *mf, void *opaque)
|
||||
|
||||
rt->current_exception = JS_UNINITIALIZED;
|
||||
|
||||
#ifdef DUMP_PROFILE
|
||||
/* Initialize profiling state */
|
||||
memset(&rt->profile, 0, sizeof(rt->profile));
|
||||
rt->profile.sample_capacity = MAX_PROFILE_SAMPLES;
|
||||
rt->profile.samples = mf->js_malloc(&rt->malloc_state,
|
||||
sizeof(JSProfileSample) * rt->profile.sample_capacity);
|
||||
if (!rt->profile.samples) {
|
||||
rt->profile.sample_capacity = 0;
|
||||
}
|
||||
rt->profile.sample_interval_us = 2000; /* 2ms default */
|
||||
gettimeofday(&rt->profile.last_sample_time, NULL);
|
||||
#endif
|
||||
|
||||
return rt;
|
||||
fail:
|
||||
JS_FreeRuntime(rt);
|
||||
@@ -1554,6 +1630,109 @@ void JS_FreeRuntime(JSRuntime *rt)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef DUMP_PROFILE
|
||||
/* Dump profiling statistics */
|
||||
{
|
||||
struct list_head *el;
|
||||
JSGCObjectHeader *p;
|
||||
uint32_t func_count = 0;
|
||||
uint32_t total_funcs = 0;
|
||||
|
||||
printf("\n=== PROFILING STATISTICS ===\n\n");
|
||||
|
||||
/* Dump sampling profiler results */
|
||||
if (rt->profile.sample_count > 0) {
|
||||
printf("Sampling Profile (%u samples, interval=%uus):\n",
|
||||
rt->profile.sample_count, rt->profile.sample_interval_us);
|
||||
printf(" %-50s %10s %10s\n", "Function", "Samples", "Percent");
|
||||
printf(" %s\n", "------------------------------------------------------------------------");
|
||||
|
||||
/* Count samples per function */
|
||||
list_for_each(el, &rt->gc_obj_list) {
|
||||
p = list_entry(el, JSGCObjectHeader, link);
|
||||
if (p->gc_obj_type == JS_GC_OBJ_TYPE_FUNCTION_BYTECODE) {
|
||||
JSFunctionBytecode *b = (JSFunctionBytecode *)p;
|
||||
uint32_t sample_hits = 0;
|
||||
uint32_t i;
|
||||
|
||||
for (i = 0; i < rt->profile.sample_count; i++) {
|
||||
if (rt->profile.samples[i].func == b) {
|
||||
sample_hits++;
|
||||
}
|
||||
}
|
||||
|
||||
if (sample_hits > 0) {
|
||||
JSContext *ctx = b->realm ? b->realm : rt->js;
|
||||
const char *fname = ctx ? JS_AtomToCString(ctx, b->func_name) : NULL;
|
||||
double percent = (100.0 * sample_hits) / rt->profile.sample_count;
|
||||
printf(" %-50s %10u %9.2f%%\n",
|
||||
fname ? fname : "<anonymous>", sample_hits, percent);
|
||||
if (fname && ctx) JS_FreeCString(ctx, fname);
|
||||
}
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
/* Dump function counters */
|
||||
printf("Function Statistics:\n");
|
||||
printf(" %-50s %12s %15s\n", "Function", "Calls", "Instructions");
|
||||
printf(" %s\n", "------------------------------------------------------------------------");
|
||||
|
||||
list_for_each(el, &rt->gc_obj_list) {
|
||||
p = list_entry(el, JSGCObjectHeader, link);
|
||||
if (p->gc_obj_type == JS_GC_OBJ_TYPE_FUNCTION_BYTECODE) {
|
||||
JSFunctionBytecode *b = (JSFunctionBytecode *)p;
|
||||
total_funcs++;
|
||||
if (b->profile.entry_count > 0 || b->profile.self_ticks > 0) {
|
||||
JSContext *ctx = b->realm ? b->realm : rt->js;
|
||||
const char *fname = ctx ? JS_AtomToCString(ctx, b->func_name) : NULL;
|
||||
printf(" %-50s %12llu %15llu\n",
|
||||
fname ? fname : "<anonymous>",
|
||||
(unsigned long long)b->profile.entry_count,
|
||||
(unsigned long long)b->profile.self_ticks);
|
||||
if (fname && ctx) JS_FreeCString(ctx, fname);
|
||||
func_count++;
|
||||
|
||||
/* Dump hot callsites */
|
||||
if (b->call_site_count > 0) {
|
||||
uint32_t i;
|
||||
printf(" Call sites:\n");
|
||||
for (i = 0; i < b->call_site_count && i < 5; i++) {
|
||||
printf(" PC %6u: %10llu calls\n",
|
||||
b->call_sites[i].pc_offset,
|
||||
(unsigned long long)b->call_sites[i].hit_count);
|
||||
}
|
||||
}
|
||||
|
||||
/* Dump hot property sites */
|
||||
if (b->prop_site_count > 0) {
|
||||
uint32_t i;
|
||||
printf(" Property sites:\n");
|
||||
for (i = 0; i < b->prop_site_count && i < 5; i++) {
|
||||
JSContext *pctx = b->realm ? b->realm : rt->js;
|
||||
const char *pname = pctx ? JS_AtomToCString(pctx, b->prop_sites[i].atom) : NULL;
|
||||
printf(" PC %6u %-20s: %10llu accesses\n",
|
||||
b->prop_sites[i].pc_offset,
|
||||
pname ? pname : "<unknown>",
|
||||
(unsigned long long)b->prop_sites[i].hit_count);
|
||||
if (pname && pctx) JS_FreeCString(pctx, pname);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (func_count == 0) {
|
||||
printf(" (no functions executed) [%u total functions found]\n", total_funcs);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
/* Free profiling data */
|
||||
js_free_rt(rt, rt->profile.samples);
|
||||
}
|
||||
#endif
|
||||
|
||||
{
|
||||
JSMallocState ms = rt->malloc_state;
|
||||
rt->mf.js_free(&ms, rt);
|
||||
@@ -12398,6 +12577,79 @@ typedef enum {
|
||||
} OPSpecialObjectEnum;
|
||||
|
||||
/* argv[] is modified if (flags & JS_CALL_FLAG_COPY_ARGV) = 0. */
|
||||
#ifdef DUMP_PROFILE
|
||||
/* Sampling profiler: try to take a sample */
|
||||
static void profile_try_sample(JSRuntime *rt, JSFunctionBytecode *b, const uint8_t *pc)
|
||||
{
|
||||
struct timeval now;
|
||||
long elapsed_us;
|
||||
|
||||
gettimeofday(&now, NULL);
|
||||
elapsed_us = (now.tv_sec - rt->profile.last_sample_time.tv_sec) * 1000000L +
|
||||
(now.tv_usec - rt->profile.last_sample_time.tv_usec);
|
||||
|
||||
/* Only sample if enough time has elapsed */
|
||||
if (elapsed_us >= rt->profile.sample_interval_us) {
|
||||
if (rt->profile.sample_count < rt->profile.sample_capacity) {
|
||||
JSProfileSample *sample = &rt->profile.samples[rt->profile.sample_count++];
|
||||
sample->func = b;
|
||||
sample->pc_offset = (uint32_t)(pc - b->byte_code_buf);
|
||||
sample->callsite_id = 0; /* Could be enhanced to track callsite */
|
||||
}
|
||||
rt->profile.last_sample_time = now;
|
||||
}
|
||||
}
|
||||
|
||||
/* Helper function to record a call site */
|
||||
static void profile_record_call_site(JSRuntime *rt, JSFunctionBytecode *b, uint32_t pc_offset)
|
||||
{
|
||||
uint32_t i;
|
||||
/* Check if we already have this callsite */
|
||||
for (i = 0; i < b->call_site_count; i++) {
|
||||
if (b->call_sites[i].pc_offset == pc_offset) {
|
||||
b->call_sites[i].hit_count++;
|
||||
return;
|
||||
}
|
||||
}
|
||||
/* Add new callsite */
|
||||
if (b->call_site_count >= b->call_site_capacity) {
|
||||
uint32_t new_cap = b->call_site_capacity ? b->call_site_capacity * 2 : 16;
|
||||
JSCallSite *new_sites = js_realloc_rt(rt, b->call_sites, new_cap * sizeof(JSCallSite));
|
||||
if (!new_sites) return;
|
||||
b->call_sites = new_sites;
|
||||
b->call_site_capacity = new_cap;
|
||||
}
|
||||
b->call_sites[b->call_site_count].pc_offset = pc_offset;
|
||||
b->call_sites[b->call_site_count].hit_count = 1;
|
||||
b->call_site_count++;
|
||||
}
|
||||
|
||||
/* Helper function to record a property access site */
|
||||
static void profile_record_prop_site(JSRuntime *rt, JSFunctionBytecode *b, uint32_t pc_offset, JSAtom atom)
|
||||
{
|
||||
uint32_t i;
|
||||
/* Check if we already have this prop site */
|
||||
for (i = 0; i < b->prop_site_count; i++) {
|
||||
if (b->prop_sites[i].pc_offset == pc_offset && b->prop_sites[i].atom == atom) {
|
||||
b->prop_sites[i].hit_count++;
|
||||
return;
|
||||
}
|
||||
}
|
||||
/* Add new prop site */
|
||||
if (b->prop_site_count >= b->prop_site_capacity) {
|
||||
uint32_t new_cap = b->prop_site_capacity ? b->prop_site_capacity * 2 : 16;
|
||||
JSPropSite *new_sites = js_realloc_rt(rt, b->prop_sites, new_cap * sizeof(JSPropSite));
|
||||
if (!new_sites) return;
|
||||
b->prop_sites = new_sites;
|
||||
b->prop_site_capacity = new_cap;
|
||||
}
|
||||
b->prop_sites[b->prop_site_count].pc_offset = pc_offset;
|
||||
b->prop_sites[b->prop_site_count].atom = atom;
|
||||
b->prop_sites[b->prop_site_count].hit_count = 1;
|
||||
b->prop_site_count++;
|
||||
}
|
||||
#endif
|
||||
|
||||
static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj,
|
||||
JSValueConst this_obj, JSValueConst new_target,
|
||||
int argc, JSValue *argv, int flags)
|
||||
@@ -12453,6 +12705,11 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj,
|
||||
}
|
||||
b = p->u.func.function_bytecode;
|
||||
|
||||
#ifdef DUMP_PROFILE
|
||||
/* Increment function entry count */
|
||||
b->profile.entry_count++;
|
||||
#endif
|
||||
|
||||
if (unlikely(caller_ctx->trace_hook) && (caller_ctx->trace_type & JS_HOOK_CALL)) {
|
||||
js_debug dbg = {0};
|
||||
js_debug_info(caller_ctx, func_obj, &dbg);
|
||||
@@ -12507,6 +12764,14 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj,
|
||||
int call_argc;
|
||||
JSValue *call_argv;
|
||||
|
||||
#ifdef DUMP_PROFILE
|
||||
/* Count bytecode instructions executed (self_ticks) */
|
||||
b->profile.self_ticks++;
|
||||
|
||||
/* Try to take a profiling sample */
|
||||
profile_try_sample(rt, b, pc);
|
||||
#endif
|
||||
|
||||
SWITCH(pc) {
|
||||
CASE(OP_push_i32):
|
||||
*sp++ = JS_NewInt32(ctx, get_u32(pc));
|
||||
@@ -12783,6 +13048,10 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj,
|
||||
has_call_argc:
|
||||
call_argv = sp - call_argc;
|
||||
sf->cur_pc = pc;
|
||||
#ifdef DUMP_PROFILE
|
||||
/* Record call site */
|
||||
profile_record_call_site(rt, b, (uint32_t)(pc - b->byte_code_buf));
|
||||
#endif
|
||||
ret_val = JS_CallInternal(ctx, call_argv[-1], JS_NULL,
|
||||
JS_NULL, call_argc, call_argv, 0);
|
||||
if (unlikely(JS_IsException(ret_val)))
|
||||
@@ -12801,6 +13070,10 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj,
|
||||
pc += 2;
|
||||
call_argv = sp - call_argc;
|
||||
sf->cur_pc = pc;
|
||||
#ifdef DUMP_PROFILE
|
||||
/* Record call site */
|
||||
profile_record_call_site(rt, b, (uint32_t)(pc - b->byte_code_buf));
|
||||
#endif
|
||||
ret_val = JS_CallConstructorInternal(ctx, call_argv[-2],
|
||||
call_argv[-1],
|
||||
call_argc, call_argv, 0);
|
||||
@@ -12819,6 +13092,10 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj,
|
||||
pc += 2;
|
||||
call_argv = sp - call_argc;
|
||||
sf->cur_pc = pc;
|
||||
#ifdef DUMP_PROFILE
|
||||
/* Record call site */
|
||||
profile_record_call_site(rt, b, (uint32_t)(pc - b->byte_code_buf));
|
||||
#endif
|
||||
ret_val = JS_CallInternal(ctx, call_argv[-1], call_argv[-2],
|
||||
JS_NULL, call_argc, call_argv, 0);
|
||||
if (unlikely(JS_IsException(ret_val)))
|
||||
@@ -13646,6 +13923,10 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj,
|
||||
pc += 4;
|
||||
|
||||
sf->cur_pc = pc;
|
||||
#ifdef DUMP_PROFILE
|
||||
/* Record property access site */
|
||||
profile_record_prop_site(rt, b, (uint32_t)(pc - b->byte_code_buf), atom);
|
||||
#endif
|
||||
val = JS_GetProperty(ctx, sp[-1], atom);
|
||||
if (unlikely(JS_IsException(val)))
|
||||
goto exception;
|
||||
@@ -13662,6 +13943,10 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj,
|
||||
pc += 4;
|
||||
|
||||
sf->cur_pc = pc;
|
||||
#ifdef DUMP_PROFILE
|
||||
/* Record property access site */
|
||||
profile_record_prop_site(rt, b, (uint32_t)(pc - b->byte_code_buf), atom);
|
||||
#endif
|
||||
val = JS_GetProperty(ctx, sp[-1], atom);
|
||||
if (unlikely(JS_IsException(val)))
|
||||
goto exception;
|
||||
@@ -13677,6 +13962,10 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj,
|
||||
pc += 4;
|
||||
sf->cur_pc = pc;
|
||||
|
||||
#ifdef DUMP_PROFILE
|
||||
/* Record property access site */
|
||||
profile_record_prop_site(rt, b, (uint32_t)(pc - b->byte_code_buf), atom);
|
||||
#endif
|
||||
ret = JS_SetPropertyInternal(ctx, sp[-2], atom, sp[-1], sp[-2],
|
||||
JS_PROP_THROW_STRICT);
|
||||
JS_FreeValue(ctx, sp[-2]);
|
||||
@@ -24730,6 +25019,17 @@ static JSValue js_create_function(JSContext *ctx, JSFunctionDef *fd)
|
||||
b->has_simple_parameter_list = fd->has_simple_parameter_list;
|
||||
b->js_mode = fd->js_mode;
|
||||
b->is_derived_class_constructor = fd->is_derived_class_constructor;
|
||||
|
||||
#ifdef DUMP_PROFILE
|
||||
/* Initialize profiling data */
|
||||
memset(&b->profile, 0, sizeof(b->profile));
|
||||
b->call_sites = NULL;
|
||||
b->call_site_count = 0;
|
||||
b->call_site_capacity = 0;
|
||||
b->prop_sites = NULL;
|
||||
b->prop_site_count = 0;
|
||||
b->prop_site_capacity = 0;
|
||||
#endif
|
||||
b->new_target_allowed = fd->new_target_allowed;
|
||||
b->is_direct_or_indirect_eval = (fd->eval_type == JS_EVAL_TYPE_DIRECT ||
|
||||
fd->eval_type == JS_EVAL_TYPE_INDIRECT);
|
||||
@@ -24783,6 +25083,12 @@ static void free_function_bytecode(JSRuntime *rt, JSFunctionBytecode *b)
|
||||
js_free_rt(rt, b->debug.source);
|
||||
}
|
||||
|
||||
#ifdef DUMP_PROFILE
|
||||
/* Free profiling data */
|
||||
js_free_rt(rt, b->call_sites);
|
||||
js_free_rt(rt, b->prop_sites);
|
||||
#endif
|
||||
|
||||
remove_gc_object(&b->header);
|
||||
if (rt->gc_phase == JS_GC_PHASE_REMOVE_CYCLES && b->header.ref_count != 0) {
|
||||
list_add_tail(&b->header.link, &rt->gc_zero_ref_count_list);
|
||||
|
||||
Reference in New Issue
Block a user