quickjs bump arena/semi space collector #34

Open
opened 2025-06-09 05:14:43 +00:00 by john · 0 comments
Owner

Below is a concrete engineering plan for turning QuickJS’s hybrid ref-count + cycle collector into a copying / semi-space collector that compacts the heap into a fresh bump arena.
Because you do not use modules, async/generators, weak maps/refs, ArrayBuffers, DataViews, or any typed arrays, the set of object kinds you must handle is small:

GC header type: kinds you keep / kinds you can ignore
  JS_OBJECT: keep plain objects, Arrays, Errors, Boolean/Number/String wrappers, RegExps, Proxy, Map/Set; ignore WeakMap/WeakSet, WeakRef, FinalizationRegistry
  FUNCTION_BYTECODE: keep normal (non-async, non-generator) functions; ignore async, generator, async-generator
  SHAPE: keep all (shapes are GC headers)
  VAR_REF: keep (needed for closures)
  JS_STRING / STRING_ROPE: keep
  BIG_INT: keep
Everything else in the big enum can be omitted when you enumerate children or compute sizes.

1 Arena infrastructure

#include <stdint.h>
#include <stdlib.h>
#include <sys/mman.h>

typedef struct {
    uint8_t *base;
    size_t cap;
    size_t top;      /* bump pointer */
} region_t;

static region_t *region_new(size_t cap) {
    region_t *r = malloc(sizeof *r);
    r->base = mmap(NULL, cap, PROT_READ|PROT_WRITE,
                   MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    r->cap = cap;
    r->top = 0;
    return r;
}

static inline void *region_alloc(region_t *r, size_t sz, size_t align)
{
    uintptr_t p = (uintptr_t)r->base + r->top;
    p = (p + (align - 1)) & ~(uintptr_t)(align - 1);
    if (p + sz - (uintptr_t)r->base > r->cap)
        return NULL;                      /* arena full: caller triggers GC */
    r->top = (p + sz) - (uintptr_t)r->base;
    return (void *)p;
}

static inline void region_reset(region_t *r) { r->top = 0; }
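As a quick sanity check, the bump behaviour can be exercised in isolation; the sizes and the 1 MiB capacity below are arbitrary, and the snippet additionally needs <assert.h>:

static void region_smoke_test(void)
{
    region_t *r = region_new(1 << 20);              /* 1 MiB region */
    void *a = region_alloc(r, 48, 16);
    void *b = region_alloc(r, 16, 16);
    assert(a && b && (uint8_t *)b > (uint8_t *)a);  /* the bump pointer only grows */
    region_reset(r);                                /* reclaims both allocations in O(1) */
}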
Each JSRuntime owns exactly two regions:

typedef struct {
    region_t *from_space;
    region_t *to_space;
    JSRuntime *rt;       /* back-pointer; js_zone_malloc needs it to trigger GC */
} js_zone_t;             /* stored in JSRuntime->user_opaque */
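A zone can be built once at start-up; js_zone_new below is a hypothetical helper (not part of QuickJS) that simply pairs two equally sized regions, leaving rt to be filled in after the runtime exists (see section 3):

static js_zone_t *js_zone_new(size_t semi_space_cap)
{
    js_zone_t *z = malloc(sizeof *z);
    z->from_space = region_new(semi_space_cap);     /* allocations come from here */
    z->to_space   = region_new(semi_space_cap);     /* survivors are copied here */
    z->rt         = NULL;                           /* set after JS_NewRuntime2 */
    return z;
}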
2 Add a forwarding pointer to every GC header

We steal the spare bits already present:

struct JSGCObjectHeader {
    int32_t ref_count;
    uint8_t gc_obj_type : 4;
    uint8_t mark : 4;
    uint8_t has_forward : 1;   /* new */
    uint8_t dummy1 : 7;
    uint16_t dummy2;
    struct list_head link;
    void *forwarding;          /* new; only valid while has_forward is 1 */
};
With the forwarding pointer the struct is 32 bytes on 64-bit systems.
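If you want that size claim checked at build time, a C11 _Static_assert is enough; the arithmetic below assumes an LP64 target where struct list_head is two pointers, so adjust it if your fork's header differs:

/* 4 (ref_count) + 1 + 1 + 2 (bit-fields + padding) + 16 (list_head) + 8 (forwarding) = 32 */
_Static_assert(sizeof(struct JSGCObjectHeader) == 32,
               "forwarding pointer changed the GC header size");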

3 Custom allocator hooks

static void *js_zone_malloc(JSMallocState *s, size_t sz) {
    js_zone_t *z = s->opaque;
    void *p = region_alloc(z->from_space, sz, _Alignof(max_align_t));
    if (!p) {
        copying_gc(z->rt);
        p = region_alloc(z->from_space, sz, _Alignof(max_align_t));
    }
    return p;
}

static void js_zone_free(JSMallocState *s, void *ptr) { /* no-op */ }

static void *js_zone_realloc(JSMallocState *s, void *old, size_t sz) {
    if (!old)
        return js_zone_malloc(s, sz);
    size_t oldsz = s->js_malloc_usable_size(old);   /* 0 is fine */
    void *newp = js_zone_malloc(s, sz);
    memcpy(newp, old, oldsz < sz ? oldsz : sz);
    return newp;
}
JS_NewRuntime2(&mf, zone) supplies those hooks.
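For completeness, a wiring sketch under the classic JSMallocFunctions/JSMallocState interface (signatures differ between QuickJS forks, so treat this as an assumption). js_zone_new is the hypothetical helper from section 1, and the usable-size stub just reports 0 because the arena keeps no per-block sizes:

static size_t js_zone_usable_size(const void *ptr) { (void)ptr; return 0; }

static const JSMallocFunctions zone_mf = {
    .js_malloc             = js_zone_malloc,
    .js_free               = js_zone_free,
    .js_realloc            = js_zone_realloc,
    .js_malloc_usable_size = js_zone_usable_size,
};

static JSRuntime *new_zone_runtime(size_t semi_space_cap)
{
    js_zone_t *zone = js_zone_new(semi_space_cap);
    JSRuntime *rt = JS_NewRuntime2(&zone_mf, zone);  /* zone becomes JSMallocState.opaque */
    zone->rt = rt;                                   /* needed by js_zone_malloc's GC trigger */
    JS_SetRuntimeOpaque(rt, zone);                   /* needed by copying_gc / copy_hdr */
    return rt;
}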

4 Roots you must copy

Per runtime (one copy each):
  rt->atom_array entries are permanent ⇒ ignore
  rt->class_array[i].prototype / .exotic – none of them are GC objects
  rt->context_list – iterate each context
Per context:
  ctx->global_obj, ctx->global_var_obj
  ctx->class_proto[JS_CLASS_…]
  ctx->function_proto / function_ctor / array_ctor / regexp_ctor
  the value stack of every live JSStackFrame (arg_buf, var_buf, cur_sp)
No modules, no async jobs, no WeakRef lists, no SAB ⇒ the whole root set is only a few dozen pointers.
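Since each of these roots is just a JSValue slot that must be overwritten with its moved copy, a trivial macro (RELOCATE is introduced here purely for illustration, it is not QuickJS API) can keep the driver in section 7 terse:

/* Overwrite a root slot in place with its relocated value (relocate() is defined in section 6). */
#define RELOCATE(rt, slot)  ((slot) = relocate((rt), (slot)))
/* e.g. RELOCATE(rt, ctx->global_obj); RELOCATE(rt, fr->arg_buf[i]); */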

5 Object-size helpers

You only need size formulas for types you keep:

static size_t obj_size(JSRuntime *rt, JSGCObjectHeader *h)
{
    switch (h->gc_obj_type) {
    case JS_GC_OBJ_TYPE_JS_OBJECT: {
        JSObject *o = (JSObject *)h;
        return sizeof(JSObject) +
               o->prop_size * sizeof(JSShapeProperty);
    }
    case JS_GC_OBJ_TYPE_FUNCTION_BYTECODE: {
        JSFunctionBytecode *f = (JSFunctionBytecode *)h;
        return sizeof(JSFunctionBytecode) +
               f->byte_code_len +
               f->var_count * sizeof(JSVarDef) +
               f->closure_var_count * sizeof(JSClosureVar) +
               f->cpool_count * sizeof(JSValue);
    }
    case JS_GC_OBJ_TYPE_SHAPE:
        return sizeof(JSShape) + h->... /* prop hash table, etc. */;
    case JS_GC_OBJ_TYPE_VAR_REF:
        return sizeof(JSVarRef);
    default:
        /* string, rope, bigint are allocated with their payload
           already contiguous; js_malloc_usable_size() returns full size */
        return js_def_malloc_usable_size(h);
    }
}
Because you do not use ArrayBuffers / typed arrays, you can skip those branches.
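One caveat about the default branch: a bump arena keeps no per-block metadata, so a usable-size query there may legitimately return 0. If that matters for the scan loop, explicit formulas are easy to supply; the sketch below is an assumption based on the stock JSString layout (len / is_wide_char bit-fields followed by an inline str8/str16 payload), mirroring how upstream js_alloc_string sizes its allocation, and is not part of this plan's code:

/* Hypothetical explicit size for an ordinary (non-rope) string header:
   narrow strings store len bytes plus a trailing NUL, wide strings store
   len 16-bit units. */
static size_t string_size(const JSString *s)
{
    return sizeof(JSString) + (s->len << s->is_wide_char) + 1 - s->is_wide_char;
}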

6 Copy algorithm (Cheney)

static inline JSValue relocate(JSRuntime *rt, JSValueConst v);

static JSGCObjectHeader *copy_hdr(JSRuntime *rt, JSGCObjectHeader *old)
{
    if (old->has_forward)
        return old->forwarding;            /* already moved */

    js_zone_t *z = JS_GetRuntimeOpaque(rt);
    size_t sz = obj_size(rt, old);
    JSGCObjectHeader *nw = region_alloc(z->to_space, sz, _Alignof(max_align_t));
    memcpy(nw, old, sz);

    old->has_forward = 1;
    old->forwarding = nw;

    /* fix children pointers inside nw */
    switch (nw->gc_obj_type) {
    case JS_GC_OBJ_TYPE_JS_OBJECT: {
        JSObject *o = (JSObject *)nw;
        for (uint32_t i = 0; i < o->prop_size; i++)
            o->prop[i].value = relocate(rt, o->prop[i].value);
        o->proto = JS_VALUE_GET_OBJ(relocate(rt, JS_MKPTR(JS_TAG_OBJECT, o->proto)));
        break;
    }
    case JS_GC_OBJ_TYPE_FUNCTION_BYTECODE: {
        JSFunctionBytecode *f = (JSFunctionBytecode *)nw;
        for (int i = 0; i < f->cpool_count; i++)
            f->cpool[i] = relocate(rt, f->cpool[i]);
        break;
    }
    case JS_GC_OBJ_TYPE_VAR_REF: {
        JSVarRef *vr = (JSVarRef *)nw;
        if (!vr->is_detached)
            vr->pvalue = &((JSVarRef *)old->forwarding)->value;   /* remains self-contained */
        vr->value = relocate(rt, vr->value);
        break;
    }
    /* string / rope / bigint have no interior GC refs */
    default:
        break;
    }
    return nw;
}

static inline JSValue relocate(JSRuntime *rt, JSValueConst v)
{
    int tag = JS_VALUE_GET_TAG(v);
    if (tag == JS_TAG_OBJECT || tag == JS_TAG_STRING ||
        tag == JS_TAG_STRING_ROPE || tag == JS_TAG_BIG_INT) {
        JSGCObjectHeader *new_hdr = copy_hdr(rt, JS_VALUE_GET_PTR(v));
        return JS_MKPTR(tag, new_hdr);
    }
    return v;   /* primitive stays as-is */
}
7 The compaction driver

static void copying_gc(JSRuntime *rt)
{
    js_zone_t *z = JS_GetRuntimeOpaque(rt);
    region_reset(z->to_space);

    /* 1) copy roots */
    for (JSContext *ctx = list_entry(rt->context_list.next, JSContext, link);
         &ctx->link != &rt->context_list;
         ctx = list_entry(ctx->link.next, JSContext, link)) {

        ctx->global_obj     = relocate(rt, ctx->global_obj);
        ctx->global_var_obj = relocate(rt, ctx->global_var_obj);
        for (int i = 0; i < JS_CLASS_INIT_COUNT; i++)
            ctx->class_proto[i] = relocate(rt, ctx->class_proto[i]);

        /* stack frames */
        for (JSStackFrame *fr = ctx->current_stack_frame; fr; fr = fr->prev_frame) {
            fr->cur_func = relocate(rt, fr->cur_func);
            for (int i = 0; i < fr->arg_count; i++)
                fr->arg_buf[i] = relocate(rt, fr->arg_buf[i]);
            /* locals */
            /* … similar … */
        }
    }

    /* 2) scan to-space linearly and copy children of newly moved objs
       (Cheney's algorithm: two fingers) */
    uint8_t *scan = z->to_space->base;
    while (scan < z->to_space->base + z->to_space->top) {
        JSGCObjectHeader *h = (void *)scan;
        /* walk children again and relocate them – they may still live in from_space */
        switch (h->gc_obj_type) { /* same switch as copy_hdr but on h */ }
        scan += obj_size(rt, h);
    }

    /* 3) flip spaces */
    region_t *old = z->from_space;
    z->from_space = z->to_space;
    z->to_space = old;
    region_reset(z->to_space);   /* old space entirely freed */

    /* 4) clear forwarding bits for next cycle */
    scan = z->from_space->base;
    while (scan < z->from_space->base + z->from_space->top) {
        JSGCObjectHeader *h = (void *)scan;
        h->has_forward = 0;
        scan += obj_size(rt, h);
    }
}
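To avoid maintaining the child-fixing switch twice (once in copy_hdr, once in the scan loop), it can be factored into a single helper; this is only a restructuring of the switch already shown in section 6, not new behaviour:

/* Shared child walk: rewrites every interior GC reference of h in place.
   copy_hdr calls it on the fresh copy; the Cheney scan loop calls it on each
   header the scan pointer reaches. */
static void relocate_children(JSRuntime *rt, JSGCObjectHeader *h)
{
    switch (h->gc_obj_type) {
    case JS_GC_OBJ_TYPE_JS_OBJECT: {
        JSObject *o = (JSObject *)h;
        for (uint32_t i = 0; i < o->prop_size; i++)
            o->prop[i].value = relocate(rt, o->prop[i].value);
        o->proto = JS_VALUE_GET_OBJ(relocate(rt, JS_MKPTR(JS_TAG_OBJECT, o->proto)));
        break;
    }
    case JS_GC_OBJ_TYPE_FUNCTION_BYTECODE: {
        JSFunctionBytecode *f = (JSFunctionBytecode *)h;
        for (int i = 0; i < f->cpool_count; i++)
            f->cpool[i] = relocate(rt, f->cpool[i]);
        break;
    }
    case JS_GC_OBJ_TYPE_VAR_REF: {
        JSVarRef *vr = (JSVarRef *)h;
        /* pvalue fix-up stays in copy_hdr, since it needs the old header */
        vr->value = relocate(rt, vr->value);
        break;
    }
    default:
        break;   /* string / rope / bigint: no interior GC refs (section 6) */
    }
}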
8 Why life is easier without the omitted features

Feature you removed: what you don't have to implement
  Async / generator / async-generator: JSAsyncFunctionState, JS_CLASS_GENERATOR, extra roots in the job queue
  WeakMap / WeakSet / WeakRef / FinalizationRegistry: special “keep-alive but don’t move” rules, a separate weak-ref pass
  TypedArray / ArrayBuffer: non-movable (shared) backing stores, extra pointers into the SAB's raw data
  Modules: root list of evaluated modules, namespace objects
  DataView: the same backing-store complications as ArrayBuffer
That means children enumeration boils down to five structures:
JSObject, JSFunctionBytecode, JSShape, JSVarRef, JSStringRope.
No “weak” barriers, no promise job queues, no detached buffers.
The forwarding pointer can live inside JSGCObjectHeader because none of your objects rely on the spare bits there.

What remains the same
Ref-counting stays; every decref is still done.
With the bump allocator every immediate “free” is a no-op, so releasing references costs almost nothing.
Your allocator still hands out memory with a single top += size bump.
Garbage is reclaimed only during copying_gc, which runs only when the arena is full.
Object addresses do change after compaction, so any native code that stores raw pointers to JS objects must fetch them via a JSValue each time (QuickJS’s API already enforces that).
With these simplifications you have a fully compacting, fragmentation-proof heap that costs:

1–2 pointer writes per allocation
no cost per free
one linear copy of live objects when the region fills (amortised, ≈ O(1) per object)
and you didn’t have to touch the vast parts of QuickJS devoted to weak refs, typed arrays, modules, or async tasks.

john added the enhancement label 2025-06-09 05:36:50 +00:00