diff --git a/debug/js.c b/debug/js.c index 3979dcfd..2a075cd1 100644 --- a/debug/js.c +++ b/debug/js.c @@ -15,8 +15,7 @@ JSC_CCALL(os_calc_mem, JS_SetPropertyStr(js,ret,"memory_used_size",number2js(js,mu.memory_used_size)); JS_SetPropertyStr(js,ret,"malloc_count",number2js(js,mu.malloc_count)); JS_SetPropertyStr(js,ret,"memory_used_count",number2js(js,mu.memory_used_count)); - JS_SetPropertyStr(js,ret,"atom_count",number2js(js,mu.atom_count)); - JS_SetPropertyStr(js,ret,"atom_size",number2js(js,mu.atom_size)); + /* atom_count and atom_size removed - atoms are now just strings */ JS_SetPropertyStr(js,ret,"str_count",number2js(js,mu.str_count)); JS_SetPropertyStr(js,ret,"str_size",number2js(js,mu.str_size)); JS_SetPropertyStr(js,ret,"obj_count",number2js(js,mu.obj_count)); diff --git a/plan.md b/plan.md new file mode 100644 index 00000000..4f3d10b1 --- /dev/null +++ b/plan.md @@ -0,0 +1,547 @@ +# Refactoring QuickJS to Mist Memory Format + +## Summary + +Complete rework of `quickjs.h` and `quickjs.c` to align with `docs/memory.md` and the new JSValue encoding scheme using LSB-based type discrimination with short floats. + +## Key Design Decisions (from user) + +1. **Remove NaN-boxing entirely** - Use LSB-based type tags instead +2. **Short float for numbers** - Truncated double (3 fewer exponent bits), out-of-range → NULL +3. **Optional 32-bit float mode** - Compile-time option, stored like ints +4. **Remove KeyId** - Use JSValue directly as keys in objects +5. **Remove JSStringRope** - No lazy concatenation, immediate text creation +6. **Remove JSObject/shapes** - Move to JSRecord only with direct key/value storage +7. 
**Remove atoms from objects** - String interning for literals/properties only + +## New JSValue Encoding (64-bit) + +Based on the provided header, using LSB-based discrimination: + +``` +LSB = 0 → 31-bit signed integer (value >> 1) +LSB = 01 → 61-bit pointer +LSB = 101 → Short float (truncated double, 3 fewer exponent bits) +LSB = 11 → Special tag (next 3 bits for subtype, 5 bits total) +``` + +**Special tags (5 bits, LSB = 11):** +- `00011` (3) = JS_TAG_BOOL (payload bit 5 = value) +- `00111` (7) = JS_TAG_NULL +- `01011` (11) = JS_TAG_UNDEFINED (may not be needed - use NULL) +- `01111` (15) = JS_TAG_EXCEPTION +- `10111` (23) = JS_TAG_UNINITIALIZED +- `11011` (27) = JS_TAG_STRING_ASCII (immediate string: 3-bit len + up to 7 ASCII bytes) +- `11111` (31) = JS_TAG_CATCH_OFFSET + +## Critical Files + +- `/Users/johnalanbrook/work/cell/source/quickjs.h` - Complete rewrite of JSValue encoding +- `/Users/johnalanbrook/work/cell/source/quickjs.c` - Remove shapes, atoms from objects, string ropes + +## Implementation Plan + +### Phase 1: New JSValue Encoding in quickjs.h + +Replace the entire JSValue system with LSB-based tags: + +```c +#if INTPTR_MAX >= INT64_MAX +#define JS_PTR64 +typedef uint64_t JSValue; +#define JSW 8 +#define JS_USE_SHORT_FLOAT +#else +typedef uint32_t JSValue; +#define JSW 4 +#endif + +enum { + JS_TAG_INT = 0, /* LSB = 0, 31-bit int */ + JS_TAG_PTR = 1, /* LSB = 01, pointer */ + JS_TAG_SPECIAL = 3, /* LSB = 11, special values */ + JS_TAG_BOOL = JS_TAG_SPECIAL | (0 << 2), /* 5 bits */ + JS_TAG_NULL = JS_TAG_SPECIAL | (1 << 2), + JS_TAG_EXCEPTION = JS_TAG_SPECIAL | (3 << 2), + JS_TAG_UNINITIALIZED = JS_TAG_SPECIAL | (5 << 2), + JS_TAG_STRING_ASCII = JS_TAG_SPECIAL | (6 << 2), /* immediate ASCII string */ + JS_TAG_CATCH_OFFSET = JS_TAG_SPECIAL | (7 << 2), +#ifdef JS_USE_SHORT_FLOAT + JS_TAG_SHORT_FLOAT = 5, /* LSB = 101 */ +#endif +}; + +/* Value extraction */ +#define JS_VALUE_GET_INT(v) ((int32_t)(v) >> 1) +#define JS_VALUE_GET_PTR(v) ((void *)((v) & 
~(JSW - 1))) +#define JS_VALUE_GET_SPECIAL_TAG(v) ((v) & 0x1F) +#define JS_VALUE_GET_SPECIAL_VALUE(v) ((int32_t)(v) >> 5) + +/* Value creation */ +#define JS_MKINT(val) (((JSValue)(val) << 1) | JS_TAG_INT) +#define JS_MKPTR(ptr) (((JSValue)(uintptr_t)(ptr)) | JS_TAG_PTR) +#define JS_MKSPECIAL(tag, val) ((JSValue)(tag) | ((JSValue)(val) << 5)) + +/* Type checks */ +static inline JS_BOOL JS_IsInt(JSValue v) { return (v & 1) == JS_TAG_INT; } +static inline JS_BOOL JS_IsPtr(JSValue v) { return (v & (JSW-1)) == JS_TAG_PTR; } +static inline JS_BOOL JS_IsNull(JSValue v) { return v == JS_MKSPECIAL(JS_TAG_NULL, 0); } +static inline JS_BOOL JS_IsException(JSValue v) { return JS_VALUE_GET_SPECIAL_TAG(v) == JS_TAG_EXCEPTION; } + +#ifdef JS_USE_SHORT_FLOAT +static inline JS_BOOL JS_IsShortFloat(JSValue v) { return (v & 7) == JS_TAG_SHORT_FLOAT; } +#endif + +/* Constants */ +#define JS_NULL JS_MKSPECIAL(JS_TAG_NULL, 0) +#define JS_FALSE JS_MKSPECIAL(JS_TAG_BOOL, 0) +#define JS_TRUE JS_MKSPECIAL(JS_TAG_BOOL, 1) +#define JS_EXCEPTION JS_MKSPECIAL(JS_TAG_EXCEPTION, 0) +#define JS_UNINITIALIZED JS_MKSPECIAL(JS_TAG_UNINITIALIZED, 0) +``` + +### Phase 2: Short Float Implementation + +Short float uses 3 fewer exponent bits than double. Numbers outside range become NULL. 
+ +```c +/* Short float: 61 bits = 1 sign + 8 exp + 52 mantissa (vs double's 11 exp) + * Range: approximately +-3.4e38 (vs double's +-1.8e308) + * Out of range values become JS_NULL + * Zero and subnormals: 0.0 is representable, subnormals become 0.0 + */ +static inline JSValue JS_NewFloat64(JSContext *ctx, double d) { + union { double d; uint64_t u; } u; + u.d = d; + + /* Extract sign, exponent, mantissa */ + uint64_t sign = u.u >> 63; + int exp = (u.u >> 52) & 0x7FF; + uint64_t mantissa = u.u & ((1ULL << 52) - 1); + + /* Special case: zero (exp=0, mantissa=0) */ + if (exp == 0 && mantissa == 0) { + /* Encode +0.0 or -0.0 */ + return (sign << 63) | JS_TAG_SHORT_FLOAT; /* short_exp=0, mantissa=0 */ + } + + /* Check for NaN/Inf (exp=0x7FF) */ + if (exp == 0x7FF) { + return JS_NULL; /* NaN or Infinity → null */ + } + + /* Subnormals (exp=0, mantissa!=0): flush to zero */ + if (exp == 0) { + return (sign << 63) | JS_TAG_SHORT_FLOAT; /* becomes +/-0.0 */ + } + + /* Normal numbers: convert exponent bias */ + /* Double bias = 1023, short float bias = 127 */ + int short_exp = exp - 1023 + 127; + if (short_exp < 1 || short_exp > 254) { + return JS_NULL; /* Out of range (short_exp 0 and 255 are special) */ + } + + /* Check if it fits in int32 (prefer integer encoding) */ + if (d >= INT32_MIN && d <= INT32_MAX) { + int32_t i = (int32_t)d; + if ((double)i == d) { + return JS_MKINT(i); + } + } + + /* Encode as short float: + * [sign:1][short_exp:8][mantissa:52][tag:3] */ + JSValue v = (sign << 63) | ((uint64_t)short_exp << 55) | (mantissa << 3) | JS_TAG_SHORT_FLOAT; + return v; +} + +static inline double JS_VALUE_GET_FLOAT64(JSValue v) { + /* Decode short float back to double */ + uint64_t sign = v >> 63; + uint64_t short_exp = (v >> 55) & 0xFF; + uint64_t mantissa = (v >> 3) & ((1ULL << 52) - 1); + + /* Convert exponent: short bias 127 → double bias 1023; short_exp 0 is the +/-0.0 encoding produced by JS_NewFloat64 and must map to double exponent 0, not be re-biased */ + uint64_t exp = short_exp ? short_exp - 127 + 1023 : 0; + + union { double d; uint64_t u; } u; + u.u = (sign << 63) | (exp << 52) | 
mantissa; + return u.d; +} +``` + +### Phase 3: Immediate ASCII String (JS_TAG_STRING_ASCII) + +Up to 7 ASCII characters stored directly in JSValue payload. + +**Layout (64-bit):** +- Bits 0-4: Tag (JS_TAG_STRING_ASCII = 27) +- Bits 5-7: Length (0-7) +- Bits 8-63: Up to 7 ASCII bytes (char[0] in bits 8-15, etc.) + +```c +#define JS_ASCII_MAX_LEN 7 + +/* Check if value is immediate ASCII string */ +static inline JS_BOOL JS_IsImmediateASCII(JSValue v) { + return JS_VALUE_GET_SPECIAL_TAG(v) == JS_TAG_STRING_ASCII; +} + +/* Get immediate ASCII string length (bits 5-7) */ +static inline size_t JS_GetImmediateASCIILen(JSValue v) { + return (v >> 5) & 0x7; +} + +/* Get immediate ASCII string character at index */ +static inline char JS_GetImmediateASCIIChar(JSValue v, int idx) { + return (char)((v >> (8 + idx * 8)) & 0xFF); +} + +/* Try to create immediate ASCII string, returns JS_NULL if doesn't fit */ +static inline JSValue JS_TryNewImmediateASCII(const char *str, size_t len) { + if (len > JS_ASCII_MAX_LEN) return JS_NULL; + for (size_t i = 0; i < len; i++) { + if ((uint8_t)str[i] >= 0x80) return JS_NULL; /* non-ASCII */ + } + /* Tag (5 bits) | Length (3 bits) | chars (56 bits) */ + JSValue v = JS_TAG_STRING_ASCII | ((JSValue)len << 5); + for (size_t i = 0; i < len; i++) { + v |= ((JSValue)(uint8_t)str[i]) << (8 + i * 8); + } + return v; +} + +/* Hash an immediate ASCII string (hash the entire JSValue) */ +static inline uint64_t js_hash_immediate_ascii(JSValue v) { + fash64_state s; + fash64_begin(&s); + fash64_word(&s, v); + return fash64_end(&s); +} +``` + +### Phase 4: Remove JSStringRope + +Delete `JSStringRope` structure and all rope-related functions: +- `js_new_string_rope()` (line 4815) +- `js_rebalancee_string_rope()` (line 4952) +- `string_rope_iter_*` functions +- `JS_TAG_STRING_ROPE` usage + +String concatenation creates new `mist_text` objects immediately. + +### Phase 5: UTF-32 Text Objects (mist_text) + +The `mist_text` structure already exists. 
Complete integration: + +```c +/* Text object: UTF-32 packed 2 chars per 64-bit word + * Pretext (mutable, stone=0): hdr.cap = char capacity, length field = current length + * Text (immutable, stone=1): hdr.cap = length, length field = hash + */ +typedef struct mist_text { + objhdr_t hdr; /* type=OBJ_TEXT, cap=char count, stone bit */ + uint64_t length; /* pretext: char count | text: hash */ + uint64_t packed[]; /* UTF-32 chars, 2 per word (high then low) */ +} mist_text; + +/* Create new text from UTF-8 C string */ +JSValue JS_NewStringLen(JSContext *ctx, const char *str, size_t len) { + /* Try the immediate ASCII encoding first (Phase 3) */ + JSValue imm = JS_TryNewImmediateASCII(str, len); + if (!JS_IsNull(imm)) return imm; + + /* Convert UTF-8 to UTF-32 */ + uint32_t *utf32 = js_malloc(ctx, len * sizeof(uint32_t)); + size_t utf32_len = utf8_to_utf32(str, len, utf32); + + /* Allocate mist_text */ + size_t word_count = (utf32_len + 1) / 2; + mist_text *text = js_mallocz(ctx, sizeof(mist_text) + word_count * sizeof(uint64_t)); + text->hdr = objhdr_make(utf32_len, OBJ_TEXT, false, false, false, false); + text->length = utf32_len; + + /* Pack UTF-32 into words */ + for (size_t i = 0; i < utf32_len; i += 2) { + uint64_t hi = utf32[i]; + uint64_t lo = (i + 1 < utf32_len) ? 
utf32[i + 1] : 0; + text->packed[i / 2] = (hi << 32) | lo; + } + + js_free(ctx, utf32); + /* Add to GC list and return as JSValue */ + return JS_MKPTR(text); +} +``` + +### Phase 6: Remove JSObject, Use JSRecord Only + +**Delete:** +- `JSObject` structure (line 1664) +- `JSShape` and `JSShapeProperty` structures +- Shape hash table in JSRuntime +- All shape-related functions +- `find_own_property()` and shape-based property access + +**Keep only JSRecord with direct key/value storage:** + +```c +/* Record: open-addressing hash table with JSValue keys + * Slot 0 reserved: key[0] = class_id<<32 | rec_key_id, value[0] = opaque + */ +/* Slot: key/value pair stored together */ +typedef struct JSSlot { + JSValue key; + JSValue val; +} JSSlot; + +typedef struct JSRecord { + JSGCObjectHeader header; + objhdr_t mist_hdr; /* type=OBJ_RECORD, cap=slot_mask */ + struct JSRecord *proto; /* prototype chain */ + uint32_t len; /* number of live entries */ + uint32_t tombs; /* tombstone count */ + JSSlot *slots; /* key/value pairs, size = mask+1 */ +} JSRecord; + +/* Three key types for property lookup: + * 1. Immediate ASCII (JS_TAG_STRING_ASCII): hash from JSValue itself + * 2. Text object (mist_text pointer): hash from object's stored hash + * 3. Record object used as key: hash from monotonic ID in record's key[0] + * + * Per memory.md: when a record is used as a key, it gets assigned a + * monotonically increasing 32-bit ID stored in lower 32 bits of keys[0]. 
+ */ + +/* Get hash for any key JSValue */ +static uint64_t js_key_hash(JSValue key) { + if (JS_IsImmediateASCII(key)) { + /* Hash the entire JSValue for immediate ASCII */ + return fash64_hash_one(key); + } + + if (!JS_IsPtr(key)) + return 0; /* Invalid key */ + + void *ptr = JS_VALUE_GET_PTR(key); + objhdr_t hdr = *(objhdr_t *)ptr; /* Read object header */ + uint8_t type = objhdr_type(hdr); + + if (type == OBJ_TEXT) { + /* Text object: hash stored in length field (if stoned) or computed */ + mist_text *text = (mist_text *)ptr; + return get_text_hash(text); + } + + if (type == OBJ_RECORD) { + /* Record used as key: hash from monotonic ID in slots[0].key */ + JSRecord *rec = (JSRecord *)ptr; + uint32_t rec_id = (uint32_t)rec->slots[0].key; /* lower 32 bits */ + return fash64_hash_one(rec_id); + } + + return 0; /* Unknown type */ +} + +/* Ensure record has a key ID assigned (for use as property key) */ +static void rec_ensure_key_id(JSRuntime *rt, JSRecord *rec) { + uint32_t id = (uint32_t)rec->slots[0].key; + if (id == 0) { + /* Assign new monotonically increasing ID */ + id = ++rt->rec_key_next; + if (id == 0) id = ++rt->rec_key_next; /* Skip 0 */ + rec->slots[0].key = (rec->slots[0].key & 0xFFFFFFFF00000000ULL) | id; + } +} + +/* Compare two keys for equality */ +static JS_BOOL js_key_equal(JSValue a, JSValue b) { + /* Fast path: identical values */ + if (a == b) return TRUE; + + /* Immediate ASCII: must be identical (handled above) */ + if (JS_IsImmediateASCII(a) || JS_IsImmediateASCII(b)) + return FALSE; + + /* Both must be pointers */ + if (!JS_IsPtr(a) || !JS_IsPtr(b)) + return FALSE; + + void *pa = JS_VALUE_GET_PTR(a); + void *pb = JS_VALUE_GET_PTR(b); + uint8_t ta = objhdr_type(*(objhdr_t *)pa); + uint8_t tb = objhdr_type(*(objhdr_t *)pb); + + /* Record keys: pointer equality (identity) */ + if (ta == OBJ_RECORD || tb == OBJ_RECORD) + return FALSE; /* Already checked a == b above */ + + /* Text objects: string content comparison */ + if (ta == OBJ_TEXT && 
tb == OBJ_TEXT) + return text_content_equal((mist_text *)pa, (mist_text *)pb); + + return FALSE; +} + +/* Property lookup using open-addressing hash table */ +static int rec_find_slot(JSRecord *rec, JSValue key) { + uint32_t mask = (uint32_t)objhdr_cap56(rec->mist_hdr); + uint64_t hash = js_key_hash(key); + uint32_t idx = hash & mask; + if (idx == 0) idx = 1; /* slot 0 reserved */ + + for (uint32_t i = 0; i <= mask; i++) { + JSValue k = rec->slots[idx].key; + if (JS_IsNull(k)) return -1; /* empty, not found */ + if (k == JS_EXCEPTION) { /* tombstone, continue */ + idx = (idx + 1) & mask; + if (idx == 0) idx = 1; + continue; + } + if (js_key_equal(k, key)) return idx; + idx = (idx + 1) & mask; + if (idx == 0) idx = 1; + } + return -1; +} +``` + +### Phase 7: Update Property Access Functions + +Replace all JSObject property functions with JSRecord equivalents: + +```c +JSValue JS_GetPropertyInternal(JSContext *ctx, JSValueConst this_obj, + JSValue prop, JS_BOOL throw_ref_error) { + if (!JS_IsPtr(this_obj)) { + if (throw_ref_error) + return JS_ThrowTypeError(ctx, "not an object"); + return JS_NULL; + } + + JSRecord *rec = (JSRecord *)JS_VALUE_GET_PTR(this_obj); + + while (rec) { + int idx = rec_find_slot(rec, prop); + if (idx >= 0) { + return rec->slots[idx].val; /* No dup needed if no ref counting */ + } + rec = rec->proto; + } + + if (throw_ref_error) + return JS_ThrowReferenceError(ctx, "property not found"); + return JS_NULL; +} + +int JS_SetPropertyInternal(JSContext *ctx, JSValueConst this_obj, + JSValue prop, JSValue val) { + if (!JS_IsPtr(this_obj)) + return -1; + + JSRecord *rec = (JSRecord *)JS_VALUE_GET_PTR(this_obj); + int idx = rec_find_slot(rec, prop); + + if (idx >= 0) { + rec->slots[idx].val = val; + return 0; + } + + /* Add new property */ + return rec_add_property(ctx, rec, prop, val); +} +``` + +### Phase 8: C Class Storage in Slot 0 + +Per memory.md, slot 0 is reserved for internal use: + +```c +/* slots[0].key: lower 32 bits = rec_key_id (for 
identity-based keys) + * upper 32 bits = class_id (C class) + * slots[0].val: opaque C pointer + */ + +void JS_SetOpaque(JSContext *ctx, JSValue obj, void *opaque) { + JSRecord *rec = (JSRecord *)JS_VALUE_GET_PTR(obj); + rec->slots[0].val = (JSValue)(uintptr_t)opaque; +} + +void *JS_GetOpaque(JSContext *ctx, JSValue obj, uint32_t class_id) { + JSRecord *rec = (JSRecord *)JS_VALUE_GET_PTR(obj); + uint32_t stored_class = (uint32_t)(rec->slots[0].key >> 32); + if (stored_class != class_id) return NULL; + return (void *)(uintptr_t)rec->slots[0].val; +} + +void JS_SetClassID(JSRecord *rec, uint32_t class_id) { + rec->slots[0].key = (rec->slots[0].key & 0xFFFFFFFF) | ((uint64_t)class_id << 32); +} +``` + +### Phase 9: Update GC + +The GC needs updates for the new object format: + +```c +static void mark_children(JSRuntime *rt, JSGCObjectHeader *gp, ...) { + switch (gp->gc_obj_type) { + case JS_GC_OBJ_TYPE_RECORD: + { + JSRecord *rec = (JSRecord *)gp; + uint32_t mask = objhdr_cap56(rec->mist_hdr); + + if (rec->proto) + mark_func(rt, &rec->proto->header); + + for (uint32_t i = 1; i <= mask; i++) { + if (!JS_IsNull(rec->slots[i].key) && + rec->slots[i].key != JS_EXCEPTION) { /* tombstone */ + /* Mark key if it's a pointer */ + if (JS_IsPtr(rec->slots[i].key)) + JS_MarkValue(rt, rec->slots[i].key, mark_func); + /* Mark value if it's a pointer */ + if (JS_IsPtr(rec->slots[i].val)) + JS_MarkValue(rt, rec->slots[i].val, mark_func); + } + } + } + break; + // ... other cases + } +} +``` + +## Cleanup - Items to Remove + +1. **quickjs.h:** + - Old NaN-boxing macros (JS_VALUE_GET_TAG, JS_MKVAL, etc.) + - JS_TAG_STRING, JS_TAG_STRING_ROPE, JS_TAG_OBJECT, JS_TAG_ARRAY, JS_TAG_FUNCTION + - JSValueConst (just use JSValue) + +2. 
**quickjs.c:** + - JSStringRope structure and functions + - JSShape and JSShapeProperty structures + - Shape hash table and functions + - Atom-based property access (keep atoms for parser/compiler) + - JSObject structure (replace with JSRecord) + - `find_own_property()`, `add_shape_property()`, etc. + +## Verification + +1. **Build:** `make` completes without errors +2. **Basic test:** Create objects, set/get properties +3. **Number test:** Verify short float encoding/decoding, out-of-range → null +4. **String test:** Immediate text for short strings, mist_text for long +5. **GC test:** Create cycles, verify collection works +6. **C class test:** SetOpaque/GetOpaque work with slot 0 storage + +## Notes + +- This is a **major rework** affecting most of the codebase +- Atoms remain for parser/compiler but not for object property storage +- Reference counting may be simplified since fewer pointer types +- The short float range (+-3.4e38) covers most practical use cases +- Out-of-range numbers becoming NULL is intentional per memory.md + diff --git a/source/quickjs.c b/source/quickjs.c index d2c714f7..66374770 100644 --- a/source/quickjs.c +++ b/source/quickjs.c @@ -260,16 +260,24 @@ struct JSRuntime { /* Record-key IDs (for K_REC keys) */ uint32_t rec_key_next; + /* Stone arena for interned strings */ + struct StoneArenaPage *st_pages; + struct mist_text **st_text_array; /* indexed by ID */ + uint32_t *st_text_hash; /* hash table mapping to IDs */ + uint32_t st_text_count; /* number of interned texts */ + uint32_t st_text_size; /* hash table size */ + uint32_t st_text_resize; /* threshold for resize */ + #ifdef DUMP_PROFILE JSProfileState profile; #endif }; struct JSClass { - uint32_t class_id; /* 0 means free entry */ + const char *class_name; JSClassFinalizer *finalizer; JSClassGCMark *gc_mark; - const char *class_name; // this could be removed in debug builds + uint32_t class_id; /* 0 means free entry */ }; #define JS_MODE_BACKTRACE_BARRIER \ @@ -712,6 +720,54 @@ 
st_free_all (JSRuntime *rt) { rt->st_pages = NULL; } +/* Pack UTF-32 characters into 64-bit words (2 chars per word) */ +static void +pack_utf32_to_words (const uint32_t *utf32, uint32_t len, uint64_t *packed) { + for (uint32_t i = 0; i < len; i += 2) { + uint64_t hi = utf32[i]; + uint64_t lo = (i + 1 < len) ? utf32[i + 1] : 0; + packed[i / 2] = (hi << 32) | lo; + } +} + +/* Resize the stone text intern table: doubles st_text_size (256 on first use), grows st_text_array to new_size + 1 entries and re-inserts every non-zero ID into a fresh zeroed hash table using linear probing. Returns 0 on success, or -1 if either allocation fails, in which case no JSRuntime field has been modified. */ +static int +st_text_resize (JSRuntime *rt) { + uint32_t new_size = rt->st_text_size ? rt->st_text_size * 2 : 256; + uint32_t new_mask = new_size - 1; + + uint32_t *new_hash = js_mallocz_rt (rt, new_size * sizeof (uint32_t)); + if (!new_hash) return -1; + + mist_text **new_array = js_realloc_rt (rt, rt->st_text_array, + (new_size + 1) * sizeof (mist_text *)); + if (!new_array) { + js_free_rt (rt, new_hash); + return -1; + } + + /* Rehash existing entries; read texts through new_array, because js_realloc_rt may have moved the block (realloc semantics assumed) and rt->st_text_array is only updated below */ + for (uint32_t i = 0; i < rt->st_text_size; i++) { + uint32_t id = rt->st_text_hash ? rt->st_text_hash[i] : 0; + if (id != 0) { + mist_text *text = new_array[id]; + uint64_t hash = text->length; /* hash stored in length for stoned text */ + uint32_t slot = hash & new_mask; + while (new_hash[slot] != 0) + slot = (slot + 1) & new_mask; + new_hash[slot] = id; + } + } + + js_free_rt (rt, rt->st_text_hash); + rt->st_text_hash = new_hash; + rt->st_text_array = new_array; + rt->st_text_size = new_size; + rt->st_text_resize = new_size * 3 / 4; /* 75% load factor */ + + return 0; +} + /* Compare two packed UTF-32 texts for equality */ static int text_equal (mist_text *a, const uint64_t *packed_b, uint32_t len_b) { @@ -1102,8 +1158,8 @@ js_key_hash (JSValue key) { if (type == OBJ_TEXT) return get_text_hash ((mist_text *)ptr); if (type == OBJ_RECORD) { JSRecord *rec = (JSRecord *)ptr; - if (rec->rec_key_id == 0) return 0; - return fash64_hash_one (rec->rec_key_id); + if (rec->rec_id == 0) return 0; + return fash64_hash_one (rec->rec_id); } return 0; @@ -1301,7 +1357,7 @@ js_new_record (JSContext *ctx, 
uint32_t initial_mask) { rec->proto = NULL; rec->len = 0; rec->tombs = 0; - rec->rec_key_id = alloc_rec_key_id (rt); + rec->rec_id = ++rt->rec_key_next; rec->tab = js_mallocz (ctx, sizeof (JSRecordEntry) * (initial_mask + 1)); if (!rec->tab) { diff --git a/source/quickjs.h b/source/quickjs.h index b979eab7..13a7cf24 100644 --- a/source/quickjs.h +++ b/source/quickjs.h @@ -94,6 +94,9 @@ typedef uint32_t JSValue; #define JSValue JSValue +/* JSValueConst is just JSValue (const is not needed in value semantics) */ +typedef JSValue JSValueConst; + /* LSB-based tags */ enum { /* Primary tags (low bits) */ @@ -688,7 +691,7 @@ JSValue JS_NewArray (JSContext *ctx); JSValue JS_NewArrayLen (JSContext *ctx, uint32_t len); int JS_ArrayPush (JSContext *ctx, JSValue obj, JSValue val); JSValue JS_ArrayPop (JSContext *ctx, JSValue obj); -JSValue JS_GetProperty (JSContext *ctx, JSValue this_obj, JSValue prop) { +JSValue JS_GetProperty (JSContext *ctx, JSValue this_obj, JSValue prop); // For records JSValue JS_GetPropertyStr (JSContext *ctx, JSValue this_obj, const char *prop); @@ -705,6 +708,7 @@ JSValue JS_GetPropertyUint32 (JSContext *ctx, JSValue this_obj, uint32_t idx); int JS_SetPropertyUint32 (JSContext *ctx, JSValue this_obj, uint32_t idx, JSValue val); int JS_SetPropertyInt64 (JSContext *ctx, JSValue this_obj, int64_t idx, JSValue val); +/* Get property keys as array of JSValue strings */ int JS_GetOwnPropertyNames (JSContext *ctx, JSValue **tab, uint32_t *plen, JSValue obj); JSValue JS_Call (JSContext *ctx, JSValue func_obj, JSValue this_obj, int argc, JSValue *argv); diff --git a/status.md b/status.md new file mode 100644 index 00000000..3b5e2481 --- /dev/null +++ b/status.md @@ -0,0 +1,94 @@ +QuickJS Mist Memory Format Refactoring +Current Status +The codebase is partially refactored but doesn't compile due to missing KeyId type definitions. 
+ +Incremental Refactoring Tasks +Phase 0: Fix Compilation (Prerequisite) + Define missing KeyId type as transitional typedef (will be replaced by JSValue later) + Define K_EMPTY, K_TOMB, +key_text() +, +key_is_text() +, +key_payload() + macros/functions + Verify build compiles and tests pass +Phase 1: New JSValue Encoding in quickjs.h + Add new LSB-based tag constants alongside existing tags + Add JS_TAG_SHORT_FLOAT for 61-bit truncated double + Add JS_TAG_STRING_ASCII for immediate 7-byte ASCII strings + Add new value extraction/creation macros + Add type check inline functions + Keep existing NaN-boxing code active (compile-time switch) +Phase 2: Short Float Implementation + Implement JS_NewFloat64_ShortFloat() with range checking + Implement JS_VALUE_GET_FLOAT64_ShortFloat() for decoding + Out-of-range values return JS_NULL + Prefer integer encoding when exact +Phase 3: Immediate ASCII String +Phase 3: Immediate ASCII String + Implement JS_TryNewImmediateASCII() for strings up to 7 chars + Implement JS_IsImmediateASCII() type check + Implement JS_GetImmediateASCIILen() and JS_GetImmediateASCIIChar() +Integrate with +JS_NewStringLen() + to try immediate first +Phase 4: Remove JSStringRope + Delete JSStringRope structure + Remove JS_TAG_STRING_ROPE handling + Update string concatenation to create immediate mist_text objects + Remove rope-related iterator functions +Phase 5: Refactor JSString to UTF-32 (mist_text) + Modify struct JSString to store UTF-32 characters only + Remove is_wide_char flag and 8.16 unions + Update +js_alloc_string + to allocate UTF-32 buffer + Update string creation functions ( +js_new_string8 +, etc.) 
+ Update all string accessors to use UTF-32 + Implement immediate-to-UTF32 conversion helper + Update string operations ( +concat +, +compare +) to work on UTF-32 +Phase 6: Replace KeyId with JSValue in Records + Change JSRecordEntry.key from KeyId to JSValue + Update +rec_hash_key() + to hash JSValue keys directly + Update +rec_find_slot() + for JSValue key comparison + Update +rec_get_own() +, +rec_get() +, +rec_set_own() + for JSValue keys + Remove KeyId typedef and related functions +Phase 7: Consolidate JSObject → JSRecord + Remove JSShape and JSShapeProperty structures + Remove shape hash table from JSRuntime + Update all property access to use JSRecord + Migrate JSObject users to JSRecord + Remove JSObject structure +Phase 8: Update GC for New Format + Update mark_children for JSRecord with JSValue keys + Update free_record for JSValue keys + Handle immediate values correctly (no marking needed) + Test for cycles and correct collection +Phase 9: C Class Storage in Slot 0 + Implement slot 0 reservation for class_id and opaque pointer + Update JS_SetOpaque() / JS_GetOpaque() + Migrate existing class storage +Verification Checklist + Build compiles without errors + Existing tests pass + Property access works correctly + GC correctly handles cycles + Short float encoding/decoding verified + Immediate ASCII strings work \ No newline at end of file