This commit is contained in:
2026-01-30 09:58:02 -06:00
parent 3ccaf68a5b
commit 24ecff3f1c
5 changed files with 708 additions and 8 deletions

View File

@@ -15,8 +15,7 @@ JSC_CCALL(os_calc_mem,
JS_SetPropertyStr(js,ret,"memory_used_size",number2js(js,mu.memory_used_size));
JS_SetPropertyStr(js,ret,"malloc_count",number2js(js,mu.malloc_count));
JS_SetPropertyStr(js,ret,"memory_used_count",number2js(js,mu.memory_used_count));
JS_SetPropertyStr(js,ret,"atom_count",number2js(js,mu.atom_count));
JS_SetPropertyStr(js,ret,"atom_size",number2js(js,mu.atom_size));
/* atom_count and atom_size removed - atoms are now just strings */
JS_SetPropertyStr(js,ret,"str_count",number2js(js,mu.str_count));
JS_SetPropertyStr(js,ret,"str_size",number2js(js,mu.str_size));
JS_SetPropertyStr(js,ret,"obj_count",number2js(js,mu.obj_count));

547
plan.md Normal file
View File

@@ -0,0 +1,547 @@
# Refactoring QuickJS to Mist Memory Format
## Summary
Complete rework of `quickjs.h` and `quickjs.c` to align with `docs/memory.md` and the new JSValue encoding scheme using LSB-based type discrimination with short floats.
## Key Design Decisions (from user)
1. **Remove NaN-boxing entirely** - Use LSB-based type tags instead
2. **Short float for numbers** - Truncated double (3 fewer exponent bits), out-of-range → NULL
3. **Optional 32-bit float mode** - Compile-time option, stored like ints
4. **Remove KeyId** - Use JSValue directly as keys in objects
5. **Remove JSStringRope** - No lazy concatenation, immediate text creation
6. **Remove JSObject/shapes** - Move to JSRecord only with direct key/value storage
7. **Remove atoms from objects** - String interning for literals/properties only
## New JSValue Encoding (64-bit)
Based on the provided header, using LSB-based discrimination:
```
LSB = 0 → 31-bit signed integer (value >> 1)
LSB = 001 → 61-bit pointer (3 tag bits, so it cannot collide with 101 below)
LSB = 101 → Short float (truncated double, 3 fewer exponent bits)
LSB = 11 → Special tag (next 3 bits for subtype, 5 bits total)
```
**Special tags (5 bits, LSB = 11):**
- `00011` (3) = JS_TAG_BOOL (payload bit 5 = value)
- `00111` (7) = JS_TAG_NULL
- `01011` (11) = JS_TAG_UNDEFINED (may not be needed - use NULL)
- `01111` (15) = JS_TAG_EXCEPTION
- `10111` (23) = JS_TAG_UNINITIALIZED
- `11011` (27) = JS_TAG_STRING_ASCII (immediate string: 3-bit len + up to 7 ASCII bytes)
- `11111` (31) = JS_TAG_CATCH_OFFSET
## Critical Files
- `/Users/johnalanbrook/work/cell/source/quickjs.h` - Complete rewrite of JSValue encoding
- `/Users/johnalanbrook/work/cell/source/quickjs.c` - Remove shapes, atoms from objects, string ropes
## Implementation Plan
### Phase 1: New JSValue Encoding in quickjs.h
Replace the entire JSValue system with LSB-based tags:
```c
/* Select the JSValue word from the pointer width: targets whose intptr_t
 * reaches INT64_MAX get a 64-bit word plus short-float support, all other
 * targets get a 32-bit word. JSW is the word width in bytes; JSW - 1 is used
 * below as the pointer tag mask. */
#if INTPTR_MAX >= INT64_MAX
#define JS_PTR64
typedef uint64_t JSValue;
#define JSW 8
#define JS_USE_SHORT_FLOAT
#else
typedef uint32_t JSValue;
#define JSW 4
#endif
/* Tag values kept in the low bits of a JSValue. Special tags are 5 bits wide:
 * the marker 11 in bits 0-1 plus a 3-bit subtype in bits 2-4. Subtypes 2 and 4
 * have no enumerator here. */
enum {
JS_TAG_INT = 0, /* LSB = 0, 31-bit int */
JS_TAG_PTR = 1, /* low bits = 01 (001 under the JSW - 1 mask when JSW is 8), pointer */
JS_TAG_SPECIAL = 3, /* LSB = 11, special values */
JS_TAG_BOOL = JS_TAG_SPECIAL | (0 << 2), /* 5 bits */
JS_TAG_NULL = JS_TAG_SPECIAL | (1 << 2),
JS_TAG_EXCEPTION = JS_TAG_SPECIAL | (3 << 2),
JS_TAG_UNINITIALIZED = JS_TAG_SPECIAL | (5 << 2),
JS_TAG_STRING_ASCII = JS_TAG_SPECIAL | (6 << 2), /* immediate ASCII string */
JS_TAG_CATCH_OFFSET = JS_TAG_SPECIAL | (7 << 2),
#ifdef JS_USE_SHORT_FLOAT
JS_TAG_SHORT_FLOAT = 5, /* LSB = 101 */
#endif
};
/* Value extraction */
/* GET_INT truncates to 32 bits first, then shifts the tag bit out; negative
 * values rely on the right shift being arithmetic. */
#define JS_VALUE_GET_INT(v) ((int32_t)(v) >> 1)
/* GET_PTR clears the low JSW - 1 tag bits to recover the address. */
#define JS_VALUE_GET_PTR(v) ((void *)((v) & ~(JSW - 1)))
/* Low 5 bits: the special tag including its 11 marker. */
#define JS_VALUE_GET_SPECIAL_TAG(v) ((v) & 0x1F)
/* Payload above the 5-bit tag, taken from the low 32 bits of the word only. */
#define JS_VALUE_GET_SPECIAL_VALUE(v) ((int32_t)(v) >> 5)
/* Value creation */
/* MKINT shifts the value up by one so bit 0 stays clear (JS_TAG_INT). */
#define JS_MKINT(val) (((JSValue)(val) << 1) | JS_TAG_INT)
/* MKPTR ORs the pointer tag into the address; the low tag bits of ptr are
 * presumably zero thanks to allocation alignment - TODO confirm. */
#define JS_MKPTR(ptr) (((JSValue)(uintptr_t)(ptr)) | JS_TAG_PTR)
/* MKSPECIAL places val above the 5-bit tag. */
#define JS_MKSPECIAL(tag, val) ((JSValue)(tag) | ((JSValue)(val) << 5))
/* Type checks */
/* Only bit 0 is tested for integers. */
static inline JS_BOOL JS_IsInt(JSValue v) { return (v & 1) == JS_TAG_INT; }
/* Pointer test uses the full JSW - 1 mask (3 bits on 64-bit, 2 bits on 32-bit). */
static inline JS_BOOL JS_IsPtr(JSValue v) { return (v & (JSW-1)) == JS_TAG_PTR; }
/* Null is matched on the whole word, not just the tag. */
static inline JS_BOOL JS_IsNull(JSValue v) { return v == JS_MKSPECIAL(JS_TAG_NULL, 0); }
/* Exception is matched on the 5-bit tag only, so any payload is accepted. */
static inline JS_BOOL JS_IsException(JSValue v) { return JS_VALUE_GET_SPECIAL_TAG(v) == JS_TAG_EXCEPTION; }
#ifdef JS_USE_SHORT_FLOAT
/* Short floats are identified by the low 3 bits being 101. */
static inline JS_BOOL JS_IsShortFloat(JSValue v) { return (v & 7) == JS_TAG_SHORT_FLOAT; }
#endif
/* Constants */
/* Each constant is its special tag with a payload of 0, except JS_TRUE (payload 1). */
#define JS_NULL JS_MKSPECIAL(JS_TAG_NULL, 0)
#define JS_FALSE JS_MKSPECIAL(JS_TAG_BOOL, 0)
#define JS_TRUE JS_MKSPECIAL(JS_TAG_BOOL, 1)
#define JS_EXCEPTION JS_MKSPECIAL(JS_TAG_EXCEPTION, 0)
#define JS_UNINITIALIZED JS_MKSPECIAL(JS_TAG_UNINITIALIZED, 0)
### Phase 2: Short Float Implementation
Short float uses 3 fewer exponent bits than double. Numbers outside range become NULL.
```c
/* Short float: 61 bits = 1 sign + 8 exp + 52 mantissa (vs double's 11 exp)
 * Range: approximately +-3.4e38 (vs double's +-1.8e308)
 * Out of range values become JS_NULL; this includes NaN, +-Infinity, and
 * normal doubles whose re-biased exponent falls below 1 (magnitude < 2^-126).
 * Zero and subnormals: 0.0 is representable, subnormals become 0.0
 *
 * Encode a double as a JSValue.
 *   ctx: unused here; kept for API compatibility.
 *   d:   value to encode.
 * Returns an integer-tagged value when d is an exact integer that fits the
 * 31-bit integer payload, a short-float value otherwise, or JS_NULL when d
 * cannot be represented.
 */
static inline JSValue JS_NewFloat64(JSContext *ctx, double d) {
    union { double d; uint64_t u; } u;
    (void)ctx;
    u.d = d;
    /* Extract sign, exponent, mantissa */
    uint64_t sign = u.u >> 63;
    int exp = (u.u >> 52) & 0x7FF;
    uint64_t mantissa = u.u & ((1ULL << 52) - 1);
    /* Special case: zero (exp=0, mantissa=0) */
    if (exp == 0 && mantissa == 0) {
        /* Encode +0.0 or -0.0 */
        return (sign << 63) | JS_TAG_SHORT_FLOAT; /* short_exp=0, mantissa=0 */
    }
    /* Check for NaN/Inf (exp=0x7FF) */
    if (exp == 0x7FF) {
        return JS_NULL; /* NaN or Infinity → null */
    }
    /* Subnormals (exp=0, mantissa!=0): flush to zero */
    if (exp == 0) {
        return (sign << 63) | JS_TAG_SHORT_FLOAT; /* becomes +/-0.0 */
    }
    /* Normal numbers: convert exponent bias */
    /* Double bias = 1023, short float bias = 127 */
    int short_exp = exp - 1023 + 127;
    if (short_exp < 1 || short_exp > 254) {
        return JS_NULL; /* Out of range (short_exp 0 and 255 are special) */
    }
    /* Prefer integer encoding, but only for values that survive the round
     * trip: JS_MKINT shifts left by one and JS_VALUE_GET_INT reads back
     * through int32_t, so the payload is 31 bits, i.e. [-2^30, 2^30 - 1].
     * Testing against the int32 range would corrupt e.g. 2^30 into -2^30. */
    if (d >= -1073741824.0 && d <= 1073741823.0) {
        int32_t i = (int32_t)d;
        if ((double)i == d) {
            return JS_MKINT(i);
        }
    }
    /* Encode as short float:
     * [sign:1][short_exp:8][mantissa:52][tag:3] */
    JSValue v = (sign << 63) | ((uint64_t)short_exp << 55) | (mantissa << 3) | JS_TAG_SHORT_FLOAT;
    return v;
}
/* Decode a short-float JSValue ([sign:1][short_exp:8][mantissa:52][tag:3])
 * back to a double. The caller is expected to have checked the tag.
 * Returns the decoded double; a stored exponent of 0 decodes to +/-0.0. */
static inline double JS_VALUE_GET_FLOAT64(JSValue v) {
    uint64_t sign = v >> 63;
    uint64_t short_exp = (v >> 55) & 0xFF;
    uint64_t mantissa = (v >> 3) & ((1ULL << 52) - 1);
    union { double d; uint64_t u; } u;
    /* The encoder writes zero (and flushed subnormals) with short_exp == 0.
     * Re-biasing that exponent would yield +/-2^-127 instead of zero, so it
     * must be handled before the bias conversion. */
    if (short_exp == 0) {
        u.u = sign << 63;
        return u.d;
    }
    /* Convert exponent: short bias 127 → double bias 1023 */
    uint64_t exp = short_exp - 127 + 1023;
    u.u = (sign << 63) | (exp << 52) | mantissa;
    return u.d;
}
```
### Phase 3: Immediate ASCII String (JS_TAG_STRING_ASCII)
Up to 7 ASCII characters stored directly in JSValue payload.
**Layout (64-bit):**
- Bits 0-4: Tag (JS_TAG_STRING_ASCII = 27)
- Bits 5-7: Length (0-7)
- Bits 8-63: Up to 7 ASCII bytes (char[0] in bits 8-15, etc.)
```c
#define JS_ASCII_MAX_LEN 7
/* True when the 5-bit special tag of v marks an immediate ASCII string. */
static inline JS_BOOL JS_IsImmediateASCII(JSValue v) {
    const JSValue special_tag = JS_VALUE_GET_SPECIAL_TAG(v);
    return special_tag == JS_TAG_STRING_ASCII;
}
/* Length of an immediate ASCII string: the 3-bit field in bits 5-7. */
static inline size_t JS_GetImmediateASCIILen(JSValue v) {
    const JSValue above_tag = v >> 5;
    return above_tag & 0x7;
}
/* Get immediate ASCII string character at index.
 *   v:   immediate ASCII string value (tag not re-checked here).
 *   idx: byte index; character 0 lives in bits 8-15.
 * Returns the byte at idx, or 0 when idx is outside 0..JS_ASCII_MAX_LEN-1.
 * The range check matters: an unchecked idx would produce a negative shift
 * count or one >= 64, both of which are undefined behaviour in C. */
static inline char JS_GetImmediateASCIIChar(JSValue v, int idx) {
    if (idx < 0 || idx >= JS_ASCII_MAX_LEN)
        return 0;
    return (char)((v >> (8 + idx * 8)) & 0xFF);
}
/* Try to create immediate ASCII string, returns JS_NULL if doesn't fit */
static inline JSValue JS_TryNewImmediateASCII(const char *str, size_t len) {
if (len > JS_ASCII_MAX_LEN) return JS_NULL;
for (size_t i = 0; i < len; i++) {
if ((uint8_t)str[i] >= 0x80) return JS_NULL; /* non-ASCII */
}
/* Tag (5 bits) | Length (3 bits) | chars (56 bits) */
JSValue v = JS_TAG_STRING_ASCII | ((JSValue)len << 5);
for (size_t i = 0; i < len; i++) {
v |= ((JSValue)(uint8_t)str[i]) << (8 + i * 8);
}
return v;
}
/* Hash an immediate ASCII string (hash the entire JSValue).
 * The whole word, tag and length bits included, is fed as a single word
 * into a fash64 state between begin and end.
 * NOTE(review): js_key_hash in Phase 6 hashes the same kind of key with
 * fash64_hash_one(key) - confirm both paths yield the same hash. */
static inline uint64_t js_hash_immediate_ascii(JSValue v) {
fash64_state s;
fash64_begin(&s);
fash64_word(&s, v);
return fash64_end(&s);
}
```
### Phase 4: Remove JSStringRope
Delete `JSStringRope` structure and all rope-related functions:
- `js_new_string_rope()` (line 4815)
- `js_rebalancee_string_rope()` (line 4952)
- `string_rope_iter_*` functions
- `JS_TAG_STRING_ROPE` usage
String concatenation creates new `mist_text` objects immediately.
### Phase 5: UTF-32 Text Objects (mist_text)
The `mist_text` structure already exists. Complete integration:
```c
/* Text object: UTF-32 packed 2 chars per 64-bit word
 * Pretext (mutable, stone=0): hdr.cap = char capacity, length field = current length
 * Text (immutable, stone=1): hdr.cap = length, length field = hash
 *
 * The meaning of `length` therefore depends on the stone bit in hdr; readers
 * must check it before treating the field as a character count.
 */
typedef struct mist_text {
objhdr_t hdr; /* type=OBJ_TEXT, cap=char count, stone bit */
uint64_t length; /* pretext: char count | text: hash */
uint64_t packed[]; /* UTF-32 chars, 2 per word (high then low); flexible array sized by the allocator */
} mist_text;
/* Create new text from UTF-8 C string.
 *   ctx: context used for allocation.
 *   str: UTF-8 bytes (not required to be NUL-terminated).
 *   len: number of bytes in str.
 * Returns an immediate value when the string fits one, otherwise a pointer
 * value to a freshly allocated mist_text, or JS_EXCEPTION when an allocation
 * fails.
 * NOTE(review): Phase 3 names the immediate constructor
 * JS_TryNewImmediateASCII; confirm JS_TryNewImmediateText is the intended
 * helper here. */
JSValue JS_NewStringLen(JSContext *ctx, const char *str, size_t len) {
    /* Try immediate text first */
    JSValue imm = JS_TryNewImmediateText(str, len);
    if (!JS_IsNull(imm)) return imm;
    /* Convert UTF-8 to UTF-32; len code units is an upper bound because a
     * UTF-8 sequence never decodes to more code points than it has bytes. */
    uint32_t *utf32 = js_malloc(ctx, len * sizeof(uint32_t));
    if (!utf32)
        return JS_EXCEPTION;
    size_t utf32_len = utf8_to_utf32(str, len, utf32);
    /* Allocate mist_text */
    size_t word_count = (utf32_len + 1) / 2;
    mist_text *text = js_mallocz(ctx, sizeof(mist_text) + word_count * sizeof(uint64_t));
    if (!text) {
        js_free(ctx, utf32); /* do not leak the scratch buffer */
        return JS_EXCEPTION;
    }
    text->hdr = objhdr_make(utf32_len, OBJ_TEXT, false, false, false, false);
    text->length = utf32_len;
    /* Pack UTF-32 into words: even index in the high half, odd in the low
     * half; a trailing odd character leaves the low half zero. */
    for (size_t i = 0; i < utf32_len; i += 2) {
        uint64_t hi = utf32[i];
        uint64_t lo = (i + 1 < utf32_len) ? utf32[i + 1] : 0;
        text->packed[i / 2] = (hi << 32) | lo;
    }
    js_free(ctx, utf32);
    /* Add to GC list and return as JSValue */
    return JS_MKPTR(text);
}
```
### Phase 6: Remove JSObject, Use JSRecord Only
**Delete:**
- `JSObject` structure (line 1664)
- `JSShape` and `JSShapeProperty` structures
- Shape hash table in JSRuntime
- All shape-related functions
- `find_own_property()` and shape-based property access
**Keep only JSRecord with direct key/value storage:**
```c
/* Record: open-addressing hash table with JSValue keys
* Slot 0 reserved: key[0] = class_id<<32 | rec_key_id, value[0] = opaque
*/
/* Slot: key/value pair stored together, so a probe touches one array entry
 * for both the key and its value. */
typedef struct JSSlot {
JSValue key;
JSValue val;
} JSSlot;
/* Record: hash table of slots plus a prototype link. The slot mask is kept
 * in mist_hdr rather than in a separate field. */
typedef struct JSRecord {
JSGCObjectHeader header;
objhdr_t mist_hdr; /* type=OBJ_RECORD, cap=slot_mask */
struct JSRecord *proto; /* prototype chain */
uint32_t len; /* number of live entries */
uint32_t tombs; /* tombstone count */
JSSlot *slots; /* key/value pairs, size = mask+1 */
} JSRecord;
/* Three key types for property lookup:
* 1. Immediate ASCII (JS_TAG_STRING_ASCII): hash from JSValue itself
* 2. Text object (mist_text pointer): hash from object's stored hash
* 3. Record object used as key: hash from monotonic ID in record's key[0]
*
* Per memory.md: when a record is used as a key, it gets assigned a
* monotonically increasing 32-bit ID stored in lower 32 bits of keys[0].
*/
/* Compute the hash used to place a key in a record.
 * Immediate ASCII keys hash their whole word, text objects use
 * get_text_hash(), and records hash the 32-bit id kept in the low half of
 * slots[0].key. Any other value yields 0. */
static uint64_t js_key_hash(JSValue key) {
    if (JS_IsImmediateASCII(key))
        return fash64_hash_one(key);
    if (JS_IsPtr(key)) {
        void *obj = JS_VALUE_GET_PTR(key);
        objhdr_t obj_hdr = *(objhdr_t *)obj; /* header is read from the start of the object */
        uint8_t kind = objhdr_type(obj_hdr);
        if (kind == OBJ_TEXT)
            return get_text_hash((mist_text *)obj);
        if (kind == OBJ_RECORD) {
            JSRecord *as_record = (JSRecord *)obj;
            uint32_t key_id = (uint32_t)as_record->slots[0].key; /* lower 32 bits */
            return fash64_hash_one(key_id);
        }
    }
    return 0; /* invalid key or unknown object type */
}
/* Give rec a non-zero key id if it does not have one yet. The id is drawn
 * from rt->rec_key_next (skipping 0 on wrap-around) and written to the low
 * 32 bits of slots[0].key; the upper 32 bits are left untouched. */
static void rec_ensure_key_id(JSRuntime *rt, JSRecord *rec) {
    JSValue *reserved_key = &rec->slots[0].key;
    if ((uint32_t)*reserved_key != 0)
        return; /* already assigned */
    uint32_t fresh = ++rt->rec_key_next;
    if (fresh == 0)
        fresh = ++rt->rec_key_next; /* Skip 0 */
    *reserved_key = (*reserved_key & 0xFFFFFFFF00000000ULL) | fresh;
}
/* Key equality for record lookup.
 * Bit-identical values are equal. Beyond that only two text objects can be
 * equal, decided by text_content_equal(); immediates, non-pointers and
 * records that were not bit-identical are unequal. */
static JS_BOOL js_key_equal(JSValue a, JSValue b) {
    if (a == b)
        return TRUE;
    /* Distinct immediates can never match. */
    if (JS_IsImmediateASCII(a) || JS_IsImmediateASCII(b))
        return FALSE;
    if (!(JS_IsPtr(a) && JS_IsPtr(b)))
        return FALSE;
    void *lhs = JS_VALUE_GET_PTR(a);
    void *rhs = JS_VALUE_GET_PTR(b);
    uint8_t lhs_type = objhdr_type(*(objhdr_t *)lhs);
    uint8_t rhs_type = objhdr_type(*(objhdr_t *)rhs);
    /* Records compare by identity, which the a == b test already covered,
     * so only the text/text pairing needs a content comparison. */
    if (lhs_type != OBJ_TEXT || rhs_type != OBJ_TEXT)
        return FALSE;
    return text_content_equal((mist_text *)lhs, (mist_text *)rhs);
}
/* Property lookup using open-addressing hash table with linear probing.
 *   rec: record to search (own slots only, no prototype walk).
 *   key: key to look for.
 * Returns the slot index (>= 1) holding key, or -1 when it is absent.
 * Slot 0 is reserved, so probes that land on it are moved to slot 1.
 * A JS_NULL key marks an empty slot and ends the search; a JS_EXCEPTION key
 * marks a tombstone and is stepped over. */
static int rec_find_slot(JSRecord *rec, JSValue key) {
    uint32_t mask = (uint32_t)objhdr_cap56(rec->mist_hdr);
    /* With mask == 0 the table holds only the reserved slot; redirecting the
     * probe to index 1 would read past the end of slots[]. */
    if (mask == 0)
        return -1;
    uint64_t hash = js_key_hash(key);
    uint32_t idx = hash & mask;
    if (idx == 0) idx = 1; /* slot 0 reserved */
    for (uint32_t i = 0; i <= mask; i++) {
        JSValue k = rec->slots[idx].key;
        if (JS_IsNull(k)) return -1; /* empty, not found */
        if (k != JS_EXCEPTION && js_key_equal(k, key)) /* skip tombstones */
            return (int)idx;
        idx = (idx + 1) & mask;
        if (idx == 0) idx = 1;
    }
    return -1;
}
```
### Phase 7: Update Property Access Functions
Replace all JSObject property functions with JSRecord equivalents:
```c
/* Look up prop on this_obj and then along its prototype chain.
 * Returns the stored value of the first record that has the key. When the
 * key is missing, or this_obj is not a pointer value, the result is JS_NULL
 * unless throw_ref_error is set, in which case the corresponding error is
 * thrown and its return value passed back. */
JSValue JS_GetPropertyInternal(JSContext *ctx, JSValueConst this_obj,
                               JSValue prop, JS_BOOL throw_ref_error) {
    if (!JS_IsPtr(this_obj))
        return throw_ref_error ? JS_ThrowTypeError(ctx, "not an object") : JS_NULL;
    for (JSRecord *cur = (JSRecord *)JS_VALUE_GET_PTR(this_obj); cur; cur = cur->proto) {
        int slot = rec_find_slot(cur, prop);
        if (slot >= 0)
            return cur->slots[slot].val; /* No dup needed if no ref counting */
    }
    return throw_ref_error ? JS_ThrowReferenceError(ctx, "property not found") : JS_NULL;
}
/* Store val under prop on this_obj itself (the prototype chain is not
 * consulted). An existing own slot is overwritten and 0 returned; otherwise
 * the result of rec_add_property() is returned. Non-pointer targets give -1. */
int JS_SetPropertyInternal(JSContext *ctx, JSValueConst this_obj,
                           JSValue prop, JSValue val) {
    if (!JS_IsPtr(this_obj))
        return -1;
    JSRecord *target = (JSRecord *)JS_VALUE_GET_PTR(this_obj);
    int slot = rec_find_slot(target, prop);
    if (slot < 0)
        return rec_add_property(ctx, target, prop, val); /* new property */
    target->slots[slot].val = val;
    return 0;
}
```
### Phase 8: C Class Storage in Slot 0
Per memory.md, slot 0 is reserved for internal use:
```c
/* slots[0].key: lower 32 bits = rec_key_id (for identity-based keys)
* upper 32 bits = class_id (C class)
* slots[0].val: opaque C pointer
*/
/* Store an opaque C pointer in the value half of a record's reserved slot 0.
 *   ctx:    unused.
 *   obj:    target value; ignored unless it is a pointer value, since
 *           untagging an int or special value would yield a bogus address.
 *   opaque: pointer to store (kept as a raw integer, not a tagged JSValue). */
void JS_SetOpaque(JSContext *ctx, JSValue obj, void *opaque) {
    (void)ctx;
    if (!JS_IsPtr(obj))
        return;
    JSRecord *rec = (JSRecord *)JS_VALUE_GET_PTR(obj);
    rec->slots[0].val = (JSValue)(uintptr_t)opaque;
}
/* Fetch the opaque C pointer stored in a record's reserved slot 0.
 *   ctx:      unused.
 *   obj:      value to read from.
 *   class_id: class the caller expects; compared with the upper 32 bits of
 *             slots[0].key.
 * Returns the stored pointer, or NULL when obj is not a pointer value (which
 * would otherwise be dereferenced as a record) or its class id differs. */
void *JS_GetOpaque(JSContext *ctx, JSValue obj, uint32_t class_id) {
    (void)ctx;
    if (!JS_IsPtr(obj))
        return NULL;
    JSRecord *rec = (JSRecord *)JS_VALUE_GET_PTR(obj);
    uint32_t stored_class = (uint32_t)(rec->slots[0].key >> 32);
    if (stored_class != class_id) return NULL;
    return (void *)(uintptr_t)rec->slots[0].val;
}
/* Write class_id into the upper 32 bits of slots[0].key while keeping the
 * lower 32 bits as they were. */
void JS_SetClassID(JSRecord *rec, uint32_t class_id) {
    uint64_t kept_low = rec->slots[0].key & 0xFFFFFFFF;
    uint64_t new_high = (uint64_t)class_id << 32;
    rec->slots[0].key = kept_low | new_high;
}
```
### Phase 9: Update GC
The GC needs updates for the new object format:
```c
/* Report every GC-managed object directly reachable from gp to mark_func.
 * For records this is the prototype plus each live slot's key and value.
 * Entries are read through rec->slots[], the storage JSRecord declares;
 * the record has no separate keys[]/values[] arrays. */
static void mark_children(JSRuntime *rt, JSGCObjectHeader *gp, ...) {
    switch (gp->gc_obj_type) {
    case JS_GC_OBJ_TYPE_RECORD:
        {
            JSRecord *rec = (JSRecord *)gp;
            uint32_t mask = objhdr_cap56(rec->mist_hdr);
            if (rec->proto)
                mark_func(rt, &rec->proto->header);
            /* Slot 0 is reserved for internal data, so scanning starts at 1. */
            for (uint32_t i = 1; i <= mask; i++) {
                JSValue key = rec->slots[i].key;
                if (JS_IsNull(key) || key == JS_EXCEPTION)
                    continue; /* empty slot or tombstone */
                /* Mark key if it's a pointer */
                if (JS_IsPtr(key))
                    JS_MarkValue(rt, key, mark_func);
                /* Mark value if it's a pointer */
                JSValue val = rec->slots[i].val;
                if (JS_IsPtr(val))
                    JS_MarkValue(rt, val, mark_func);
            }
        }
        break;
    // ... other cases
    }
}
```
## Cleanup - Items to Remove
1. **quickjs.h:**
- Old NaN-boxing macros (JS_VALUE_GET_TAG, JS_MKVAL, etc.)
- JS_TAG_STRING, JS_TAG_STRING_ROPE, JS_TAG_OBJECT, JS_TAG_ARRAY, JS_TAG_FUNCTION
- JSValueConst (just use JSValue)
2. **quickjs.c:**
- JSStringRope structure and functions
- JSShape and JSShapeProperty structures
- Shape hash table and functions
- Atom-based property access (keep atoms for parser/compiler)
- JSObject structure (replace with JSRecord)
- `find_own_property()`, `add_shape_property()`, etc.
## Verification
1. **Build:** `make` completes without errors
2. **Basic test:** Create objects, set/get properties
3. **Number test:** Verify short float encoding/decoding, out-of-range → null
4. **String test:** Immediate text for short strings, mist_text for long
5. **GC test:** Create cycles, verify collection works
6. **C class test:** SetOpaque/GetOpaque work with slot 0 storage
## Notes
- This is a **major rework** affecting most of the codebase
- Atoms remain for parser/compiler but not for object property storage
- Reference counting may be simplified since fewer pointer types
- The short float range (+-3.4e38) covers most practical use cases
- Out-of-range numbers becoming NULL is intentional per memory.md

View File

@@ -260,16 +260,24 @@ struct JSRuntime {
/* Record-key IDs (for K_REC keys) */
uint32_t rec_key_next;
/* Stone arena for interned strings */
struct StoneArenaPage *st_pages;
struct mist_text **st_text_array; /* indexed by ID */
uint32_t *st_text_hash; /* hash table mapping to IDs */
uint32_t st_text_count; /* number of interned texts */
uint32_t st_text_size; /* hash table size */
uint32_t st_text_resize; /* threshold for resize */
#ifdef DUMP_PROFILE
JSProfileState profile;
#endif
};
struct JSClass {
uint32_t class_id; /* 0 means free entry */
const char *class_name;
JSClassFinalizer *finalizer;
JSClassGCMark *gc_mark;
const char *class_name; // this could be removed in debug builds
uint32_t class_id; /* 0 means free entry */
};
#define JS_MODE_BACKTRACE_BARRIER \
@@ -712,6 +720,54 @@ st_free_all (JSRuntime *rt) {
rt->st_pages = NULL;
}
/* Pack len UTF-32 code units into 64-bit words, two per word: the even-indexed
   unit goes in the upper 32 bits and the following unit in the lower 32 bits.
   When len is odd the final word's lower half is 0. Writes (len + 1) / 2
   words to packed; nothing is written when len is 0. */
static void
pack_utf32_to_words (const uint32_t *utf32, uint32_t len, uint64_t *packed) {
  uint64_t *out = packed;
  for (uint32_t pos = 0; pos < len; pos += 2) {
    uint64_t word = (uint64_t)utf32[pos] << 32;
    if (pos + 1 < len)
      word |= utf32[pos + 1];
    *out++ = word;
  }
}
/* Resize the stone text intern table.
   Doubles the hash table (256 entries on first use), grows the id-indexed
   text array to new_size + 1 entries, and re-inserts every existing id using
   the hash stored in each text's length field.
   Returns 0 on success, -1 on allocation failure or if the size cannot be
   doubled; on failure the runtime's tables are left as they were. */
static int
st_text_resize (JSRuntime *rt) {
  uint32_t old_size = rt->st_text_size;
  uint32_t new_size = old_size ? old_size * 2 : 256;
  /* Doubling wrapped around uint32_t: refuse rather than build a table whose
     mask no longer matches its allocation. */
  if (new_size <= old_size) return -1;
  uint32_t new_mask = new_size - 1;

  uint32_t *new_hash = js_mallocz_rt (rt, new_size * sizeof (uint32_t));
  if (!new_hash) return -1;

  mist_text **new_array = js_realloc_rt (rt, rt->st_text_array,
                                         ((size_t)new_size + 1) * sizeof (mist_text *));
  if (!new_array) {
    js_free_rt (rt, new_hash);
    return -1;
  }
  /* The old array pointer is invalid once realloc succeeds (the block may
     have moved), so publish the new one before anything reads through it. */
  rt->st_text_array = new_array;

  /* Rehash existing entries; id 0 marks an unused hash slot. */
  if (rt->st_text_hash) {
    for (uint32_t i = 0; i < old_size; i++) {
      uint32_t id = rt->st_text_hash[i];
      if (id == 0) continue;
      mist_text *text = new_array[id];
      uint64_t hash = text->length; /* hash stored in length for stoned text */
      uint32_t slot = hash & new_mask;
      while (new_hash[slot] != 0)
        slot = (slot + 1) & new_mask;
      new_hash[slot] = id;
    }
  }

  js_free_rt (rt, rt->st_text_hash);
  rt->st_text_hash = new_hash;
  rt->st_text_size = new_size;
  rt->st_text_resize = new_size * 3 / 4; /* 75% load factor */
  return 0;
}
/* Compare two packed UTF-32 texts for equality */
static int
text_equal (mist_text *a, const uint64_t *packed_b, uint32_t len_b) {
@@ -1102,8 +1158,8 @@ js_key_hash (JSValue key) {
if (type == OBJ_TEXT) return get_text_hash ((mist_text *)ptr);
if (type == OBJ_RECORD) {
JSRecord *rec = (JSRecord *)ptr;
if (rec->rec_key_id == 0) return 0;
return fash64_hash_one (rec->rec_key_id);
if (rec->rec_id == 0) return 0;
return fash64_hash_one (rec->rec_id);
}
return 0;
@@ -1301,7 +1357,7 @@ js_new_record (JSContext *ctx, uint32_t initial_mask) {
rec->proto = NULL;
rec->len = 0;
rec->tombs = 0;
rec->rec_key_id = alloc_rec_key_id (rt);
rec->rec_id = ++rt->rec_key_next;
rec->tab = js_mallocz (ctx, sizeof (JSRecordEntry) * (initial_mask + 1));
if (!rec->tab) {

View File

@@ -94,6 +94,9 @@ typedef uint32_t JSValue;
#define JSValue JSValue
/* JSValueConst is just JSValue (const is not needed in value semantics) */
typedef JSValue JSValueConst;
/* LSB-based tags */
enum {
/* Primary tags (low bits) */
@@ -688,7 +691,7 @@ JSValue JS_NewArray (JSContext *ctx);
JSValue JS_NewArrayLen (JSContext *ctx, uint32_t len);
int JS_ArrayPush (JSContext *ctx, JSValue obj, JSValue val);
JSValue JS_ArrayPop (JSContext *ctx, JSValue obj);
JSValue JS_GetProperty (JSContext *ctx, JSValue this_obj, JSValue prop) {
JSValue JS_GetProperty (JSContext *ctx, JSValue this_obj, JSValue prop);
// For records
JSValue JS_GetPropertyStr (JSContext *ctx, JSValue this_obj, const char *prop);
@@ -705,6 +708,7 @@ JSValue JS_GetPropertyUint32 (JSContext *ctx, JSValue this_obj, uint32_t idx);
int JS_SetPropertyUint32 (JSContext *ctx, JSValue this_obj, uint32_t idx, JSValue val);
int JS_SetPropertyInt64 (JSContext *ctx, JSValue this_obj, int64_t idx, JSValue val);
/* Get property keys as array of JSValue strings */
int JS_GetOwnPropertyNames (JSContext *ctx, JSValue **tab, uint32_t *plen, JSValue obj);
JSValue JS_Call (JSContext *ctx, JSValue func_obj, JSValue this_obj, int argc, JSValue *argv);

94
status.md Normal file
View File

@@ -0,0 +1,94 @@
QuickJS Mist Memory Format Refactoring
Current Status
The codebase is partially refactored but doesn't compile due to missing KeyId type definitions.
Incremental Refactoring Tasks
Phase 0: Fix Compilation (Prerequisite)
Define missing KeyId type as transitional typedef (will be replaced by JSValue later)
Define K_EMPTY, K_TOMB, `key_text()`, `key_is_text()`, `key_payload()` macros/functions
Verify build compiles and tests pass
Phase 1: New JSValue Encoding in quickjs.h
Add new LSB-based tag constants alongside existing tags
Add JS_TAG_SHORT_FLOAT for 61-bit truncated double
Add JS_TAG_STRING_ASCII for immediate 7-byte ASCII strings
Add new value extraction/creation macros
Add type check inline functions
Keep existing NaN-boxing code active (compile-time switch)
Phase 2: Short Float Implementation
Implement JS_NewFloat64_ShortFloat() with range checking
Implement JS_VALUE_GET_FLOAT64_ShortFloat() for decoding
Out-of-range values return JS_NULL
Prefer integer encoding when exact
Phase 3: Immediate ASCII String
Implement JS_TryNewImmediateASCII() for strings up to 7 chars
Implement JS_IsImmediateASCII() type check
Implement JS_GetImmediateASCIILen() and JS_GetImmediateASCIIChar()
Integrate with `JS_NewStringLen()` to try immediate first
Phase 4: Remove JSStringRope
Delete JSStringRope structure
Remove JS_TAG_STRING_ROPE handling
Update string concatenation to create immediate mist_text objects
Remove rope-related iterator functions
Phase 5: Refactor JSString to UTF-32 (mist_text)
Modify struct JSString to store UTF-32 characters only
Remove is_wide_char flag and 8-/16-bit unions
Update `js_alloc_string` to allocate UTF-32 buffer
Update string creation functions (`js_new_string8`, etc.)
Update all string accessors to use UTF-32
Implement immediate-to-UTF32 conversion helper
Update string operations (`concat`, `compare`) to work on UTF-32
Phase 6: Replace KeyId with JSValue in Records
Change JSRecordEntry.key from KeyId to JSValue
Update `rec_hash_key()` to hash JSValue keys directly
Update `rec_find_slot()` for JSValue key comparison
Update `rec_get_own()`, `rec_get()`, `rec_set_own()` for JSValue keys
Remove KeyId typedef and related functions
Phase 7: Consolidate JSObject → JSRecord
Remove JSShape and JSShapeProperty structures
Remove shape hash table from JSRuntime
Update all property access to use JSRecord
Migrate JSObject users to JSRecord
Remove JSObject structure
Phase 8: Update GC for New Format
Update mark_children for JSRecord with JSValue keys
Update free_record for JSValue keys
Handle immediate values correctly (no marking needed)
Test for cycles and correct collection
Phase 9: C Class Storage in Slot 0
Implement slot 0 reservation for class_id and opaque pointer
Update JS_SetOpaque() / JS_GetOpaque()
Migrate existing class storage
Verification Checklist
Build compiles without errors
Existing tests pass
Property access works correctly
GC correctly handles cycles
Short float encoding/decoding verified
Immediate ASCII strings work