From c1d341eecdaa7520991378aa4a13271e8cca688f Mon Sep 17 00:00:00 2001 From: John Alanbrook Date: Sun, 8 Jun 2025 08:35:12 -0500 Subject: [PATCH] faster wota encoding --- .cell/cell.toml | 4 +- benchmarks/wota_nota_json.ce | 10 +- prosperon/examples/chess/chess.ce | 1 - prosperon/prosperon.ce | 11 +- prosperon/sdl_video.ce | 2 +- scripts/engine.cm | 11 +- scripts/jswota.cm | 170 ++++++++++++++---------------- source/jsffi.c | 3 + source/qjs_wota.c | 106 ++++++++++++------- source/wota.h | 137 ++++++------------------ tests/jswota.ce | 6 +- 11 files changed, 193 insertions(+), 268 deletions(-) diff --git a/.cell/cell.toml b/.cell/cell.toml index 1fbc5e4a..fdf5e83b 100644 --- a/.cell/cell.toml +++ b/.cell/cell.toml @@ -6,7 +6,7 @@ ar_timer = 60 actor_memory = 0 net_service = 0.1 reply_timeout = 60 -actor_max = 10_000 +actor_max = "10_000" stack_max = 0 [actors] [actors.prosperon/sdl_video] @@ -14,4 +14,4 @@ main = true [actors.prosperon/prosperon] main = true [actors.prosperon] -main = false \ No newline at end of file +main = true diff --git a/benchmarks/wota_nota_json.ce b/benchmarks/wota_nota_json.ce index 9376826b..2e737744 100644 --- a/benchmarks/wota_nota_json.ce +++ b/benchmarks/wota_nota_json.ce @@ -8,6 +8,7 @@ var wota = use('wota'); var nota = use('nota'); var json = use('json'); + var jswota = use('jswota') var os = use('os'); // @@ -22,7 +23,7 @@ const libraries = [ decode: wota.decode, // Wota produces an ArrayBuffer. We'll count `buffer.byteLength` as size. getSize(encoded) { - return encoded.byteLength; + return encoded.length; } }, { @@ -31,7 +32,7 @@ const libraries = [ decode: nota.decode, // Nota also produces an ArrayBuffer: getSize(encoded) { - return encoded.byteLength; + return encoded.length; } }, { @@ -91,11 +92,6 @@ const benchmarks = [ data: [ Array.from({length:1000}, (_, i) => i) ], iterations: 1000 }, - { - name: "Large Binary Blob (256KB)", - data: [ new Uint8Array(256 * 1024).buffer ], - iterations: 200 - } ]; //////////////////////////////////////////////////////////////////////////////// diff --git a/prosperon/examples/chess/chess.ce b/prosperon/examples/chess/chess.ce index 16119abc..6731d772 100644 --- a/prosperon/examples/chess/chess.ce +++ b/prosperon/examples/chess/chess.ce @@ -274,7 +274,6 @@ function update(dt) function draw() { - return {} draw2d.clear() drawBoard() drawPieces() diff --git a/prosperon/prosperon.ce b/prosperon/prosperon.ce index 30446c7b..f0dfa57f 100644 --- a/prosperon/prosperon.ce +++ b/prosperon/prosperon.ce @@ -13,7 +13,6 @@ $_.start(e => { video = e.actor graphics = use('graphics', video) send(video, {kind:"window", op:"makeRenderer"}, e => { - log.console("MADE A WINDOW! so now renderer") $_.start(e => { if (gameactor) return gameactor = e.actor @@ -246,13 +245,7 @@ function loop(time) } }) - send(gameactor, {kind:'update', dt:1/60}); - $_.delay(loop, 1/60); - return; - - // Update the game - send(gameactor, {kind:'che', dt:1/60}, e => { - // Get draw commands from game + send(gameactor, {kind:'update', dt:1/60}, e => { send(gameactor, {kind:'draw'}, draw_commands => { var batch_commands = [] @@ -300,5 +293,3 @@ $_.receiver(e => { e.d_pos.y *= -1 } }) - -log.console("main prosperon end") \ No newline at end of file diff --git a/prosperon/sdl_video.ce b/prosperon/sdl_video.ce index 4f1aa319..9d05e7f0 100644 --- a/prosperon/sdl_video.ce +++ b/prosperon/sdl_video.ce @@ -430,7 +430,7 @@ function handle_renderer(msg) { results.push(result); } - return {results: results}; + return {success:true}; default: return {error: "Unknown renderer operation: " + msg.op}; diff --git a/scripts/engine.cm b/scripts/engine.cm index 21900c4e..e4933104 100644 --- a/scripts/engine.cm +++ b/scripts/engine.cm @@ -555,8 +555,6 @@ function actor_send_immediate(actor, send) { } } -var jswota = use('jswota') - function actor_send(actor, message) { if (actor[HEADER] && !actor[HEADER].replycc) // attempting to respond to a message but sender is not expecting; silently drop return @@ -573,14 +571,7 @@ function actor_send(actor, message) { // message to actor in same flock if (actor[ACTORDATA].id && actor_mod.mailbox_exist(actor[ACTORDATA].id)) { - var st = time.number() - var m1 = jswota.encode(message) - var m1t = time.number()-st - st = time.number() - var m2 = wota.encode(message) - var m2t = time.number()-st - log.console(`jswota: ${m1.length} bits in ${m1t}. wota: ${m2.length} bits in ${m2t}.`) - actor_mod.mailbox_push(actor[ACTORDATA].id, m2) + actor_mod.mailbox_push(actor[ACTORDATA].id, wota.encode(message)) return } diff --git a/scripts/jswota.cm b/scripts/jswota.cm index 4cd04943..ef082d35 100644 --- a/scripts/jswota.cm +++ b/scripts/jswota.cm @@ -4,9 +4,10 @@ var utf8 = use('utf8') var INT = new blob(8, false) stone(INT) -var FP = new blob(8) -FP.write_fit(1,8) -stone(FP) +var FP_HEADER = new blob(64) +FP_HEADER.write_fit(0,56) +FP_HEADER.write_fit(1,8) +stone(FP_HEADER) var ARRAY = new blob(8) ARRAY.write_fit(2,8) @@ -24,103 +25,87 @@ var TEXT = new blob(8) TEXT.write_fit(5,8) stone(TEXT) -var SYMBOL = new blob(8) -SYMBOL.write_fit(7,8) -stone(SYMBOL) +var NULL_SYMBOL = new blob(64) +NULL_SYMBOL.write_fit(0,56) +NULL_SYMBOL.write_fit(7,8) +stone(NULL_SYMBOL) -var NULL = new blob(56) -NULL.write_fit(0,56) -stone(NULL) +var FALSE_SYMBOL = new blob(64) +FALSE_SYMBOL.write_fit(2,56) +FALSE_SYMBOL.write_fit(7,8) +stone(FALSE_SYMBOL) -var FALSE = new blob(56) -FALSE.write_fit(2,56) -stone(FALSE) +var TRUE_SYMBOL = new blob(64) +TRUE_SYMBOL.write_fit(3,56) +TRUE_SYMBOL.write_fit(7,8) +stone(TRUE_SYMBOL) -var TRUE = new blob(56) -TRUE.write_fit(3, 56) -stone(TRUE) +var PRIVATE_SYMBOL = new blob(64) +PRIVATE_SYMBOL.write_fit(8,56) +PRIVATE_SYMBOL.write_fit(7,8) +stone(PRIVATE_SYMBOL) -var PRIVATE = new blob(56) -PRIVATE.write_fit(8, 56) -stone(PRIVATE) +var SYSTEM_SYMBOL = new blob(64) +SYSTEM_SYMBOL.write_fit(9,56) +SYSTEM_SYMBOL.write_fit(7,8) +stone(SYSTEM_SYMBOL) -var SYSTEM = new blob(56) -SYSTEM.write_fit(9, 56) -stone(SYSTEM) +var key_cache = {} -var encoders = {} - -encoders.number = function(b, val) +function encode_key(key) { - // encoding all as floats - b.write_fit(0,56) - b.write_blob(FP) - b.write_number(val) -} - -function encode_array(b, val) -{ - b.write_fit(val.length, 56) - b.write_blob(ARRAY) - for (var v of val) - encode_val(b, v) -} - -function encode_object(b, val) -{ - var keys = Object.keys(val) - b.write_fit(b, keys.length) - b.write_blob(RECORD) - for (var key of keys) { - if (typeof val[key] === 'function') continue - encoders.string(b, key) - encode_val(b, val[key]) - } -} - -function encode_blob(b, val) -{ - b.write_fit(val.length, 56) - b.write_blob(BLOB) - b.write_blob(val) -} - -encoders.object = function(b, val) -{ - if (Array.isArray(val)) - encode_array(b,val) - else if (val instanceof blob) - encode_blob(b,val) - else - encode_object(b,val) -} - -encoders.string = function(b, val) -{ - // encoding as utf8 - b.write_fit(utf8.byte_length(val), 56) - b.write_blob(TEXT) - b.write_blob(utf8.encode(val)) -} - -encoders.boolean = function(b, val) -{ - if (val) - b.write_blob(TRUE) - else - b.write_blob(FALSE) - b.write_blob(SYMBOL) -} - -encoders.undefined = function(b, val) -{ - b.write_blob(NULL) - b.write_blob(SYMBOL) + if (key_cache[key]) + return key_cache[key] + + var encoded_key = utf8.encode(key) + var cached_blob = new blob(64 + encoded_key.length) + cached_blob.write_fit(utf8.byte_length(key), 56) + cached_blob.write_blob(TEXT) + cached_blob.write_blob(encoded_key) + stone(cached_blob) + + key_cache[key] = cached_blob + return cached_blob } function encode_val(b, val) { - encoders[typeof val](b, val) + var type = typeof val + if (type === 'number') { + b.write_blob(FP_HEADER) + b.write_number(val) + } else if (type === 'string') { + b.write_fit(utf8.byte_length(val), 56) + b.write_blob(TEXT) + b.write_blob(utf8.encode(val)) + } else if (type === 'boolean') { + if (val) + b.write_blob(TRUE_SYMBOL) + else + b.write_blob(FALSE_SYMBOL) + } else if (type === 'undefined') { + b.write_blob(NULL_SYMBOL) + } else if (type === 'object') { + if (Array.isArray(val)) { + b.write_fit(val.length, 56) + b.write_blob(ARRAY) + for (var v of val) + encode_val(b, v) + } else if (val instanceof blob) { + b.write_fit(val.length, 56) + b.write_blob(BLOB) + b.write_blob(val) + } else { + var keys = Object.keys(val) + b.write_fit(keys.length, 56) + b.write_blob(RECORD) + for (var key of keys) { + if (typeof val[key] === 'function') continue + b.write_blob(encode_key(key)) + encode_val(b, val[key]) + } + } + } } function encode(val) @@ -131,4 +116,9 @@ function encode(val) return stone(b) } -return { INT,FP,ARRAY,RECORD,BLOB,TEXT,SYMBOL, encode } \ No newline at end of file +function decode(b) +{ + return undefined +} + +return { INT, FP_HEADER, ARRAY, RECORD, BLOB, TEXT, NULL_SYMBOL, FALSE_SYMBOL, TRUE_SYMBOL, PRIVATE_SYMBOL, SYSTEM_SYMBOL, encode, decode } \ No newline at end of file diff --git a/source/jsffi.c b/source/jsffi.c index 334e788e..9fcc05f0 100644 --- a/source/jsffi.c +++ b/source/jsffi.c @@ -1531,6 +1531,7 @@ JSC_CCALL(os_value_id, #include "qjs_http.h" #include "qjs_wota.h" #include "qjs_socket.h" +#include "qjs_nota.h" //JSValue js_imgui_use(JSContext *js); #define MISTLINE(NAME) (ModuleEntry){#NAME, js_##NAME##_use} @@ -1560,6 +1561,8 @@ void ffi_load(JSContext *js) arrput(rt->module_registry, MISTLINE(kim)); arrput(rt->module_registry, MISTLINE(utf8)); arrput(rt->module_registry, MISTLINE(fit)); + arrput(rt->module_registry, MISTLINE(wota)); + arrput(rt->module_registry, MISTLINE(nota)); // power user arrput(rt->module_registry, MISTLINE(js)); diff --git a/source/qjs_wota.c b/source/qjs_wota.c index eaccaf2e..c06db959 100644 --- a/source/qjs_wota.c +++ b/source/qjs_wota.c @@ -4,9 +4,14 @@ #include "wota.h" #include +typedef struct ObjectRef { + void *ptr; + struct ObjectRef *next; +} ObjectRef; + typedef struct WotaEncodeContext { JSContext *ctx; - JSValue visited_stack; + ObjectRef *visited_stack; WotaBuffer wb; int cycle; JSValue replacer; @@ -14,34 +19,46 @@ typedef struct WotaEncodeContext { static void wota_stack_push(WotaEncodeContext *enc, JSValueConst val) { - JSContext *ctx = enc->ctx; - int len = JS_ArrayLength(ctx, enc->visited_stack); - JS_SetPropertyInt64(ctx, enc->visited_stack, len, JS_DupValue(ctx, val)); + if (!JS_IsObject(val)) return; + + ObjectRef *ref = malloc(sizeof(ObjectRef)); + if (!ref) return; + + ref->ptr = JS_VALUE_GET_PTR(val); + ref->next = enc->visited_stack; + enc->visited_stack = ref; } static void wota_stack_pop(WotaEncodeContext *enc) { - JSContext *ctx = enc->ctx; - int len = JS_ArrayLength(ctx, enc->visited_stack); - JS_SetPropertyStr(ctx, enc->visited_stack, "length", JS_NewUint32(ctx, len - 1)); + if (!enc->visited_stack) return; + + ObjectRef *top = enc->visited_stack; + enc->visited_stack = top->next; + free(top); } static int wota_stack_has(WotaEncodeContext *enc, JSValueConst val) { - JSContext *ctx = enc->ctx; - int len = JS_ArrayLength(ctx, enc->visited_stack); - for (int i = 0; i < len; i++) { - JSValue elem = JS_GetPropertyUint32(ctx, enc->visited_stack, i); - if (JS_IsObject(elem) && JS_IsObject(val)) - if (JS_StrictEq(ctx, elem, val)) { - JS_FreeValue(ctx, elem); - return 1; - } - JS_FreeValue(ctx, elem); + if (!JS_IsObject(val)) return 0; + + void *ptr = JS_VALUE_GET_PTR(val); + ObjectRef *current = enc->visited_stack; + + while (current) { + if (current->ptr == ptr) return 1; + current = current->next; } return 0; } +static void wota_stack_free(WotaEncodeContext *enc) +{ + while (enc->visited_stack) { + wota_stack_pop(enc); + } +} + static JSValue apply_replacer(WotaEncodeContext *enc, JSValueConst holder, JSValueConst key, JSValueConst val) { if (JS_IsUndefined(enc->replacer)) return JS_DupValue(enc->ctx, val); @@ -65,25 +82,33 @@ static void encode_object_properties(WotaEncodeContext *enc, JSValueConst val, J return; } uint32_t non_function_count = 0; - for (uint32_t i = 0; i < plen; i++) { - JSValue prop_val = JS_GetProperty(ctx, val, ptab[i].atom); - if (!JS_IsFunction(ctx, prop_val)) non_function_count++; - JS_FreeValue(ctx, prop_val); - } - wota_write_record(&enc->wb, non_function_count); + JSValue props[plen]; + JSAtom atoms[plen]; + for (uint32_t i = 0; i < plen; i++) { JSValue prop_val = JS_GetProperty(ctx, val, ptab[i].atom); if (!JS_IsFunction(ctx, prop_val)) { - const char *prop_name = JS_AtomToCString(ctx, ptab[i].atom); - JSValue prop_key = JS_AtomToValue(ctx, ptab[i].atom); - wota_write_text(&enc->wb, prop_name); - wota_encode_value(enc, prop_val, val, prop_key); - JS_FreeCString(ctx, prop_name); - JS_FreeValue(ctx, prop_key); - } - JS_FreeValue(ctx, prop_val); - JS_FreeAtom(ctx, ptab[i].atom); + atoms[non_function_count] = ptab[i].atom; + props[non_function_count++] = prop_val; + } else + JS_FreeValue(ctx, prop_val); } + wota_write_record(&enc->wb, non_function_count); + for (uint32_t i = 0; i < non_function_count; i++) { + size_t plen; + const char *prop_name = JS_AtomToCStringLen(ctx, &plen, atoms[i]); + JSValue prop_key = JS_AtomToValue(ctx, atoms[i]); + JSValue prop_val = props[i]; + wota_write_text_len(&enc->wb, prop_name, plen); + wota_encode_value(enc, prop_val, val, prop_key); + JS_FreeCString(ctx, prop_name); + JS_FreeValue(ctx, prop_key); + JS_FreeValue(ctx, prop_val); + } + + for (int i = 0; i < plen; i++) + JS_FreeAtom(ctx, ptab[i].atom); + js_free(ctx, ptab); } @@ -115,8 +140,9 @@ static void wota_encode_value(WotaEncodeContext *enc, JSValueConst val, JSValueC break; } case JS_TAG_STRING: { - const char *str = JS_ToCString(ctx, replaced); - wota_write_text(&enc->wb, str ? str : ""); + size_t plen; + const char *str = JS_ToCStringLen(ctx, &plen, replaced); + wota_write_text_len(&enc->wb, str ? str : "", str ? plen : 0); JS_FreeCString(ctx, str); break; } @@ -140,9 +166,10 @@ static void wota_encode_value(WotaEncodeContext *enc, JSValueConst val, JSValueC break; } wota_stack_push(enc, replaced); - int arr_len = JS_ArrayLength(ctx, replaced); + int64_t arr_len; + JS_GetLength(ctx, replaced, &arr_len); wota_write_array(&enc->wb, arr_len); - for (int i = 0; i < arr_len; i++) { + for (int64_t i = 0; i < arr_len; i++) { JSValue elem_val = JS_GetPropertyUint32(ctx, replaced, i); JSValue elem_key = JS_NewInt32(ctx, i); wota_encode_value(enc, elem_val, replaced, elem_key); @@ -242,6 +269,7 @@ static char *decode_wota_value(JSContext *ctx, char *data_ptr, JSValue *out_val, long long c; data_ptr = wota_read_array(&c, data_ptr); JSValue arr = JS_NewArray(ctx); + JS_SetLength(ctx, arr, c); for (long long i = 0; i < c; i++) { JSValue elem_val = JS_UNDEFINED; data_ptr = decode_wota_value(ctx, data_ptr, &elem_val, arr, JS_NewInt32(ctx, i), reviver); @@ -292,17 +320,17 @@ void *value2wota(JSContext *ctx, JSValue v, JSValue replacer, size_t *bytes) WotaEncodeContext enc_s, *enc = &enc_s; enc->ctx = ctx; - enc->visited_stack = JS_NewArray(ctx); + enc->visited_stack = NULL; enc->cycle = 0; enc->replacer = replacer; wota_buffer_init(&enc->wb, 16); wota_encode_value(enc, v, JS_UNDEFINED, JS_UNDEFINED); if (enc->cycle) { - JS_FreeValue(ctx, enc->visited_stack); + wota_stack_free(enc); wota_buffer_free(&enc->wb); return NULL; } - JS_FreeValue(ctx, enc->visited_stack); + wota_stack_free(enc); size_t total_bytes = enc->wb.size * sizeof(uint64_t); void *wota = realloc(enc->wb.data, total_bytes); if (bytes) *bytes = total_bytes; diff --git a/source/wota.h b/source/wota.h index 5fa56e1f..f2197327 100644 --- a/source/wota.h +++ b/source/wota.h @@ -73,6 +73,7 @@ void wota_buffer_free(WotaBuffer *wb); /* Writing function prototypes */ void wota_write_blob (WotaBuffer *wb, unsigned long long nbits, const char *data); void wota_write_text (WotaBuffer *wb, const char *utf8); +void wota_write_text_len(WotaBuffer *wb, const char *utf8, size_t len); void wota_write_array (WotaBuffer *wb, unsigned long long count); void wota_write_record (WotaBuffer *wb, unsigned long long count); /* We'll store numbers as either 56-bit integers or raw double */ @@ -396,10 +397,9 @@ char *wota_read_blob(long long *byte_len, char **blob, char *wota) /* TEXT: - preamble => top 56 bits = #characters, LSB=0x05 - then floor((nchars+1)/2) 64-bit words - each word has 2 UTF-32 codepoints: top 32 bits = codepoint1, - low 32 bits = codepoint2 + preamble => top 56 bits = #bytes in UTF-8, LSB=0x05 + then floor((nbytes + 7)/8) 64-bit words + containing the UTF-8 bytes, packed 8 bytes per word */ char *wota_read_text(char **text_utf8, char *wota) { @@ -412,68 +412,26 @@ char *wota_read_text(char **text_utf8, char *wota) return wota_skip1(wota); } - uint64_t nchars = (first >> 8); - long long nwords = (long long)((nchars + 1ULL) >> 1); + uint64_t nbytes = (first >> 8); + long long nwords = (long long)((nbytes + 7ULL) >> 3); uint64_t *data_words = p + 1; - /* - We'll convert them to a UTF-8 string. Each codepoint can - become up to 4 bytes. So we need up to 4*nchars + 1. - */ - size_t max_utf8 = (size_t)(4 * nchars + 1); - char *out = (char *)malloc(max_utf8); + + char *out = (char *)malloc((size_t)(nbytes + 1)); if (!out) { fprintf(stderr, "malloc failed in wota_read_text\n"); abort(); } - size_t out_len = 0; + /* Copy bytes from the packed 64-bit words */ for (long long i = 0; i < nwords; i++) { uint64_t wval = data_words[i]; - uint32_t c1 = (uint32_t)(wval >> 32); - uint32_t c2 = (uint32_t)(wval & 0xffffffffULL); - - // If we haven't exceeded nchars, convert c1 -> UTF-8 - if ((i * 2) + 0 < (long long)nchars) { - uint32_t c = c1; - if (c < 0x80) { - out[out_len++] = (char)c; - } else if (c < 0x800) { - out[out_len++] = (char)(0xC0 | (c >> 6)); - out[out_len++] = (char)(0x80 | (c & 0x3F)); - } else if (c < 0x10000) { - out[out_len++] = (char)(0xE0 | (c >> 12)); - out[out_len++] = (char)(0x80 | ((c >> 6) & 0x3F)); - out[out_len++] = (char)(0x80 | (c & 0x3F)); - } else { - out[out_len++] = (char)(0xF0 | (c >> 18)); - out[out_len++] = (char)(0x80 | ((c >> 12) & 0x3F)); - out[out_len++] = (char)(0x80 | ((c >> 6) & 0x3F)); - out[out_len++] = (char)(0x80 | (c & 0x3F)); - } - } - // Similarly for c2: - if ((i * 2) + 1 < (long long)nchars) { - uint32_t c = c2; - if (c < 0x80) { - out[out_len++] = (char)c; - } else if (c < 0x800) { - out[out_len++] = (char)(0xC0 | (c >> 6)); - out[out_len++] = (char)(0x80 | (c & 0x3F)); - } else if (c < 0x10000) { - out[out_len++] = (char)(0xE0 | (c >> 12)); - out[out_len++] = (char)(0x80 | ((c >> 6) & 0x3F)); - out[out_len++] = (char)(0x80 | (c & 0x3F)); - } else { - out[out_len++] = (char)(0xF0 | (c >> 18)); - out[out_len++] = (char)(0x80 | ((c >> 12) & 0x3F)); - out[out_len++] = (char)(0x80 | ((c >> 6) & 0x3F)); - out[out_len++] = (char)(0x80 | (c & 0x3F)); - } + for (int j = 0; j < 8 && (i * 8 + j) < (long long)nbytes; j++) { + out[i * 8 + j] = (char)((wval >> (56 - j * 8)) & 0xff); } } - out[out_len] = '\0'; + out[nbytes] = '\0'; *text_utf8 = out; return (char *)(data_words + nwords); @@ -625,70 +583,37 @@ void wota_write_blob(WotaBuffer *wb, unsigned long long nbits, const char *data) } } -void wota_write_text(WotaBuffer *wb, const char *utf8) +void wota_write_text_len(WotaBuffer *wb, const char *utf8, size_t nbytes) { if (!utf8) utf8 = ""; - /* Convert the utf8 string to an array of UTF-32 codepoints. */ - size_t len = strlen(utf8); - const unsigned char *uc = (const unsigned char *)utf8; - /* In worst case, every single byte might form a codepoint, so we allocate enough: */ - uint32_t *codepoints = (uint32_t *)malloc(sizeof(uint32_t)*(len+1)); - if (!codepoints) { - fprintf(stderr, "malloc failed in wota_write_text\n"); - abort(); - } - size_t ccount = 0; - - while (*uc) { - uint32_t c; - if ((uc[0] & 0x80) == 0) { - c = uc[0]; - uc += 1; - } else if ((uc[0] & 0xe0) == 0xc0 && (uc[1] != 0)) { - c = ((uc[0] & 0x1f) << 6) | (uc[1] & 0x3f); - uc += 2; - } else if ((uc[0] & 0xf0) == 0xe0 && (uc[1] != 0) && (uc[2] != 0)) { - c = ((uc[0] & 0x0f) << 12) | ((uc[1] & 0x3f) << 6) | (uc[2] & 0x3f); - uc += 3; - } else if ((uc[0] & 0xf8) == 0xf0 && (uc[1] != 0) && (uc[2] != 0) && (uc[3] != 0)) { - c = ((uc[0] & 0x07) << 18) | ((uc[1] & 0x3f) << 12) - | ((uc[2] & 0x3f) << 6) | (uc[3] & 0x3f); - uc += 4; - } else { - /* invalid sequence => skip 1 byte */ - c = uc[0]; - uc++; - } - codepoints[ccount++] = c; - } - - /* preamble => top 56 bits = ccount, LSB=0x05 */ - uint64_t preamble = ((uint64_t)ccount << 8) | (uint64_t)WOTA_TEXT; + /* preamble => top 56 bits = nbytes, LSB=0x05 */ + uint64_t preamble = ((uint64_t)nbytes << 8) | (uint64_t)WOTA_TEXT; uint64_t *pw = wota_buffer_alloc(wb, 1); pw[0] = preamble; - /* store pairs of 32-bit codepoints in 64-bit words */ - size_t nwords = (ccount + 1) / 2; + /* pack UTF-8 bytes into 64-bit words, 8 bytes per word */ + size_t nwords = (nbytes + 7) / 8; if (nwords == 0) { - free(codepoints); return; } uint64_t *blocks = wota_buffer_alloc(wb, nwords); - size_t idx = 0; - for (size_t i = 0; i < nwords; i++) { - uint64_t hi = 0, lo = 0; - if (idx < ccount) { - hi = codepoints[idx++]; - } - if (idx < ccount) { - lo = codepoints[idx++]; - } - blocks[i] = ((hi & 0xffffffffULL) << 32) | (lo & 0xffffffffULL); - } + memset(blocks, 0, nwords * sizeof(uint64_t)); - free(codepoints); + for (size_t i = 0; i < nwords; i++) { + uint64_t wval = 0; + for (int j = 0; j < 8 && (i * 8 + j) < nbytes; j++) { + wval |= ((uint64_t)(unsigned char)utf8[i * 8 + j]) << (56 - j * 8); + } + blocks[i] = wval; + } +} + +void wota_write_text(WotaBuffer *wb, const char *utf8) +{ + if (!utf8) utf8 = ""; + wota_write_text_len(wb, utf8, strlen(utf8)); } void wota_write_array(WotaBuffer *wb, unsigned long long count) diff --git a/tests/jswota.ce b/tests/jswota.ce index af177ce3..8cb5413a 100644 --- a/tests/jswota.ce +++ b/tests/jswota.ce @@ -4,12 +4,14 @@ var jswota = use('jswota'); log.console("Testing jswota headers:"); log.console("INT header:", text(jswota.INT, 'b')); -log.console("FP header:", text(jswota.FP, 'b')); +log.console("FP_HEADER:", text(jswota.FP_HEADER, 'b')); log.console("ARRAY header:", text(jswota.ARRAY, 'b')); log.console("RECORD header:", text(jswota.RECORD, 'b')); log.console("BLOB header:", text(jswota.BLOB, 'b')); log.console("TEXT header:", text(jswota.TEXT, 'b')); -log.console("SYMBOL header:", text(jswota.SYMBOL, 'b')); +log.console("NULL_SYMBOL:", text(jswota.NULL_SYMBOL, 'b')); +log.console("FALSE_SYMBOL:", text(jswota.FALSE_SYMBOL, 'b')); +log.console("TRUE_SYMBOL:", text(jswota.TRUE_SYMBOL, 'b')); log.console("4.25:" ,text(jswota.encode(4.25),'b')); log.console("true:", text(jswota.encode(true),'b'))