faster wota encoding
This commit is contained in:
@@ -6,7 +6,7 @@ ar_timer = 60
|
||||
actor_memory = 0
|
||||
net_service = 0.1
|
||||
reply_timeout = 60
|
||||
actor_max = 10_000
|
||||
actor_max = "10_000"
|
||||
stack_max = 0
|
||||
[actors]
|
||||
[actors.prosperon/sdl_video]
|
||||
@@ -14,4 +14,4 @@ main = true
|
||||
[actors.prosperon/prosperon]
|
||||
main = true
|
||||
[actors.prosperon]
|
||||
main = false
|
||||
main = true
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
var wota = use('wota');
|
||||
var nota = use('nota');
|
||||
var json = use('json');
|
||||
var jswota = use('jswota')
|
||||
var os = use('os');
|
||||
//
|
||||
|
||||
@@ -22,7 +23,7 @@ const libraries = [
|
||||
decode: wota.decode,
|
||||
// Wota produces an ArrayBuffer. We'll count `buffer.byteLength` as size.
|
||||
getSize(encoded) {
|
||||
return encoded.byteLength;
|
||||
return encoded.length;
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -31,7 +32,7 @@ const libraries = [
|
||||
decode: nota.decode,
|
||||
// Nota also produces an ArrayBuffer:
|
||||
getSize(encoded) {
|
||||
return encoded.byteLength;
|
||||
return encoded.length;
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -91,11 +92,6 @@ const benchmarks = [
|
||||
data: [ Array.from({length:1000}, (_, i) => i) ],
|
||||
iterations: 1000
|
||||
},
|
||||
{
|
||||
name: "Large Binary Blob (256KB)",
|
||||
data: [ new Uint8Array(256 * 1024).buffer ],
|
||||
iterations: 200
|
||||
}
|
||||
];
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -274,7 +274,6 @@ function update(dt)
|
||||
|
||||
function draw()
|
||||
{
|
||||
return {}
|
||||
draw2d.clear()
|
||||
drawBoard()
|
||||
drawPieces()
|
||||
|
||||
@@ -13,7 +13,6 @@ $_.start(e => {
|
||||
video = e.actor
|
||||
graphics = use('graphics', video)
|
||||
send(video, {kind:"window", op:"makeRenderer"}, e => {
|
||||
log.console("MADE A WINDOW! so now renderer")
|
||||
$_.start(e => {
|
||||
if (gameactor) return
|
||||
gameactor = e.actor
|
||||
@@ -246,13 +245,7 @@ function loop(time)
|
||||
}
|
||||
})
|
||||
|
||||
send(gameactor, {kind:'update', dt:1/60});
|
||||
$_.delay(loop, 1/60);
|
||||
return;
|
||||
|
||||
// Update the game
|
||||
send(gameactor, {kind:'che', dt:1/60}, e => {
|
||||
// Get draw commands from game
|
||||
send(gameactor, {kind:'update', dt:1/60}, e => {
|
||||
send(gameactor, {kind:'draw'}, draw_commands => {
|
||||
var batch_commands = []
|
||||
|
||||
@@ -300,5 +293,3 @@ $_.receiver(e => {
|
||||
e.d_pos.y *= -1
|
||||
}
|
||||
})
|
||||
|
||||
log.console("main prosperon end")
|
||||
@@ -430,7 +430,7 @@ function handle_renderer(msg) {
|
||||
results.push(result);
|
||||
}
|
||||
|
||||
return {results: results};
|
||||
return {success:true};
|
||||
|
||||
default:
|
||||
return {error: "Unknown renderer operation: " + msg.op};
|
||||
|
||||
@@ -555,8 +555,6 @@ function actor_send_immediate(actor, send) {
|
||||
}
|
||||
}
|
||||
|
||||
var jswota = use('jswota')
|
||||
|
||||
function actor_send(actor, message) {
|
||||
if (actor[HEADER] && !actor[HEADER].replycc) // attempting to respond to a message but sender is not expecting; silently drop
|
||||
return
|
||||
@@ -573,14 +571,7 @@ function actor_send(actor, message) {
|
||||
|
||||
// message to actor in same flock
|
||||
if (actor[ACTORDATA].id && actor_mod.mailbox_exist(actor[ACTORDATA].id)) {
|
||||
var st = time.number()
|
||||
var m1 = jswota.encode(message)
|
||||
var m1t = time.number()-st
|
||||
st = time.number()
|
||||
var m2 = wota.encode(message)
|
||||
var m2t = time.number()-st
|
||||
log.console(`jswota: ${m1.length} bits in ${m1t}. wota: ${m2.length} bits in ${m2t}.`)
|
||||
actor_mod.mailbox_push(actor[ACTORDATA].id, m2)
|
||||
actor_mod.mailbox_push(actor[ACTORDATA].id, wota.encode(message))
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -4,9 +4,10 @@ var utf8 = use('utf8')
|
||||
var INT = new blob(8, false)
|
||||
stone(INT)
|
||||
|
||||
var FP = new blob(8)
|
||||
FP.write_fit(1,8)
|
||||
stone(FP)
|
||||
var FP_HEADER = new blob(64)
|
||||
FP_HEADER.write_fit(0,56)
|
||||
FP_HEADER.write_fit(1,8)
|
||||
stone(FP_HEADER)
|
||||
|
||||
var ARRAY = new blob(8)
|
||||
ARRAY.write_fit(2,8)
|
||||
@@ -24,103 +25,87 @@ var TEXT = new blob(8)
|
||||
TEXT.write_fit(5,8)
|
||||
stone(TEXT)
|
||||
|
||||
var SYMBOL = new blob(8)
|
||||
SYMBOL.write_fit(7,8)
|
||||
stone(SYMBOL)
|
||||
var NULL_SYMBOL = new blob(64)
|
||||
NULL_SYMBOL.write_fit(0,56)
|
||||
NULL_SYMBOL.write_fit(7,8)
|
||||
stone(NULL_SYMBOL)
|
||||
|
||||
var NULL = new blob(56)
|
||||
NULL.write_fit(0,56)
|
||||
stone(NULL)
|
||||
var FALSE_SYMBOL = new blob(64)
|
||||
FALSE_SYMBOL.write_fit(2,56)
|
||||
FALSE_SYMBOL.write_fit(7,8)
|
||||
stone(FALSE_SYMBOL)
|
||||
|
||||
var FALSE = new blob(56)
|
||||
FALSE.write_fit(2,56)
|
||||
stone(FALSE)
|
||||
var TRUE_SYMBOL = new blob(64)
|
||||
TRUE_SYMBOL.write_fit(3,56)
|
||||
TRUE_SYMBOL.write_fit(7,8)
|
||||
stone(TRUE_SYMBOL)
|
||||
|
||||
var TRUE = new blob(56)
|
||||
TRUE.write_fit(3, 56)
|
||||
stone(TRUE)
|
||||
var PRIVATE_SYMBOL = new blob(64)
|
||||
PRIVATE_SYMBOL.write_fit(8,56)
|
||||
PRIVATE_SYMBOL.write_fit(7,8)
|
||||
stone(PRIVATE_SYMBOL)
|
||||
|
||||
var PRIVATE = new blob(56)
|
||||
PRIVATE.write_fit(8, 56)
|
||||
stone(PRIVATE)
|
||||
var SYSTEM_SYMBOL = new blob(64)
|
||||
SYSTEM_SYMBOL.write_fit(9,56)
|
||||
SYSTEM_SYMBOL.write_fit(7,8)
|
||||
stone(SYSTEM_SYMBOL)
|
||||
|
||||
var SYSTEM = new blob(56)
|
||||
SYSTEM.write_fit(9, 56)
|
||||
stone(SYSTEM)
|
||||
var key_cache = {}
|
||||
|
||||
var encoders = {}
|
||||
|
||||
encoders.number = function(b, val)
|
||||
function encode_key(key)
|
||||
{
|
||||
// encoding all as floats
|
||||
b.write_fit(0,56)
|
||||
b.write_blob(FP)
|
||||
b.write_number(val)
|
||||
}
|
||||
if (key_cache[key])
|
||||
return key_cache[key]
|
||||
|
||||
function encode_array(b, val)
|
||||
{
|
||||
b.write_fit(val.length, 56)
|
||||
b.write_blob(ARRAY)
|
||||
for (var v of val)
|
||||
encode_val(b, v)
|
||||
}
|
||||
var encoded_key = utf8.encode(key)
|
||||
var cached_blob = new blob(64 + encoded_key.length)
|
||||
cached_blob.write_fit(utf8.byte_length(key), 56)
|
||||
cached_blob.write_blob(TEXT)
|
||||
cached_blob.write_blob(encoded_key)
|
||||
stone(cached_blob)
|
||||
|
||||
function encode_object(b, val)
|
||||
{
|
||||
var keys = Object.keys(val)
|
||||
b.write_fit(b, keys.length)
|
||||
b.write_blob(RECORD)
|
||||
for (var key of keys) {
|
||||
if (typeof val[key] === 'function') continue
|
||||
encoders.string(b, key)
|
||||
encode_val(b, val[key])
|
||||
}
|
||||
}
|
||||
|
||||
function encode_blob(b, val)
|
||||
{
|
||||
b.write_fit(val.length, 56)
|
||||
b.write_blob(BLOB)
|
||||
b.write_blob(val)
|
||||
}
|
||||
|
||||
encoders.object = function(b, val)
|
||||
{
|
||||
if (Array.isArray(val))
|
||||
encode_array(b,val)
|
||||
else if (val instanceof blob)
|
||||
encode_blob(b,val)
|
||||
else
|
||||
encode_object(b,val)
|
||||
}
|
||||
|
||||
encoders.string = function(b, val)
|
||||
{
|
||||
// encoding as utf8
|
||||
b.write_fit(utf8.byte_length(val), 56)
|
||||
b.write_blob(TEXT)
|
||||
b.write_blob(utf8.encode(val))
|
||||
}
|
||||
|
||||
encoders.boolean = function(b, val)
|
||||
{
|
||||
if (val)
|
||||
b.write_blob(TRUE)
|
||||
else
|
||||
b.write_blob(FALSE)
|
||||
b.write_blob(SYMBOL)
|
||||
}
|
||||
|
||||
encoders.undefined = function(b, val)
|
||||
{
|
||||
b.write_blob(NULL)
|
||||
b.write_blob(SYMBOL)
|
||||
key_cache[key] = cached_blob
|
||||
return cached_blob
|
||||
}
|
||||
|
||||
function encode_val(b, val)
|
||||
{
|
||||
encoders[typeof val](b, val)
|
||||
var type = typeof val
|
||||
if (type === 'number') {
|
||||
b.write_blob(FP_HEADER)
|
||||
b.write_number(val)
|
||||
} else if (type === 'string') {
|
||||
b.write_fit(utf8.byte_length(val), 56)
|
||||
b.write_blob(TEXT)
|
||||
b.write_blob(utf8.encode(val))
|
||||
} else if (type === 'boolean') {
|
||||
if (val)
|
||||
b.write_blob(TRUE_SYMBOL)
|
||||
else
|
||||
b.write_blob(FALSE_SYMBOL)
|
||||
} else if (type === 'undefined') {
|
||||
b.write_blob(NULL_SYMBOL)
|
||||
} else if (type === 'object') {
|
||||
if (Array.isArray(val)) {
|
||||
b.write_fit(val.length, 56)
|
||||
b.write_blob(ARRAY)
|
||||
for (var v of val)
|
||||
encode_val(b, v)
|
||||
} else if (val instanceof blob) {
|
||||
b.write_fit(val.length, 56)
|
||||
b.write_blob(BLOB)
|
||||
b.write_blob(val)
|
||||
} else {
|
||||
var keys = Object.keys(val)
|
||||
b.write_fit(keys.length, 56)
|
||||
b.write_blob(RECORD)
|
||||
for (var key of keys) {
|
||||
if (typeof val[key] === 'function') continue
|
||||
b.write_blob(encode_key(key))
|
||||
encode_val(b, val[key])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function encode(val)
|
||||
@@ -131,4 +116,9 @@ function encode(val)
|
||||
return stone(b)
|
||||
}
|
||||
|
||||
return { INT,FP,ARRAY,RECORD,BLOB,TEXT,SYMBOL, encode }
|
||||
function decode(b)
|
||||
{
|
||||
return undefined
|
||||
}
|
||||
|
||||
return { INT, FP_HEADER, ARRAY, RECORD, BLOB, TEXT, NULL_SYMBOL, FALSE_SYMBOL, TRUE_SYMBOL, PRIVATE_SYMBOL, SYSTEM_SYMBOL, encode, decode }
|
||||
@@ -1531,6 +1531,7 @@ JSC_CCALL(os_value_id,
|
||||
#include "qjs_http.h"
|
||||
#include "qjs_wota.h"
|
||||
#include "qjs_socket.h"
|
||||
#include "qjs_nota.h"
|
||||
|
||||
//JSValue js_imgui_use(JSContext *js);
|
||||
#define MISTLINE(NAME) (ModuleEntry){#NAME, js_##NAME##_use}
|
||||
@@ -1560,6 +1561,8 @@ void ffi_load(JSContext *js)
|
||||
arrput(rt->module_registry, MISTLINE(kim));
|
||||
arrput(rt->module_registry, MISTLINE(utf8));
|
||||
arrput(rt->module_registry, MISTLINE(fit));
|
||||
arrput(rt->module_registry, MISTLINE(wota));
|
||||
arrput(rt->module_registry, MISTLINE(nota));
|
||||
|
||||
// power user
|
||||
arrput(rt->module_registry, MISTLINE(js));
|
||||
|
||||
@@ -4,9 +4,14 @@
|
||||
#include "wota.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
typedef struct ObjectRef {
|
||||
void *ptr;
|
||||
struct ObjectRef *next;
|
||||
} ObjectRef;
|
||||
|
||||
typedef struct WotaEncodeContext {
|
||||
JSContext *ctx;
|
||||
JSValue visited_stack;
|
||||
ObjectRef *visited_stack;
|
||||
WotaBuffer wb;
|
||||
int cycle;
|
||||
JSValue replacer;
|
||||
@@ -14,34 +19,46 @@ typedef struct WotaEncodeContext {
|
||||
|
||||
static void wota_stack_push(WotaEncodeContext *enc, JSValueConst val)
|
||||
{
|
||||
JSContext *ctx = enc->ctx;
|
||||
int len = JS_ArrayLength(ctx, enc->visited_stack);
|
||||
JS_SetPropertyInt64(ctx, enc->visited_stack, len, JS_DupValue(ctx, val));
|
||||
if (!JS_IsObject(val)) return;
|
||||
|
||||
ObjectRef *ref = malloc(sizeof(ObjectRef));
|
||||
if (!ref) return;
|
||||
|
||||
ref->ptr = JS_VALUE_GET_PTR(val);
|
||||
ref->next = enc->visited_stack;
|
||||
enc->visited_stack = ref;
|
||||
}
|
||||
|
||||
static void wota_stack_pop(WotaEncodeContext *enc)
|
||||
{
|
||||
JSContext *ctx = enc->ctx;
|
||||
int len = JS_ArrayLength(ctx, enc->visited_stack);
|
||||
JS_SetPropertyStr(ctx, enc->visited_stack, "length", JS_NewUint32(ctx, len - 1));
|
||||
if (!enc->visited_stack) return;
|
||||
|
||||
ObjectRef *top = enc->visited_stack;
|
||||
enc->visited_stack = top->next;
|
||||
free(top);
|
||||
}
|
||||
|
||||
static int wota_stack_has(WotaEncodeContext *enc, JSValueConst val)
|
||||
{
|
||||
JSContext *ctx = enc->ctx;
|
||||
int len = JS_ArrayLength(ctx, enc->visited_stack);
|
||||
for (int i = 0; i < len; i++) {
|
||||
JSValue elem = JS_GetPropertyUint32(ctx, enc->visited_stack, i);
|
||||
if (JS_IsObject(elem) && JS_IsObject(val))
|
||||
if (JS_StrictEq(ctx, elem, val)) {
|
||||
JS_FreeValue(ctx, elem);
|
||||
return 1;
|
||||
}
|
||||
JS_FreeValue(ctx, elem);
|
||||
if (!JS_IsObject(val)) return 0;
|
||||
|
||||
void *ptr = JS_VALUE_GET_PTR(val);
|
||||
ObjectRef *current = enc->visited_stack;
|
||||
|
||||
while (current) {
|
||||
if (current->ptr == ptr) return 1;
|
||||
current = current->next;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void wota_stack_free(WotaEncodeContext *enc)
|
||||
{
|
||||
while (enc->visited_stack) {
|
||||
wota_stack_pop(enc);
|
||||
}
|
||||
}
|
||||
|
||||
static JSValue apply_replacer(WotaEncodeContext *enc, JSValueConst holder, JSValueConst key, JSValueConst val)
|
||||
{
|
||||
if (JS_IsUndefined(enc->replacer)) return JS_DupValue(enc->ctx, val);
|
||||
@@ -65,25 +82,33 @@ static void encode_object_properties(WotaEncodeContext *enc, JSValueConst val, J
|
||||
return;
|
||||
}
|
||||
uint32_t non_function_count = 0;
|
||||
for (uint32_t i = 0; i < plen; i++) {
|
||||
JSValue prop_val = JS_GetProperty(ctx, val, ptab[i].atom);
|
||||
if (!JS_IsFunction(ctx, prop_val)) non_function_count++;
|
||||
JS_FreeValue(ctx, prop_val);
|
||||
}
|
||||
wota_write_record(&enc->wb, non_function_count);
|
||||
JSValue props[plen];
|
||||
JSAtom atoms[plen];
|
||||
|
||||
for (uint32_t i = 0; i < plen; i++) {
|
||||
JSValue prop_val = JS_GetProperty(ctx, val, ptab[i].atom);
|
||||
if (!JS_IsFunction(ctx, prop_val)) {
|
||||
const char *prop_name = JS_AtomToCString(ctx, ptab[i].atom);
|
||||
JSValue prop_key = JS_AtomToValue(ctx, ptab[i].atom);
|
||||
wota_write_text(&enc->wb, prop_name);
|
||||
wota_encode_value(enc, prop_val, val, prop_key);
|
||||
JS_FreeCString(ctx, prop_name);
|
||||
JS_FreeValue(ctx, prop_key);
|
||||
}
|
||||
JS_FreeValue(ctx, prop_val);
|
||||
JS_FreeAtom(ctx, ptab[i].atom);
|
||||
atoms[non_function_count] = ptab[i].atom;
|
||||
props[non_function_count++] = prop_val;
|
||||
} else
|
||||
JS_FreeValue(ctx, prop_val);
|
||||
}
|
||||
wota_write_record(&enc->wb, non_function_count);
|
||||
for (uint32_t i = 0; i < non_function_count; i++) {
|
||||
size_t plen;
|
||||
const char *prop_name = JS_AtomToCStringLen(ctx, &plen, atoms[i]);
|
||||
JSValue prop_key = JS_AtomToValue(ctx, atoms[i]);
|
||||
JSValue prop_val = props[i];
|
||||
wota_write_text_len(&enc->wb, prop_name, plen);
|
||||
wota_encode_value(enc, prop_val, val, prop_key);
|
||||
JS_FreeCString(ctx, prop_name);
|
||||
JS_FreeValue(ctx, prop_key);
|
||||
JS_FreeValue(ctx, prop_val);
|
||||
}
|
||||
|
||||
for (int i = 0; i < plen; i++)
|
||||
JS_FreeAtom(ctx, ptab[i].atom);
|
||||
|
||||
js_free(ctx, ptab);
|
||||
}
|
||||
|
||||
@@ -115,8 +140,9 @@ static void wota_encode_value(WotaEncodeContext *enc, JSValueConst val, JSValueC
|
||||
break;
|
||||
}
|
||||
case JS_TAG_STRING: {
|
||||
const char *str = JS_ToCString(ctx, replaced);
|
||||
wota_write_text(&enc->wb, str ? str : "");
|
||||
size_t plen;
|
||||
const char *str = JS_ToCStringLen(ctx, &plen, replaced);
|
||||
wota_write_text_len(&enc->wb, str ? str : "", str ? plen : 0);
|
||||
JS_FreeCString(ctx, str);
|
||||
break;
|
||||
}
|
||||
@@ -140,9 +166,10 @@ static void wota_encode_value(WotaEncodeContext *enc, JSValueConst val, JSValueC
|
||||
break;
|
||||
}
|
||||
wota_stack_push(enc, replaced);
|
||||
int arr_len = JS_ArrayLength(ctx, replaced);
|
||||
int64_t arr_len;
|
||||
JS_GetLength(ctx, replaced, &arr_len);
|
||||
wota_write_array(&enc->wb, arr_len);
|
||||
for (int i = 0; i < arr_len; i++) {
|
||||
for (int64_t i = 0; i < arr_len; i++) {
|
||||
JSValue elem_val = JS_GetPropertyUint32(ctx, replaced, i);
|
||||
JSValue elem_key = JS_NewInt32(ctx, i);
|
||||
wota_encode_value(enc, elem_val, replaced, elem_key);
|
||||
@@ -242,6 +269,7 @@ static char *decode_wota_value(JSContext *ctx, char *data_ptr, JSValue *out_val,
|
||||
long long c;
|
||||
data_ptr = wota_read_array(&c, data_ptr);
|
||||
JSValue arr = JS_NewArray(ctx);
|
||||
JS_SetLength(ctx, arr, c);
|
||||
for (long long i = 0; i < c; i++) {
|
||||
JSValue elem_val = JS_UNDEFINED;
|
||||
data_ptr = decode_wota_value(ctx, data_ptr, &elem_val, arr, JS_NewInt32(ctx, i), reviver);
|
||||
@@ -292,17 +320,17 @@ void *value2wota(JSContext *ctx, JSValue v, JSValue replacer, size_t *bytes)
|
||||
WotaEncodeContext enc_s, *enc = &enc_s;
|
||||
|
||||
enc->ctx = ctx;
|
||||
enc->visited_stack = JS_NewArray(ctx);
|
||||
enc->visited_stack = NULL;
|
||||
enc->cycle = 0;
|
||||
enc->replacer = replacer;
|
||||
wota_buffer_init(&enc->wb, 16);
|
||||
wota_encode_value(enc, v, JS_UNDEFINED, JS_UNDEFINED);
|
||||
if (enc->cycle) {
|
||||
JS_FreeValue(ctx, enc->visited_stack);
|
||||
wota_stack_free(enc);
|
||||
wota_buffer_free(&enc->wb);
|
||||
return NULL;
|
||||
}
|
||||
JS_FreeValue(ctx, enc->visited_stack);
|
||||
wota_stack_free(enc);
|
||||
size_t total_bytes = enc->wb.size * sizeof(uint64_t);
|
||||
void *wota = realloc(enc->wb.data, total_bytes);
|
||||
if (bytes) *bytes = total_bytes;
|
||||
|
||||
137
source/wota.h
137
source/wota.h
@@ -73,6 +73,7 @@ void wota_buffer_free(WotaBuffer *wb);
|
||||
/* Writing function prototypes */
|
||||
void wota_write_blob (WotaBuffer *wb, unsigned long long nbits, const char *data);
|
||||
void wota_write_text (WotaBuffer *wb, const char *utf8);
|
||||
void wota_write_text_len(WotaBuffer *wb, const char *utf8, size_t len);
|
||||
void wota_write_array (WotaBuffer *wb, unsigned long long count);
|
||||
void wota_write_record (WotaBuffer *wb, unsigned long long count);
|
||||
/* We'll store numbers as either 56-bit integers or raw double */
|
||||
@@ -396,10 +397,9 @@ char *wota_read_blob(long long *byte_len, char **blob, char *wota)
|
||||
|
||||
/*
|
||||
TEXT:
|
||||
preamble => top 56 bits = #characters, LSB=0x05
|
||||
then floor((nchars+1)/2) 64-bit words
|
||||
each word has 2 UTF-32 codepoints: top 32 bits = codepoint1,
|
||||
low 32 bits = codepoint2
|
||||
preamble => top 56 bits = #bytes in UTF-8, LSB=0x05
|
||||
then floor((nbytes + 7)/8) 64-bit words
|
||||
containing the UTF-8 bytes, packed 8 bytes per word
|
||||
*/
|
||||
char *wota_read_text(char **text_utf8, char *wota)
|
||||
{
|
||||
@@ -412,68 +412,26 @@ char *wota_read_text(char **text_utf8, char *wota)
|
||||
return wota_skip1(wota);
|
||||
}
|
||||
|
||||
uint64_t nchars = (first >> 8);
|
||||
long long nwords = (long long)((nchars + 1ULL) >> 1);
|
||||
uint64_t nbytes = (first >> 8);
|
||||
long long nwords = (long long)((nbytes + 7ULL) >> 3);
|
||||
|
||||
uint64_t *data_words = p + 1;
|
||||
/*
|
||||
We'll convert them to a UTF-8 string. Each codepoint can
|
||||
become up to 4 bytes. So we need up to 4*nchars + 1.
|
||||
*/
|
||||
size_t max_utf8 = (size_t)(4 * nchars + 1);
|
||||
char *out = (char *)malloc(max_utf8);
|
||||
|
||||
char *out = (char *)malloc((size_t)(nbytes + 1));
|
||||
if (!out) {
|
||||
fprintf(stderr, "malloc failed in wota_read_text\n");
|
||||
abort();
|
||||
}
|
||||
size_t out_len = 0;
|
||||
|
||||
/* Copy bytes from the packed 64-bit words */
|
||||
for (long long i = 0; i < nwords; i++) {
|
||||
uint64_t wval = data_words[i];
|
||||
uint32_t c1 = (uint32_t)(wval >> 32);
|
||||
uint32_t c2 = (uint32_t)(wval & 0xffffffffULL);
|
||||
|
||||
// If we haven't exceeded nchars, convert c1 -> UTF-8
|
||||
if ((i * 2) + 0 < (long long)nchars) {
|
||||
uint32_t c = c1;
|
||||
if (c < 0x80) {
|
||||
out[out_len++] = (char)c;
|
||||
} else if (c < 0x800) {
|
||||
out[out_len++] = (char)(0xC0 | (c >> 6));
|
||||
out[out_len++] = (char)(0x80 | (c & 0x3F));
|
||||
} else if (c < 0x10000) {
|
||||
out[out_len++] = (char)(0xE0 | (c >> 12));
|
||||
out[out_len++] = (char)(0x80 | ((c >> 6) & 0x3F));
|
||||
out[out_len++] = (char)(0x80 | (c & 0x3F));
|
||||
} else {
|
||||
out[out_len++] = (char)(0xF0 | (c >> 18));
|
||||
out[out_len++] = (char)(0x80 | ((c >> 12) & 0x3F));
|
||||
out[out_len++] = (char)(0x80 | ((c >> 6) & 0x3F));
|
||||
out[out_len++] = (char)(0x80 | (c & 0x3F));
|
||||
}
|
||||
}
|
||||
// Similarly for c2:
|
||||
if ((i * 2) + 1 < (long long)nchars) {
|
||||
uint32_t c = c2;
|
||||
if (c < 0x80) {
|
||||
out[out_len++] = (char)c;
|
||||
} else if (c < 0x800) {
|
||||
out[out_len++] = (char)(0xC0 | (c >> 6));
|
||||
out[out_len++] = (char)(0x80 | (c & 0x3F));
|
||||
} else if (c < 0x10000) {
|
||||
out[out_len++] = (char)(0xE0 | (c >> 12));
|
||||
out[out_len++] = (char)(0x80 | ((c >> 6) & 0x3F));
|
||||
out[out_len++] = (char)(0x80 | (c & 0x3F));
|
||||
} else {
|
||||
out[out_len++] = (char)(0xF0 | (c >> 18));
|
||||
out[out_len++] = (char)(0x80 | ((c >> 12) & 0x3F));
|
||||
out[out_len++] = (char)(0x80 | ((c >> 6) & 0x3F));
|
||||
out[out_len++] = (char)(0x80 | (c & 0x3F));
|
||||
}
|
||||
for (int j = 0; j < 8 && (i * 8 + j) < (long long)nbytes; j++) {
|
||||
out[i * 8 + j] = (char)((wval >> (56 - j * 8)) & 0xff);
|
||||
}
|
||||
}
|
||||
|
||||
out[out_len] = '\0';
|
||||
out[nbytes] = '\0';
|
||||
*text_utf8 = out;
|
||||
|
||||
return (char *)(data_words + nwords);
|
||||
@@ -625,70 +583,37 @@ void wota_write_blob(WotaBuffer *wb, unsigned long long nbits, const char *data)
|
||||
}
|
||||
}
|
||||
|
||||
void wota_write_text(WotaBuffer *wb, const char *utf8)
|
||||
void wota_write_text_len(WotaBuffer *wb, const char *utf8, size_t nbytes)
|
||||
{
|
||||
if (!utf8) utf8 = "";
|
||||
|
||||
/* Convert the utf8 string to an array of UTF-32 codepoints. */
|
||||
size_t len = strlen(utf8);
|
||||
const unsigned char *uc = (const unsigned char *)utf8;
|
||||
/* In worst case, every single byte might form a codepoint, so we allocate enough: */
|
||||
uint32_t *codepoints = (uint32_t *)malloc(sizeof(uint32_t)*(len+1));
|
||||
if (!codepoints) {
|
||||
fprintf(stderr, "malloc failed in wota_write_text\n");
|
||||
abort();
|
||||
}
|
||||
size_t ccount = 0;
|
||||
|
||||
while (*uc) {
|
||||
uint32_t c;
|
||||
if ((uc[0] & 0x80) == 0) {
|
||||
c = uc[0];
|
||||
uc += 1;
|
||||
} else if ((uc[0] & 0xe0) == 0xc0 && (uc[1] != 0)) {
|
||||
c = ((uc[0] & 0x1f) << 6) | (uc[1] & 0x3f);
|
||||
uc += 2;
|
||||
} else if ((uc[0] & 0xf0) == 0xe0 && (uc[1] != 0) && (uc[2] != 0)) {
|
||||
c = ((uc[0] & 0x0f) << 12) | ((uc[1] & 0x3f) << 6) | (uc[2] & 0x3f);
|
||||
uc += 3;
|
||||
} else if ((uc[0] & 0xf8) == 0xf0 && (uc[1] != 0) && (uc[2] != 0) && (uc[3] != 0)) {
|
||||
c = ((uc[0] & 0x07) << 18) | ((uc[1] & 0x3f) << 12)
|
||||
| ((uc[2] & 0x3f) << 6) | (uc[3] & 0x3f);
|
||||
uc += 4;
|
||||
} else {
|
||||
/* invalid sequence => skip 1 byte */
|
||||
c = uc[0];
|
||||
uc++;
|
||||
}
|
||||
codepoints[ccount++] = c;
|
||||
}
|
||||
|
||||
/* preamble => top 56 bits = ccount, LSB=0x05 */
|
||||
uint64_t preamble = ((uint64_t)ccount << 8) | (uint64_t)WOTA_TEXT;
|
||||
/* preamble => top 56 bits = nbytes, LSB=0x05 */
|
||||
uint64_t preamble = ((uint64_t)nbytes << 8) | (uint64_t)WOTA_TEXT;
|
||||
uint64_t *pw = wota_buffer_alloc(wb, 1);
|
||||
pw[0] = preamble;
|
||||
|
||||
/* store pairs of 32-bit codepoints in 64-bit words */
|
||||
size_t nwords = (ccount + 1) / 2;
|
||||
/* pack UTF-8 bytes into 64-bit words, 8 bytes per word */
|
||||
size_t nwords = (nbytes + 7) / 8;
|
||||
if (nwords == 0) {
|
||||
free(codepoints);
|
||||
return;
|
||||
}
|
||||
|
||||
uint64_t *blocks = wota_buffer_alloc(wb, nwords);
|
||||
size_t idx = 0;
|
||||
for (size_t i = 0; i < nwords; i++) {
|
||||
uint64_t hi = 0, lo = 0;
|
||||
if (idx < ccount) {
|
||||
hi = codepoints[idx++];
|
||||
}
|
||||
if (idx < ccount) {
|
||||
lo = codepoints[idx++];
|
||||
}
|
||||
blocks[i] = ((hi & 0xffffffffULL) << 32) | (lo & 0xffffffffULL);
|
||||
}
|
||||
memset(blocks, 0, nwords * sizeof(uint64_t));
|
||||
|
||||
free(codepoints);
|
||||
for (size_t i = 0; i < nwords; i++) {
|
||||
uint64_t wval = 0;
|
||||
for (int j = 0; j < 8 && (i * 8 + j) < nbytes; j++) {
|
||||
wval |= ((uint64_t)(unsigned char)utf8[i * 8 + j]) << (56 - j * 8);
|
||||
}
|
||||
blocks[i] = wval;
|
||||
}
|
||||
}
|
||||
|
||||
void wota_write_text(WotaBuffer *wb, const char *utf8)
|
||||
{
|
||||
if (!utf8) utf8 = "";
|
||||
wota_write_text_len(wb, utf8, strlen(utf8));
|
||||
}
|
||||
|
||||
void wota_write_array(WotaBuffer *wb, unsigned long long count)
|
||||
|
||||
@@ -4,12 +4,14 @@ var jswota = use('jswota');
|
||||
log.console("Testing jswota headers:");
|
||||
|
||||
log.console("INT header:", text(jswota.INT, 'b'));
|
||||
log.console("FP header:", text(jswota.FP, 'b'));
|
||||
log.console("FP_HEADER:", text(jswota.FP_HEADER, 'b'));
|
||||
log.console("ARRAY header:", text(jswota.ARRAY, 'b'));
|
||||
log.console("RECORD header:", text(jswota.RECORD, 'b'));
|
||||
log.console("BLOB header:", text(jswota.BLOB, 'b'));
|
||||
log.console("TEXT header:", text(jswota.TEXT, 'b'));
|
||||
log.console("SYMBOL header:", text(jswota.SYMBOL, 'b'));
|
||||
log.console("NULL_SYMBOL:", text(jswota.NULL_SYMBOL, 'b'));
|
||||
log.console("FALSE_SYMBOL:", text(jswota.FALSE_SYMBOL, 'b'));
|
||||
log.console("TRUE_SYMBOL:", text(jswota.TRUE_SYMBOL, 'b'));
|
||||
|
||||
log.console("4.25:" ,text(jswota.encode(4.25),'b'));
|
||||
log.console("true:", text(jswota.encode(true),'b'))
|
||||
|
||||
Reference in New Issue
Block a user