#ifndef WOTA_H
#define WOTA_H

#include <stddef.h>
#include <stdint.h>

/* ----------------------------------------------------------------
   Wota Type Codes (LSB of a 64-bit word)
   ---------------------------------------------------------------- */
#define WOTA_INT   0x00
#define WOTA_FLOAT 0x01
#define WOTA_ARR   0x02
#define WOTA_REC   0x03
#define WOTA_BLOB  0x04
#define WOTA_TEXT  0x05
#define WOTA_SYM   0x07

/* ----------------------------------------------------------------
   Wota Symbol Codes (stored in top 56 bits)
   e.g. word = ((uint64_t)sym_code << 8) | WOTA_SYM
   ---------------------------------------------------------------- */
#define WOTA_NULL    0x00
#define WOTA_FALSE   0x02
#define WOTA_TRUE    0x03
#define WOTA_PRIVATE 0x08
#define WOTA_SYSTEM  0x09

/*
   All data is stored in 64-bit words held in host byte order. The least
   significant byte of a leading word is the type code; the top 56 bits
   are used differently depending on the type.

   This version (non-standard) stores floating-point values as *raw
   64-bit IEEE 754 doubles* in a second 64-bit word.
*/

/* ----------------------------------------------------------------
   Accessor: return the Wota type code from the LSB of a 64-bit word
   ---------------------------------------------------------------- */
static inline int wota_type(const uint64_t *w)
{
    return (int)(*w & 0xffU);
}

/* ----------------------------------------------------------------
   Reading function prototypes.

   Each reader takes a pointer to the start of a value and returns a
   pointer to the data that follows it. If the word at 'wota' does not
   carry the type the reader expects, the outputs are left untouched
   and exactly one word is skipped.

   Passing NULL for the output pointer(s) skips the value:
     - int / sym / array / record: skips the single header word (array
       and record elements are NOT skipped; they follow as separate
       values);
     - float: skips two words without inspecting the type;
     - blob / text: skips the preamble and the whole payload.

   wota_read_blob and wota_read_text* return malloc'd memory through
   their output pointer; the caller owns it and must free() it. Text is
   always NUL-terminated. Allocation failure aborts the process.
   ---------------------------------------------------------------- */
char *wota_read_blob    (long long *byte_len, char **blob, char *wota);
char *wota_read_text    (char **text_utf8, char *wota);
char *wota_read_text_len(size_t *byte_len, char **text_utf8, char *wota);
char *wota_read_array   (long long *count, char *wota);
char *wota_read_record  (long long *count, char *wota);
char *wota_read_float   (double *d, char *wota);
char *wota_read_int     (long long *n, char *wota);
char *wota_read_sym     (int *sym_code, char *wota);

/* ----------------------------------------------------------------
   WotaBuffer: dynamic array of 64-bit words for building a Wota
   message in memory.
   ---------------------------------------------------------------- */
typedef struct WotaBuffer {
    uint64_t *data;     /* allocated array of 64-bit words */
    size_t    size;     /* how many 64-bit words are used */
    size_t    capacity; /* allocated capacity in 64-bit words */
} WotaBuffer;

/* Return a malloc'd copy of the used part of the buffer (size * 8 bytes).
   The caller owns the copy and must free() it. */
void *wota_dup_data(struct WotaBuffer *wb);

/* Buffer management */
void wota_buffer_init(WotaBuffer *wb, size_t initial_capacity_in_words);
void wota_buffer_free(WotaBuffer *wb);

/* Writing function prototypes.
   Lengths and counts are stored in 56 bits; larger values are truncated. */
void wota_write_blob    (WotaBuffer *wb, unsigned long long nbits, const char *data);
void wota_write_text    (WotaBuffer *wb, const char *utf8);
void wota_write_text_len(WotaBuffer *wb, const char *utf8, size_t len);
void wota_write_array   (WotaBuffer *wb, unsigned long long count);
void wota_write_record  (WotaBuffer *wb, unsigned long long count);

/* Numbers are stored as either 56-bit integers or raw doubles */
void wota_write_number  (WotaBuffer *wb, double n);

/* Symbol codes (WOTA_NULL, WOTA_FALSE, etc.) */
void wota_write_sym     (WotaBuffer *wb, int sym_code);

void wota_write_int_word  (WotaBuffer *wb, long long val);
void wota_write_float_word(WotaBuffer *wb, double val);

#ifdef WOTA_IMPLEMENTATION

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* ================================================================
   Detect endianness. This decides whether the bulk blob copy needs a
   64-bit byte swap. If you know you only run on little-endian, you
   can hard-code that.
   ================================================================ */
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
  #define WOTA_BIG_ENDIAN 1
#elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
  #define WOTA_LITTLE_ENDIAN 1
#elif defined(_MSC_VER)
  /* MSVC on x86/x64 is little-endian */
  #define WOTA_LITTLE_ENDIAN 1
#else
  /* Fallback: assume little-endian if unknown. Adjust if your
     platform is otherwise. */
  #define WOTA_LITTLE_ENDIAN 1
#endif

/* 64-bit byte-swap helper (for bit-level blob ordering) */
static inline uint64_t wota_bswap64(uint64_t x)
{
#if defined(__GNUC__) || defined(__clang__)
    return __builtin_bswap64(x);
#else
    /* Portable approach */
    x = ((x & 0x00000000FFFFFFFFULL) << 32) | ((x >> 32) & 0x00000000FFFFFFFFULL);
    x = ((x & 0x0000FFFF0000FFFFULL) << 16) | ((x >> 16) & 0x0000FFFF0000FFFFULL);
    x = ((x & 0x00FF00FF00FF00FFULL) << 8 ) | ((x >> 8 ) & 0x00FF00FF00FF00FFULL);
    return x;
#endif
}

/* Allocation failure (and size overflow) is fatal throughout this
   library: print the message and abort. */
static void wota_die(const char *msg)
{
    fputs(msg, stderr);
    abort();
}

/* Load the index-th 64-bit word starting at 'wota'. memcpy keeps the
   read well-defined even when the caller's pointer is not 8-byte
   aligned. */
static inline uint64_t wota_load_word(const char *wota, size_t index)
{
    uint64_t w;
    memcpy(&w, wota + index * sizeof w, sizeof w);
    return w;
}

void *wota_dup_data(WotaBuffer *wb)
{
    size_t nbytes = wb->size * sizeof(uint64_t);

    /* Ask for at least one byte so an empty buffer still yields a
       unique, free()-able pointer instead of a possible NULL. */
    void *copy = malloc(nbytes ? nbytes : 1);
    if (!copy) {
        wota_die("malloc failed in wota_dup_data\n");
    }
    if (nbytes) {
        memcpy(copy, wb->data, nbytes);
    }
    return copy;
}

/* ================================================================
   Helper: Grow the buffer to fit 'min_add' more 64-bit words
   ================================================================ */
static void wota_buffer_grow(WotaBuffer *wb, size_t min_add)
{
    const size_t max_words = SIZE_MAX / sizeof(uint64_t);

    if (wb->size > max_words || min_add > max_words - wb->size) {
        wota_die("size overflow in wota_buffer_grow\n");
    }
    size_t needed = wb->size + min_add;
    if (needed <= wb->capacity) {
        return;
    }

    /* Start at 8 words for an empty buffer, otherwise keep doubling
       the current capacity until the request fits. */
    size_t new_cap = (wb->capacity == 0) ? 8 : wb->capacity;
    while (new_cap < needed) {
        if (new_cap > max_words / 2) {
            new_cap = needed; /* doubling would overflow; take the exact size */
            break;
        }
        new_cap *= 2;
    }

    uint64_t *new_data = realloc(wb->data, new_cap * sizeof *new_data);
    if (!new_data) {
        wota_die("realloc failed in wota_buffer_grow\n");
    }
    wb->data = new_data;
    wb->capacity = new_cap;
}

void wota_buffer_init(WotaBuffer *wb, size_t initial_capacity_in_words)
{
    wb->data = NULL;
    wb->size = 0;
    wb->capacity = 0;

    if (initial_capacity_in_words == 0) {
        return; /* storage is allocated lazily on first write */
    }
    if (initial_capacity_in_words > SIZE_MAX / sizeof(uint64_t)) {
        wota_die("size overflow in wota_buffer_init\n");
    }
    wb->data = malloc(initial_capacity_in_words * sizeof *wb->data);
    if (!wb->data) {
        wota_die("malloc failed in wota_buffer_init\n");
    }
    wb->capacity = initial_capacity_in_words;
}

void wota_buffer_free(WotaBuffer *wb)
{
    free(wb->data); /* free(NULL) is a no-op */
    wb->data = NULL;
    wb->size = 0;
    wb->capacity = 0;
}

/* Reserve 'count' 64-bit words at the end of the buffer and return a
   pointer to them. The pointer is invalidated by the next reservation,
   because growing may move the storage. */
static uint64_t *wota_buffer_alloc(WotaBuffer *wb, size_t count)
{
    wota_buffer_grow(wb, count);
    uint64_t *p = wb->data + wb->size;
    wb->size += count;
    return p;
}

/* ================================================================
   READING
   ================================================================ */

/* Skip one 64-bit word. */
static inline char *wota_skip1(char *wota)
{
    return wota + 8;
}

/* WOTA_INT => single 64-bit word: top 56 bits hold a signed integer,
   LSB = 0x00. */
char *wota_read_int(long long *n, char *wota)
{
    if (!n) {
        return wota_skip1(wota);
    }
    uint64_t first = wota_load_word(wota, 0);
    if ((first & 0xffU) != WOTA_INT) {
        return wota_skip1(wota); /* not an int; skip one word */
    }
    /* Arithmetic right shift sign-extends the 56-bit payload. */
    *n = (long long)((int64_t)first >> 8);
    return wota_skip1(wota);
}

/* A double is stored as:
     - first 64-bit word  => type code (LSB = 0x01), top 56 bits unused
     - second 64-bit word => raw IEEE 754 bits */
char *wota_read_float(double *out, char *wota)
{
    if (!out) {
        return wota + 16; /* skip both words without looking at the type */
    }
    uint64_t first = wota_load_word(wota, 0);
    if ((first & 0xffU) != WOTA_FLOAT) {
        return wota_skip1(wota); /* not a float; skip one word */
    }
    uint64_t bits = wota_load_word(wota, 1);
    memcpy(out, &bits, sizeof bits);
    return wota + 16;
}

char *wota_read_sym(int *sym_code, char *wota)
{
    if (!sym_code) {
        return wota_skip1(wota);
    }
    uint64_t first = wota_load_word(wota, 0);
    if ((first & 0xffU) != WOTA_SYM) {
        return wota_skip1(wota);
    }
    *sym_code = (int)(first >> 8); /* symbol code lives in the top 56 bits */
    return wota_skip1(wota);
}

/* The array header carries only the element count; the elements follow
   as separate values that the caller reads one by one. */
char *wota_read_array(long long *count, char *wota)
{
    if (!count) {
        return wota_skip1(wota);
    }
    uint64_t first = wota_load_word(wota, 0);
    if ((first & 0xffU) != WOTA_ARR) {
        return wota_skip1(wota);
    }
    *count = (long long)(first >> 8);
    return wota_skip1(wota);
}

/* The record header carries only the count stored in its top 56 bits;
   the record's contents follow as separate values. */
char *wota_read_record(long long *count, char *wota)
{
    if (!count) {
        return wota_skip1(wota);
    }
    uint64_t first = wota_load_word(wota, 0);
    if ((first & 0xffU) != WOTA_REC) {
        return wota_skip1(wota);
    }
    *count = (long long)(first >> 8);
    return wota_skip1(wota);
}

/*
   BLOB:
     preamble => top 56 bits = #bits, LSB = 0x04
     then ceil(nbits / 64) 64-bit words of data.

   Whole bytes are packed first-byte-in-MSB, 8 per word. If nbits is not
   a multiple of 8, the trailing 1..7 bits are stored in the bit
   positions right below the last whole byte, taken from the final
   source byte starting at its least significant bit.

   On success *blob is a malloc'd buffer of *byte_len = ceil(nbits / 8)
   bytes (unused high bits of a partial final byte are zero). If either
   output pointer is NULL the blob is skipped entirely.
*/
char *wota_read_blob(long long *byte_len, char **blob, char *wota)
{
    uint64_t first = wota_load_word(wota, 0);
    if ((first & 0xffU) != WOTA_BLOB) {
        return wota_skip1(wota);
    }

    uint64_t nbits  = first >> 8;
    uint64_t nwords = (nbits + 63ULL) >> 6; /* # of 64-bit payload words */
    char *payload   = wota + 8;
    char *next      = payload + nwords * 8;

    if (!byte_len || !blob) {
        return next; /* skip mode: step over preamble and payload */
    }

    size_t   total_bytes = (size_t)((nbits + 7ULL) >> 3);
    size_t   full_bytes  = (size_t)(nbits >> 3);
    unsigned tail_bits   = (unsigned)(nbits & 7ULL);
    size_t   full_words  = full_bytes / 8; /* words made only of whole bytes */
    size_t   tail_bytes  = full_bytes % 8; /* whole bytes in the last word */

    /* At least one byte so an empty blob does not trip over malloc(0)
       returning NULL; zeroed so a partial final byte starts clean. */
    char *out = calloc(total_bytes ? total_bytes : 1, 1);
    if (!out) {
        wota_die("malloc failed in wota_read_blob\n");
    }

    /* Bulk path: one 64-bit word becomes 8 output bytes. */
    for (size_t i = 0; i < full_words; i++) {
        uint64_t block = wota_load_word(payload, i);
#if defined(WOTA_LITTLE_ENDIAN)
        /* Put the first byte (MSB) at the lowest address. */
        block = wota_bswap64(block);
#endif
        memcpy(out + i * 8, &block, 8);
    }

    /* Last, partially filled word: leftover whole bytes, then bits. */
    if (tail_bytes != 0 || tail_bits != 0) {
        uint64_t block = wota_load_word(payload, full_words);

        for (size_t j = 0; j < tail_bytes; j++) {
            out[full_words * 8 + j] = (char)((block >> (56 - 8 * j)) & 0xffU);
        }
        if (tail_bits != 0) {
            unsigned top = 63u - 8u * (unsigned)tail_bytes; /* first trailing bit */
            unsigned acc = 0;
            for (unsigned k = 0; k < tail_bits; k++) {
                acc |= (unsigned)((block >> (top - k)) & 1ULL) << k;
            }
            out[full_bytes] = (char)acc;
        }
    }

    *byte_len = (long long)total_bytes;
    *blob = out;
    return next;
}

/*
   TEXT:
     preamble => top 56 bits = #bytes of UTF-8, LSB = 0x05
     then ceil(nbytes / 8) 64-bit words containing the UTF-8 bytes,
     packed 8 bytes per word, first byte in the MSB.

   *text_utf8 receives a malloc'd, NUL-terminated copy. *byte_len, when
   given, receives the byte count (without the terminator), also when
   text_utf8 is NULL and the text is merely skipped.
*/
char *wota_read_text_len(size_t *byte_len, char **text_utf8, char *wota)
{
    uint64_t first = wota_load_word(wota, 0);
    if ((first & 0xffU) != WOTA_TEXT) {
        return wota_skip1(wota);
    }

    uint64_t nbytes = first >> 8;
    uint64_t nwords = (nbytes + 7ULL) >> 3;
    char *payload   = wota + 8;
    char *next      = payload + nwords * 8;

    if (byte_len) {
        *byte_len = (size_t)nbytes;
    }
    if (!text_utf8) {
        return next; /* skip mode: step over preamble and payload */
    }

    size_t len = (size_t)nbytes;
    char *out = malloc(len + 1);
    if (!out) {
        wota_die("malloc failed in wota_read_text_len\n");
    }

    /* Unpack bytes from the 64-bit words, most significant byte first. */
    for (size_t i = 0; i * 8 < len; i++) {
        uint64_t wval = wota_load_word(payload, i);
        for (size_t j = 0; j < 8 && i * 8 + j < len; j++) {
            out[i * 8 + j] = (char)((wval >> (56 - 8 * j)) & 0xffU);
        }
    }
    out[len] = '\0';

    *text_utf8 = out;
    return next;
}

char *wota_read_text(char **text_utf8, char *wota)
{
    return wota_read_text_len(NULL, text_utf8, wota);
}

/* ================================================================
   WRITING
   ================================================================ */

/* True if x fits in a 56-bit signed integer:
   -2^55 <= x <= 2^55 - 1 */
static int fits_in_56_bits(long long x)
{
    const long long min_val = -(1LL << 55);
    const long long max_val = (1LL << 55) - 1;
    return (x >= min_val && x <= max_val);
}

/* Write a WOTA_INT (single 64-bit word):
   top 56 bits = signed integer, LSB = 0x00.
   Values outside the 56-bit range lose their high bits. */
void wota_write_int_word(WotaBuffer *wb, long long val)
{
    /* Shift as unsigned so negative values do not hit the undefined
       left shift of a negative signed integer. */
    uint64_t word = ((uint64_t)(int64_t)val << 8) | (uint64_t)WOTA_INT;
    uint64_t *p = wota_buffer_alloc(wb, 1);
    p[0] = word;
}

/* Write a WOTA_FLOAT (2 words):
   first word  => type = 0x01 in LSB, top 56 bits = 0
   second word => raw IEEE 754 double bits */
void wota_write_float_word(WotaBuffer *wb, double val)
{
    uint64_t bits;
    memcpy(&bits, &val, sizeof bits);

    uint64_t *p = wota_buffer_alloc(wb, 2);
    p[0] = (uint64_t)WOTA_FLOAT;
    p[1] = bits;
}

void wota_write_sym(WotaBuffer *wb, int sym_code)
{
    /* single word => top 56 bits = sym_code, LSB = 0x07 */
    uint64_t word = ((uint64_t)sym_code << 8) | (uint64_t)WOTA_SYM;
    uint64_t *p = wota_buffer_alloc(wb, 1);
    p[0] = word;
}

/*
   BLOB:
     preamble word => top 56 bits = nbits, LSB = 0x04
     then ceil(nbits / 64) 64-bit words.

   'data' must provide ceil(nbits / 8) readable bytes (it may be NULL
   when nbits == 0). Whole bytes are copied 8 at a time, first byte in
   the MSB of each word. Trailing bits (nbits % 8) are taken from the
   final source byte starting at its least significant bit and stored
   in the bit positions directly below the last whole byte.

   NOTE: the trailing bits are placed in the word's canonical value on
   every host. Earlier revisions placed them in a byte-swapped copy on
   little-endian hosts, which disagreed with big-endian hosts and could
   overwrite payload bytes when 4..7 whole bytes preceded the bits.
*/
void wota_write_blob(WotaBuffer *wb, unsigned long long nbits, const char *data)
{
    /* Fill in the preamble before reserving more space: the next
       reservation may move the buffer. */
    uint64_t *pre = wota_buffer_alloc(wb, 1);
    pre[0] = ((uint64_t)nbits << 8) | (uint64_t)WOTA_BLOB;

    size_t nwords = (size_t)((nbits + 63ULL) >> 6);
    if (nwords == 0) {
        return; /* empty blob => done */
    }
    uint64_t *blocks = wota_buffer_alloc(wb, nwords);

    size_t   full_bytes = (size_t)(nbits / 8ULL);
    unsigned tail_bits  = (unsigned)(nbits % 8ULL);
    size_t   full_words = full_bytes / 8; /* words made only of whole bytes */
    size_t   tail_bytes = full_bytes % 8; /* whole bytes in the last word */

    /* 1) Bulk path: 8 source bytes become one 64-bit word. */
    for (size_t i = 0; i < full_words; i++) {
        uint64_t block;
        memcpy(&block, data + i * 8, 8);
#if defined(WOTA_LITTLE_ENDIAN)
        /* Move the first byte into the MSB. */
        block = wota_bswap64(block);
#endif
        blocks[i] = block;
    }

    /* 2) Last, partially filled word: leftover whole bytes, then bits.
          Exactly one such word exists when either count is non-zero. */
    if (tail_bytes != 0 || tail_bits != 0) {
        const unsigned char *tail = (const unsigned char *)data + full_words * 8;
        uint64_t block = 0;

        for (size_t j = 0; j < tail_bytes; j++) {
            block |= (uint64_t)tail[j] << (56 - 8 * j);
        }
        if (tail_bits != 0) {
            unsigned top = 63u - 8u * (unsigned)tail_bytes; /* first trailing bit */
            for (unsigned k = 0; k < tail_bits; k++) {
                uint64_t bit = (uint64_t)((tail[tail_bytes] >> k) & 1u);
                block |= bit << (top - k);
            }
        }
        blocks[full_words] = block;
    }
}

/* Write 'nbytes' bytes of UTF-8 as a WOTA_TEXT value. A NULL 'utf8' is
   written as empty text, whatever 'nbytes' says. */
void wota_write_text_len(WotaBuffer *wb, const char *utf8, size_t nbytes)
{
    if (!utf8) {
        utf8 = "";
        nbytes = 0; /* nothing readable behind a NULL pointer */
    }

    /* preamble => top 56 bits = nbytes, LSB = 0x05 */
    uint64_t *pre = wota_buffer_alloc(wb, 1);
    pre[0] = ((uint64_t)nbytes << 8) | (uint64_t)WOTA_TEXT;

    /* pack UTF-8 bytes into 64-bit words, 8 bytes per word */
    size_t nwords = nbytes / 8 + (nbytes % 8 != 0);
    if (nwords == 0) {
        return;
    }
    uint64_t *blocks = wota_buffer_alloc(wb, nwords);

    for (size_t i = 0; i < nwords; i++) {
        uint64_t wval = 0;
        for (size_t j = 0; j < 8 && i * 8 + j < nbytes; j++) {
            wval |= (uint64_t)(unsigned char)utf8[i * 8 + j] << (56 - 8 * j);
        }
        blocks[i] = wval;
    }
}

void wota_write_text(WotaBuffer *wb, const char *utf8)
{
    if (!utf8) {
        utf8 = "";
    }
    wota_write_text_len(wb, utf8, strlen(utf8));
}

void wota_write_array(WotaBuffer *wb, unsigned long long count)
{
    /* single 64-bit word => top 56 bits = count, LSB = 0x02 */
    uint64_t word = ((uint64_t)count << 8) | (uint64_t)WOTA_ARR;
    uint64_t *p = wota_buffer_alloc(wb, 1);
    p[0] = word;
}

void wota_write_record(WotaBuffer *wb, unsigned long long count)
{
    /* single 64-bit word => top 56 bits = count, LSB = 0x03 */
    uint64_t word = ((uint64_t)count << 8) | (uint64_t)WOTA_REC;
    uint64_t *p = wota_buffer_alloc(wb, 1);
    p[0] = word;
}

/*
   wota_write_number:
   If n has no fractional part and fits in a 56-bit signed integer it
   is stored as WOTA_INT; everything else (fractions, huge magnitudes,
   infinities, NaN) is stored as WOTA_FLOAT (raw double).
*/
void wota_write_number(WotaBuffer *wb, double n)
{
    double ip;
    double frac = modf(n, &ip);

    /* Converting a double outside the range of long long is undefined,
       so establish the range before casting. Both bounds (+/- 2^63) are
       exactly representable; infinities fail the test, and NaN fails
       the frac comparison. */
    if (frac == 0.0 &&
        ip >= -9223372036854775808.0 && ip < 9223372036854775808.0) {
        long long i = (long long)ip;
        if (fits_in_56_bits(i)) {
            wota_write_int_word(wb, i);
            return;
        }
    }

    /* fallback: store as double */
    wota_write_float_word(wb, n);
}

#endif /* WOTA_IMPLEMENTATION */

#endif /* WOTA_H */