Minor nota speed improvement; use nota growable array internally so no more fixed size
All checks were successful
Build and Deploy / build-linux (push) Successful in 1m15s
Build and Deploy / build-windows (CLANG64) (push) Successful in 14m57s
Build and Deploy / package-dist (push) Has been skipped
Build and Deploy / deploy-itch (push) Has been skipped
Build and Deploy / deploy-gitea (push) Has been skipped

This commit is contained in:
2025-02-24 11:16:37 -06:00
parent 8ea8f7fec7
commit 35647a5c5b
4 changed files with 669 additions and 671 deletions

View File

@@ -54,20 +54,12 @@ jobs:
- name: Check Out Code - name: Check Out Code
uses: actions/checkout@v4 uses: actions/checkout@v4
- name: Cache MSYS2
uses: actions/cache@v3
with:
path: C:\msys64
key: ${{ runner.os }}-msys2-${{ matrix.msystem }}-${{ hashFiles('**/lockfiles', '.github/workflows/*.yml') }}
restore-keys: |
${{ runner.os }}-msys2-${{ matrix.msystem }}-
- name: Setup MSYS2 - name: Setup MSYS2
uses: msys2/setup-msys2@v2 uses: msys2/setup-msys2@v2
with: with:
msystem: ${{ matrix.msystem }} msystem: ${{ matrix.msystem }}
update: true update: true
cache: false cache: true
install: | install: |
git git
zip zip

View File

@@ -2,14 +2,11 @@
#define KIM_H #define KIM_H
// write number of runes from a kim stream int a utf8 stream // write number of runes from a kim stream int a utf8 stream
void utf8_to_kim(const char **utf, char **kim); void utf8_to_kim(const char **utf, char **kim, long long *runeout);
// write number of runes from a kim stream int a utf8 stream // write number of runes from a kim stream int a utf8 stream
void kim_to_utf8(char **kim, char **utf, int runes); void kim_to_utf8(char **kim, char **utf, int runes);
// Return the number of bytes a given utf-8 rune will have
int utf8_bytes(char c);
// Return the number of runes in a utf8 string // Return the number of runes in a utf8 string
int utf8_count(const char *utf8); int utf8_count(const char *utf8);
@@ -24,7 +21,7 @@ void encode_utf8(char **s, int code);
static void encode_kim(char **s, int code); static void encode_kim(char **s, int code);
int decode_kim(char **s); int decode_kim(char **s);
int utf8_bytes(char c) static inline int utf8_bytes(char c)
{ {
int bytes = __builtin_clz(~(c)); int bytes = __builtin_clz(~(c));
if (!bytes) return 1; if (!bytes) return 1;
@@ -105,11 +102,15 @@ int decode_kim(char **s)
return rune; return rune;
} }
void utf8_to_kim(const char **utf, char **kim) void utf8_to_kim(const char **utf, char **kim, long long *runeout)
{ {
const char * str = *utf; const char * str = *utf;
while (*str) long long runes = 0;
while (*str) {
runes++;
encode_kim(kim, decode_utf8(&str)); encode_kim(kim, decode_utf8(&str));
}
if (runeout) *runeout = runes;
} }
void kim_to_utf8(char **kim, char **utf, int runes) void kim_to_utf8(char **kim, char **utf, int runes)

View File

@@ -1,55 +1,26 @@
#ifndef NOTA_H #ifndef NOTA_H
#define NOTA_H #define NOTA_H
#define NOTA_BLOB 0x00 // C 0 0 0 #include <stddef.h>
#define NOTA_TEXT 0x10 // C 0 0 1 #include <stdint.h>
#define NOTA_ARR 0x20 // C 0 1 0
#define NOTA_REC 0x30 // C 0 1 1
#define NOTA_FLOAT 0x40 // C 1 0
#define NOTA_INT 0x60 // C 1 1 0
#define NOTA_SYM 0x70 // C 1 1 1
#define NOTA_NULL 0x00 /* Nota type nibble values */
#define NOTA_BLOB 0x00
#define NOTA_TEXT 0x10
#define NOTA_ARR 0x20
#define NOTA_REC 0x30
#define NOTA_FLOAT 0x40
#define NOTA_INT 0x60
#define NOTA_SYM 0x70
#define NOTA_NULL 0x00
#define NOTA_FALSE 0x02 #define NOTA_FALSE 0x02
#define NOTA_TRUE 0x03 #define NOTA_TRUE 0x03
#define NOTA_INF 0x03 #define NOTA_INF 0x03
#define NOTA_PRIVATE 0x08 #define NOTA_PRIVATE 0x08
#define NOTA_SYSTEM 0x09 #define NOTA_SYSTEM 0x09
// Returns the type NOTA_ of the byte at *nota
int nota_type(char *nota);
// Functions take a pointer to a buffer *nota, read or write the value, and then return a pointer to the next byte of the stream
// Pass NULL into the read in variable to skip over it
char *nota_read_blob(long long *len, char **blob, char *nota);
// ALLOCATES! Uses strdup to return it via the text pointer
char *nota_read_text(char **text, char *nota);
char *nota_read_array(long long *len, char *nota);
char *nota_read_record(long long *len, char *nota);
char *nota_read_float(double *d, char *nota);
char *nota_read_int(long long *l, char *nota);
char *nota_read_sym(int *sym, char *nota);
char *nota_write_blob(unsigned long long n, char *data, char *nota);
char *nota_write_text(const char *s, char *nota);
char *nota_write_array(unsigned long long n, char *nota);
char *nota_write_record(unsigned long long n, char *nota);
char *nota_write_number(double n, char *nota);
char *nota_write_sym(int sym, char *nota);
void print_nota_hex(char *nota);
#ifdef NOTA_IMPLEMENTATION
#include "stdio.h"
#include "math.h"
#include "string.h"
#include "stdlib.h"
#include "limits.h"
#include "kim.h"
/* Some internal constants/macros (used in varint logic, etc.) */
#define NOTA_CONT 0x80 #define NOTA_CONT 0x80
#define NOTA_DATA 0x7f #define NOTA_DATA 0x7f
#define NOTA_INT_DATA 0x07 #define NOTA_INT_DATA 0x07
@@ -58,38 +29,74 @@ void print_nota_hex(char *nota);
#define NOTA_EXP_SIGN(CHAR) (CHAR & (1<<4)) #define NOTA_EXP_SIGN(CHAR) (CHAR & (1<<4))
#define NOTA_TYPE 0x70 #define NOTA_TYPE 0x70
#define NOTA_HEAD_DATA 0x0f #define NOTA_HEAD_DATA 0x0f
#define CONTINUE(CHAR) (CHAR>>7) #define CONTINUE(CHAR) ((CHAR)>>7)
#define UTF8_DATA 0x3f #define UTF8_DATA 0x3f
/* define this to use native string instead of kim. Bytes are encoded instead of runes */ /* A helper to get the high-level Nota type nibble from a byte */
#define NOTA_UTF8 static inline int nota_type(const char *nota) { return (*nota) & 0x70; }
int nota_type(char *nota) { return *nota & NOTA_TYPE; } char *nota_read_blob(long long *len, char **blob, char *nota);
char *nota_read_text(char **text, char *nota);
char *nota_read_array(long long *len, char *nota);
char *nota_read_record(long long *len, char *nota);
char *nota_read_float(double *d, char *nota);
char *nota_read_int(long long *n, char *nota);
char *nota_read_sym(int *sym, char *nota);
char *nota_skip(char *nota) typedef struct NotaBuffer {
char *data;
size_t size; /* number of bytes used */
size_t capacity; /* allocated size of data */
} NotaBuffer;
/* Initialize a NotaBuffer with a given initial capacity. */
void nota_buffer_init(NotaBuffer *nb, size_t initial_capacity);
/* Free the buffer's internal memory. (Does NOT free nb itself.) */
void nota_buffer_free(NotaBuffer *nb);
void nota_write_blob (NotaBuffer *nb, unsigned long long nbits, const char *data);
void nota_write_text (NotaBuffer *nb, const char *s);
void nota_write_array (NotaBuffer *nb, unsigned long long count);
void nota_write_record(NotaBuffer *nb, unsigned long long count);
void nota_write_number(NotaBuffer *nb, double n);
void nota_write_sym (NotaBuffer *nb, int sym);
#ifdef NOTA_IMPLEMENTATION
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <limits.h>
#include "kim.h"
/* -------------------------------------------------------
HELPER: skip a varint
------------------------------------------------------- */
static inline char *nota_skip(char *nota)
{ {
while (CONTINUE(*nota)) while (CONTINUE(*nota)) {
nota++; nota++;
}
return nota+1; return nota + 1;
} }
/* -------------------------------------------------------
HELPER: read a varint
------------------------------------------------------- */
char *nota_read_num(long long *n, char *nota) char *nota_read_num(long long *n, char *nota)
{ {
if (!n) { if (!n) {
return nota_skip(nota); return nota_skip(nota);
} }
unsigned char b = (unsigned char)*nota;
// Start by reading the first byte long long result = b & NOTA_HEAD_DATA;
unsigned char b = *nota;
int result = b & NOTA_HEAD_DATA; // lower bits
nota++; nota++;
// While the top bit is set, read more 7-bit chunks while (b & NOTA_CONT) {
while (CONTINUE(b)) { b = (unsigned char)*nota++;
b = *nota;
nota++;
result = (result << 7) | (b & NOTA_DATA); result = (result << 7) | (b & NOTA_DATA);
} }
@@ -97,458 +104,358 @@ char *nota_read_num(long long *n, char *nota)
return nota; return nota;
} }
// Given a number n, and bits used in the first char sb, how many bits are needed /* Count how many bits of varint we need to encode n,
int nota_bits(long long n, int sb) with sb “special bits” in the first byte. */
static inline int nota_bits(long long n, int sb)
{ {
if (n == 0) return sb; if (n == 0) return sb;
int bits = sizeof(n)*CHAR_BIT - __builtin_clzll(n); int bits = (sizeof(n)*CHAR_BIT) - __builtin_clzll(n);
bits-=sb; bits -= sb;
int needed = ((bits + 6) / 7)*7 + sb; int needed = ((bits + 6) / 7)*7 + sb;
return needed; return needed;
} }
// write a number from n into *nota, with sb bits in the first char /* Write a varint into *nota, with sb bits in the first char (which is already set). */
char *nota_continue_num(long long n, char *nota, int sb) static inline char *nota_continue_num(long long n, char *nota, int sb)
{ {
int bits = nota_bits(n, sb); int bits = nota_bits(n, sb);
bits -= sb; bits -= sb;
if (bits > 0)
nota[0] |= NOTA_CONT;
else
nota[0] &= ~NOTA_CONT;
int shex = (~0) << sb; if (bits > 0)
nota[0] &= shex; /* clear shex bits */ nota[0] |= NOTA_CONT;
nota[0] |= (~shex) & (n>>bits); else
nota[0] &= ~NOTA_CONT;
int i = 1; int shex = (~0) << sb;
while (bits > 0) { nota[0] &= shex; /* clear sb bits */
bits -= 7; nota[0] |= (~shex) & ((unsigned long long)n >> bits);
int head = bits == 0 ? 0 : NOTA_CONT;
nota[i] = head | (NOTA_DATA & (n >> bits));
i++;
}
return &nota[i]; int i = 1;
} while (bits > 0) {
bits -= 7;
int head = (bits == 0) ? 0 : NOTA_CONT;
void print_nota_hex(char *nota) nota[i] = head | (NOTA_DATA & (n >> bits));
{ i++;
do {
printf("%02X ", (unsigned char)(*nota));
} while(CONTINUE(*(nota++)));
printf("\n");
return;
long long chars = 0;
if (!((*nota>>4 & 0x07) ^ NOTA_TEXT>>4))
nota_read_num(&chars, nota);
if ((*nota>>5) == 2 || (*nota>>5) == 6)
chars = 1;
for (int i = 0; i < chars+1; i++) {
do {
printf("%02X ", (unsigned char)(*nota));
} while(CONTINUE(*(nota++)));
}
printf("\n");
}
char *nota_write_int(long long n, char *nota)
{
char sign = 0;
if (n < 0) {
sign = 0x08;
n *= -1;
}
*nota = NOTA_INT | sign;
return nota_continue_num(n, nota, 3);
}
#define NOTA_DBL_PREC 6
#define xstr(s) str(s)
#define str(s) #s
#include <stdio.h>
#include <math.h>
void extract_mantissa_coefficient(double num, long *coefficient, long *exponent)
{
if (num == 0.0) {
*coefficient = 0;
*exponent = 0;
return;
}
// Optional: handle sign separately if you want 'coefficient' always positive.
// For simplicity, let's just let atol(...) parse the sign if it's there.
// 1) Slightly round the number to avoid too many FP trailing digits:
// Example: Round to 12 decimal places.
double rounded = floor(fabs(num) * 1e12 + 0.5) / 1e12;
if (num < 0) {
rounded = -rounded;
}
// 2) Convert to string with fewer digits of precision so we do NOT get
// the long binary-fraction expansions (like 98.599999999999994).
char buf[64];
snprintf(buf, sizeof(buf), "%.14g", rounded);
// 3) Look for scientific notation
char *exp_pos = strpbrk(buf, "eE");
long exp_from_sci = 0;
if (exp_pos) {
exp_from_sci = atol(exp_pos + 1);
*exp_pos = '\0'; // Truncate the exponent part from the string
} }
// 4) Find decimal point return &nota[i];
char *dec_point = strchr(buf, '.');
int digits_after_decimal = 0;
if (dec_point) {
digits_after_decimal = (int)strlen(dec_point + 1);
// Remove the '.' by shifting the remainder left
memmove(dec_point, dec_point + 1, strlen(dec_point));
}
// 5) Now the string is just an integer (possibly signed), so parse it
long long coeff_ll = atoll(buf); // support up to 64-bit range
*coefficient = (long)coeff_ll;
// 6) The final decimal exponent is whatever came from 'e/E'
// minus however many digits we removed by removing the decimal point.
*exponent = exp_from_sci - digits_after_decimal;
}
char *nota_write_decimal_str(const char *decimal, char *nota)
{
// Handle negative sign
int neg = (decimal[0] == '-');
if (neg) decimal++; // Skip the '-' if present
// Parse integer part
long coef = 0;
long exp = 0;
int decimal_point_seen = 0;
const char *ptr = decimal;
int has_exponent = 0;
// First pass: calculate coefficient up to 'e' or 'E'
while (*ptr && *ptr != 'e' && *ptr != 'E') {
if (*ptr == '.') {
decimal_point_seen = 1;
ptr++;
continue;
}
if (*ptr >= '0' && *ptr <= '9') {
coef = coef * 10 + (*ptr - '0');
if (decimal_point_seen) {
exp--; // Each digit after decimal point decreases exponent
}
}
ptr++;
}
// Parse exponent part if present
if (*ptr == 'e' || *ptr == 'E') {
has_exponent = 1;
ptr++; // Skip 'e' or 'E'
int exp_sign = 1;
if (*ptr == '-') {
exp_sign = -1;
ptr++;
} else if (*ptr == '+') {
ptr++;
}
long explicit_exp = 0;
while (*ptr >= '0' && *ptr <= '9') {
explicit_exp = explicit_exp * 10 + (*ptr - '0');
ptr++;
}
exp += exp_sign * explicit_exp;
}
// If no decimal point and no exponent, treat as integer
if (!decimal_point_seen && !has_exponent) {
return nota_write_int(coef * (neg ? -1 : 1), nota);
}
// Remove trailing zeros from coefficient
while (coef > 0 && coef % 10 == 0 && exp < 0) {
coef /= 10;
exp++;
}
// Handle zero case
if (coef == 0) {
return nota_write_int(0, nota);
}
// Set up the notation format similar to nota_write_float
int expsign = exp < 0 ? ~0 : 0;
exp = llabs(exp);
nota[0] = NOTA_FLOAT;
nota[0] |= (expsign & 1) << 4; // Exponent sign bit
nota[0] |= (neg & 1) << 3; // Number sign bit
char *c = nota_continue_num(exp, nota, 3);
return nota_continue_num(coef, c, 7);
}
char *nota_write_float(double n, char *nota)
{
int neg = (n < 0);
long coef;
long exp;
extract_mantissa_coefficient(n, &coef, &exp);
// Store integer if exponent is zero
if (exp == 0)
return nota_write_int(coef * (neg ? -1 : 1), nota);
int expsign = exp < 0 ? ~0 : 0;
exp = labs(exp);
nota[0] = NOTA_FLOAT;
nota[0] |= (expsign & 1) << 4;
nota[0] |= (neg & 1) << 3;
char *c = nota_continue_num(exp, nota, 3);
return nota_continue_num(labs(coef), c, 7);
}
char *nota_read_float_str(char **d, char *nota)
{
// Extract sign bits from the first byte
int neg = NOTA_SIG_SIGN(*nota); // bit 3 => mantissa sign
int esign = NOTA_EXP_SIGN(*nota); // bit 4 => exponent sign
// Read the exponents lower 3 bits from the first byte
long long e = (*nota) & NOTA_INT_DATA; // NOTA_INT_DATA = 0x07
// Count exponent bytes and accumulate value
int e_bytes = 1;
while (CONTINUE(*nota)) {
nota++;
e = (e << 7) | ((*nota) & NOTA_DATA); // NOTA_DATA = 0x7F
e_bytes++;
}
// Move past the last exponent byte
nota++;
// Read the mantissa
long long sig = (*nota) & NOTA_DATA;
int sig_bytes = 1;
while (CONTINUE(*nota)) {
nota++;
sig = (sig << 7) | ((*nota) & NOTA_DATA);
sig_bytes++;
}
// Move past the last mantissa byte
nota++;
// Apply sign bits
if (neg) sig = -sig;
if (esign) e = -e;
// Calculate digits in mantissa (sig) and exponent (e)
int sig_digits = (sig == 0) ? 1 : (int)log10(llabs(sig)) + 1;
int e_digits = (e == 0) ? 1 : (int)log10(llabs(e)) + 1;
// Calculate total string size:
// - Mantissa: sign (1), digits, decimal (1), 2 decimal places
// - Exponent: 'e', sign (1), digits
// - Null terminator (1)
int size = 1 + sig_digits + 1 + 2 + 1 + 1 + e_digits + 1;
if (neg) size++; // Extra space for negative mantissa
if (esign) size++; // Extra space for negative exponent
// Allocate the string
char *result = (char *)malloc(size);
if (!result) {
*d = NULL;
return nota; // Return current position even on failure
}
// Format the string as "xey" (e.g., "1.23e4")
double value = (double)sig * pow(10.0, (double)e);
snprintf(result, size, "%.*fe%lld", 2, value/pow(10.0, (double)e), e);
// Set the output pointer and return
*d = result;
return nota;
}
char *nota_read_float(double *d, char *nota)
{
// If the caller passed NULL for d, just skip over the float encoding
if (!d) {
return nota_skip(nota);
}
// Extract sign bits from the first byte
int neg = NOTA_SIG_SIGN(*nota); // bit 3 => mantissa sign
int esign = NOTA_EXP_SIGN(*nota); // bit 4 => exponent sign
// Read the exponents lower 3 bits from the first byte
long long e = (*nota) & NOTA_INT_DATA; // NOTA_INT_DATA = 0x07
// While the continuation bit is set, advance and accumulate exponent
while (CONTINUE(*nota)) {
nota++;
e = (e << 7) | ((*nota) & NOTA_DATA); // NOTA_DATA = 0x7F
}
// Move past the last exponent byte
nota++;
// Now read the mantissa in the same variable-length style
long long sig = (*nota) & NOTA_DATA;
while (CONTINUE(*nota)) {
nota++;
sig = (sig << 7) | ((*nota) & NOTA_DATA);
}
// Move past the last mantissa byte
nota++;
// Apply sign bits
if (neg) sig = -sig;
if (esign) e = -e;
// Finally compute the double value: mantissa * 10^exponent
*d = (double)sig * pow(10.0, (double)e);
// Return the pointer to wherever we ended
return nota;
}
char *nota_write_number(double n, char *nota)
{
if (n < (double)INT64_MIN || n > (double)INT64_MAX) return nota_write_float(n, nota);
double int_part;
double frac = modf(n, &int_part);
if (fabs(frac) < 1e-14)
return nota_write_int((long long)int_part, nota);
else
return nota_write_float(n, nota);
}
char *nota_read_int(long long *n, char *nota)
{
if (!n)
return nota_skip(nota);
*n = 0;
char *c = nota;
*n |= (*c) & NOTA_INT_DATA; /* first three bits */
while (CONTINUE(*(c++)))
*n = (*n<<7) | (*c & NOTA_DATA);
if (NOTA_INT_SIGN(*nota)) *n *= -1;
return c;
}
/* n is the number of bits */
char *nota_write_blob(unsigned long long n, char *data, char *nota)
{
nota[0] = NOTA_BLOB;
nota = nota_continue_num(n, nota, 4);
int bytes = floor((n+7)/8);
for (int i = 0; i < bytes; i++)
nota[i] = data[i];
return nota+bytes;
}
char *nota_write_array(unsigned long long n, char *nota)
{
nota[0] = NOTA_ARR;
return nota_continue_num(n, nota, 4);
}
char *nota_read_array(long long *len, char *nota)
{
if (!len) return nota;
return nota_read_num(len, nota);
}
char *nota_read_record(long long *len, char *nota)
{
if (!len) return nota;
return nota_read_num(len, nota);
} }
char *nota_read_blob(long long *len, char **blob, char *nota) char *nota_read_blob(long long *len, char **blob, char *nota)
{ {
if (!len) return nota; if (!len) return nota;
nota = nota_read_num(len,nota); nota = nota_read_num(len, nota);
int bytes = floor((*len+7)/8); int bytes = (int)floor((*len + 7) / 8.0);
*len = bytes; *len = bytes;
*blob = (char *)malloc(bytes);
*blob = malloc(bytes); memcpy(*blob, nota, bytes);
memcpy(*blob,nota,bytes); return nota + bytes;
return nota+bytes;
}
char *nota_write_record(unsigned long long n, char *nota)
{
nota[0] = NOTA_REC;
return nota_continue_num(n, nota, 4);
}
char *nota_write_sym(int sym, char *nota)
{
*nota = NOTA_SYM | sym;
return nota+1;
}
char *nota_read_sym(int *sym, char *nota)
{
if (sym) *sym = (*nota) & 0x0f;
return nota+1;
} }
char *nota_read_text(char **text, char *nota) char *nota_read_text(char **text, char *nota)
{ {
long long chars; long long chars;
nota = nota_read_num(&chars, nota); nota = nota_read_num(&chars, nota);
char utf[chars*4]; // enough for the worst case scenario char utf[chars*4 + 1]; /* enough for wide chars + null */
char *pp = utf; char *pp = utf;
kim_to_utf8(&nota, &pp, chars); kim_to_utf8(&nota, &pp, chars);
*pp = 0; *pp = 0;
*text = strdup(utf); *text = strdup(utf);
return nota;
return nota;
} }
char *nota_write_text(const char *s, char *nota) char *nota_read_array(long long *len, char *nota)
{ {
nota[0] = NOTA_TEXT; if (!len) return nota;
long long n = utf8_count(s); return nota_read_num(len, nota);
nota = nota_continue_num(n,nota,4);
utf8_to_kim(&s, &nota);
return nota;
} }
#endif char *nota_read_record(long long *len, char *nota)
#endif {
if (!len) return nota;
return nota_read_num(len, nota);
}
/*
 * Decode a Nota float starting at `nota`.
 *
 * The encoding is two consecutive varints: the exponent (3 data bits in the
 * head byte, 7 in each following byte) and then the coefficient (7 data bits
 * per byte).  The head byte also carries the coefficient sign and the
 * exponent sign.  The result is coefficient * 10^exponent.
 *
 * d:    receives the decoded value; pass NULL to skip the value.
 * nota: first byte of the encoded float.
 * Returns a pointer to the byte after the encoded float.
 */
char *nota_read_float(double *d, char *nota)
{
  /* Skipping must step over BOTH varints; skipping only the exponent would
     leave the cursor in the middle of the value. */
  if (!d) {
    return nota_skip(nota_skip(nota));
  }

  int neg = NOTA_SIG_SIGN(*nota);
  int esign = NOTA_EXP_SIGN(*nota);

  /* Exponent: low three bits of the head byte, then 7-bit groups. */
  long long e = (*nota) & NOTA_INT_DATA;
  while (CONTINUE(*nota)) {
    nota++;
    e = (e << 7) | ((*nota) & NOTA_DATA);
  }
  nota++;

  /* Coefficient: 7-bit groups, most significant first. */
  long long sig = (*nota) & NOTA_DATA;
  while (CONTINUE(*nota)) {
    nota++;
    sig = (sig << 7) | ((*nota) & NOTA_DATA);
  }
  nota++;

  if (neg) sig = -sig;
  if (esign) e = -e;

  *d = (double)sig * pow(10.0, (double)e);
  return nota;
}
/*
 * Decode a Nota integer: three data bits in the head byte followed by
 * 7-bit continuation groups; the head byte's sign bit negates the result.
 *
 * n:    receives the value; pass NULL to skip over the integer.
 * nota: first byte of the encoded integer.
 * Returns a pointer to the byte after the encoded integer.
 */
char *nota_read_int(long long *n, char *nota)
{
  if (!n)
    return nota_skip(nota);

  long long value = (*nota) & NOTA_INT_DATA;
  char *cur = nota;

  for (;;) {
    int more = CONTINUE(*cur);
    cur++;
    if (!more)
      break;
    value = (value << 7) | (*cur & NOTA_DATA);
  }

  /* The sign lives in the head byte, not in the continuation bytes. */
  if (NOTA_INT_SIGN(*nota))
    value = -value;

  *n = value;
  return cur;
}
/*
 * Read a one-byte Nota symbol.
 *
 * sym:  receives the low nibble of the byte; may be NULL to skip.
 * nota: the symbol byte.
 * Returns a pointer to the following byte.
 */
char *nota_read_sym(int *sym, char *nota)
{
  char *next = nota + 1;

  if (!sym)
    return next;

  *sym = (*nota) & 0x0f;
  return next;
}
/*
 * Make sure at least `min_add` more bytes fit after nb->size.
 * Capacity starts at 64 bytes (when empty) and doubles until it is large
 * enough.  Aborts the process if realloc fails.
 */
static void nota_buffer_grow(NotaBuffer *nb, size_t min_add)
{
  size_t required = nb->size + min_add;
  if (nb->capacity >= required)
    return;

  size_t cap = nb->capacity ? nb->capacity * 2 : 64;
  for (; cap < required; cap *= 2)
    ;

  char *resized = (char *)realloc(nb->data, cap);
  if (resized == NULL) {
    fprintf(stderr, "realloc failed in nota_buffer_grow\n");
    abort();
  }

  nb->capacity = cap;
  nb->data = resized;
}
/*
 * Reset `nb` to an empty buffer and, when initial_capacity is non-zero,
 * pre-allocate that many bytes.  Aborts the process if malloc fails.
 */
void nota_buffer_init(NotaBuffer *nb, size_t initial_capacity)
{
  nb->size = 0;
  nb->capacity = 0;
  nb->data = NULL;

  if (initial_capacity == 0)
    return;

  char *storage = (char *)malloc(initial_capacity);
  if (storage == NULL) {
    fprintf(stderr, "malloc failed in nota_buffer_init\n");
    abort();
  }

  nb->data = storage;
  nb->capacity = initial_capacity;
}
/*
 * Release the storage owned by `nb` and return it to the empty state.
 * The NotaBuffer struct itself is not freed.
 */
void nota_buffer_free(NotaBuffer *nb)
{
  free(nb->data); /* free(NULL) is a no-op, so no guard is needed */
  nb->capacity = 0;
  nb->size = 0;
  nb->data = NULL;
}
/*
 * Reserve `len` bytes at the end of the buffer, growing it if necessary,
 * and return a pointer to the first reserved byte.  nb->size is advanced
 * by `len`.
 */
static char *nota_buffer_alloc(NotaBuffer *nb, size_t len)
{
  nota_buffer_grow(nb, len);

  size_t start = nb->size;
  nb->size = start + len;
  return nb->data + start;
}
static void nota_write_int_buf(NotaBuffer *nb, long long n);
static void nota_write_float_buf(NotaBuffer *nb, double d);

/*
 * Append `n` to the buffer, choosing the integer encoding when the value is
 * (within 1e-14) a whole number that fits in a long long, and the float
 * encoding otherwise.
 */
static void nota_write_int_or_float_buf(NotaBuffer *nb, double n)
{
  /* (double)INT64_MAX rounds up to 2^63, which does NOT fit in a long long,
     so the upper bound must be exclusive; converting 2^63 below would be an
     out-of-range double-to-integer conversion (undefined behaviour). */
  if (n < (double)INT64_MIN || n >= (double)INT64_MAX) {
    nota_write_float_buf(nb, n);
    return;
  }

  double ip;
  double frac = modf(n, &ip);
  if (fabs(frac) < 1e-14)
    nota_write_int_buf(nb, (long long)ip);
  else
    nota_write_float_buf(nb, n);
}
/*
 * Append a one-byte symbol: the NOTA_SYM type bits combined with the low
 * nibble of `sym`.
 */
void nota_write_sym(NotaBuffer *nb, int sym)
{
  char *slot = nota_buffer_alloc(nb, 1);
  int low_nibble = sym & 0x0f;

  slot[0] = NOTA_SYM | low_nibble;
}
/*
 * Append a blob: a NOTA_BLOB header whose varint holds the length in bits,
 * followed by the payload bytes (nbits rounded up to whole bytes).
 *
 * Space for the worst-case header is reserved up front; whatever the varint
 * did not use is handed back by shrinking nb->size afterwards.
 */
void nota_write_blob(NotaBuffer *nb, unsigned long long nbits, const char *data)
{
  const size_t payload = (size_t)((nbits + 7ULL) >> 3);
  const size_t reserved = 1 + 10 + payload;

  char *head = nota_buffer_alloc(nb, reserved);
  head[0] = NOTA_BLOB;

  char *body = nota_continue_num(nbits, head, 4);
  memcpy(body, data, payload);

  size_t written = (size_t)(body - head) + payload;
  nb->size -= reserved - written;
}
/*
 * Append a text value: a NOTA_TEXT header whose varint holds the rune count,
 * followed by the runes re-encoded from UTF-8 to kim.
 *
 * Five bytes per rune plus the worst-case header are reserved up front; the
 * unused tail is handed back by shrinking nb->size afterwards.
 */
void nota_write_text(NotaBuffer *nb, const char *s)
{
  const long long rune_count = utf8_count(s);
  const size_t reserved = 1 + 10 + (size_t)(rune_count * 5);

  char *head = nota_buffer_alloc(nb, reserved);
  head[0] = NOTA_TEXT;

  char *out = nota_continue_num(rune_count, head, 4);
  for (const char *in = s; *in != '\0'; ) {
    /* decode_utf8 advances `in`; encode_kim advances `out`. */
    int rune = decode_utf8((char **)&in);
    encode_kim(&out, rune);
  }

  nb->size -= reserved - (size_t)(out - head);
}
/*
 * Append an array header carrying the element count.  The elements
 * themselves are written by the caller afterwards.
 */
void nota_write_array(NotaBuffer *nb, unsigned long long count)
{
  const size_t reserved = 10; /* worst-case header size */

  char *head = nota_buffer_alloc(nb, reserved);
  head[0] = NOTA_ARR;

  size_t written = (size_t)(nota_continue_num(count, head, 4) - head);
  nb->size -= reserved - written; /* give back the unused tail */
}
/*
 * Append a record header carrying the number of key/value pairs.  The
 * pairs themselves are written by the caller afterwards.
 */
void nota_write_record(NotaBuffer *nb, unsigned long long count)
{
  const size_t reserved = 10; /* worst-case header size */

  char *head = nota_buffer_alloc(nb, reserved);
  head[0] = NOTA_REC;

  size_t written = (size_t)(nota_continue_num(count, head, 4) - head);
  nb->size -= reserved - written; /* give back the unused tail */
}
/*
 * Public entry point for writing a number: forwards to
 * nota_write_int_or_float_buf, which picks the integer or float encoding.
 */
void nota_write_number(NotaBuffer *nb, double n)
{
nota_write_int_or_float_buf(nb, n);
}
/*
 * Append an integer: a NOTA_INT head byte with a sign bit, followed by the
 * magnitude as a varint (3 data bits in the head byte).
 */
static void nota_write_int_buf(NotaBuffer *nb, long long n)
{
  /* up to ~10 bytes for varint */
  char *p = nota_buffer_alloc(nb, 10);

  char sign = 0;
  /* Take the magnitude in unsigned arithmetic: `-n` on a long long is
     undefined behaviour when n == LLONG_MIN, unsigned negation is not. */
  unsigned long long magnitude = (unsigned long long)n;
  if (n < 0) {
    sign = 0x08; /* sign bit in the nibble */
    magnitude = 0ULL - magnitude;
  }

  p[0] = NOTA_INT | sign;
  char *end = nota_continue_num((long long)magnitude, p, 3);

  /* Hand back whatever part of the 10-byte reservation was not used. */
  size_t used = (size_t)(end - p);
  nb->size -= (10 - used);
}
static void extract_mantissa_coefficient(double num, long *coefficient, long *exponent)
{
if (num == 0.0) {
*coefficient = 0;
*exponent = 0;
return;
}
/* Round to 12 decimal places to avoid floating artifacts. */
double rounded = floor(fabs(num) * 1e12 + 0.5) / 1e12;
if (num < 0) {
rounded = -rounded;
}
char buf[64];
snprintf(buf, sizeof(buf), "%.14g", rounded);
char *exp_pos = strpbrk(buf, "eE");
long exp_from_sci = 0;
if (exp_pos) {
exp_from_sci = atol(exp_pos + 1);
*exp_pos = '\0';
}
char *dec_point = strchr(buf, '.');
int digits_after_decimal = 0;
if (dec_point) {
digits_after_decimal = (int)strlen(dec_point + 1);
memmove(dec_point, dec_point + 1, strlen(dec_point));
}
long long coeff_ll = atoll(buf);
*coefficient = (long)coeff_ll;
*exponent = exp_from_sci - digits_after_decimal;
}
/*
 * Append `d` in the float encoding: a NOTA_FLOAT head byte carrying the
 * coefficient sign (bit 3) and exponent sign (bit 4), the exponent magnitude
 * as a varint, then the coefficient magnitude as a varint.
 *
 * Values whose decimal exponent turns out to be zero are written as
 * integers instead.
 */
static void nota_write_float_buf(NotaBuffer *nb, double d)
{
  if (d == 0.0) {
    nota_write_int_buf(nb, 0);
    return;
  }

  long coef = 0;
  long exp10 = 0;
  extract_mantissa_coefficient(d, &coef, &exp10);

  if (coef == 0) {
    nota_write_int_buf(nb, 0);
    return;
  }

  if (exp10 == 0) {
    /* coef already carries d's sign (it is parsed from the formatted,
       signed text), so it must not be negated a second time. */
    nota_write_int_buf(nb, coef);
    return;
  }

  int neg = (d < 0.0);

  /* Worst case: two varints of up to 10 bytes each, plus slack. */
  char *p = nota_buffer_alloc(nb, 21);
  p[0] = NOTA_FLOAT;
  if (neg) p[0] |= (1 << 3);
  if (exp10 < 0) {
    p[0] |= (1 << 4);
    exp10 = -exp10;
  }

  char *c = nota_continue_num(exp10, p, 3);
  char *end = nota_continue_num(labs(coef), c, 7);

  /* Hand back whatever part of the reservation was not used. */
  size_t used = (size_t)(end - p);
  nb->size -= (21 - used);
}
#endif /* NOTA_IMPLEMENTATION */
#endif /* NOTA_H */

View File

@@ -4,194 +4,291 @@
#define NOTA_IMPLEMENTATION #define NOTA_IMPLEMENTATION
#include "nota.h" #include "nota.h"
/* State threaded through one nota encode pass. */
typedef struct NotaEncodeContext {
JSContext *ctx; // JS context used for every QuickJS call during encoding
JSValue visitedStack; // JS array of the objects currently being encoded (see nota_stack_*)
NotaBuffer nb; // growable output buffer the encoded bytes are appended to
int cycle; // set to 1 when a value is found already on visitedStack
} NotaEncodeContext;
/* Append a new reference to `val` at the end of the visited stack. */
static void nota_stack_push(NotaEncodeContext *enc, JSValueConst val)
{
  JSContext *js = enc->ctx;
  int top = JS_ArrayLength(js, enc->visitedStack);
  JSValue held = JS_DupValue(js, val);

  JS_SetPropertyInt64(js, enc->visitedStack, top, held);
}
/* Drop the last entry of the visited stack by shrinking its "length". */
static void nota_stack_pop(NotaEncodeContext *enc)
{
  JSContext *js = enc->ctx;
  int depth = JS_ArrayLength(js, enc->visitedStack);
  JSValue shorter = JS_NewUint32(js, depth - 1);

  JS_SetPropertyStr(js, enc->visitedStack, "length", shorter);
}
/*
 * Return 1 if `val` is the same object (by identity) as an entry already on
 * the visited stack, 0 otherwise.
 */
static int nota_stack_has(NotaEncodeContext *enc, JSValueConst val)
{
  JSContext *js = enc->ctx;
  const int depth = JS_ArrayLength(js, enc->visitedStack);
  int found = 0;

  for (int i = 0; i < depth && !found; i++) {
    JSValue entry = JS_GetPropertyUint32(js, enc->visitedStack, i);

    found = JS_IsObject(entry) && JS_IsObject(val) &&
            JS_VALUE_GET_OBJ(entry) == JS_VALUE_GET_OBJ(val);

    JS_FreeValue(js, entry); /* release the reference taken by the lookup */
  }

  return found;
}
JSValue number; JSValue number;
char *js_do_nota_decode(JSContext *js, JSValue *tmp, char *nota) char *js_do_nota_decode(JSContext *js, JSValue *tmp, char *nota)
{ {
int type = nota_type(nota); int type = nota_type(nota);
JSValue ret2; JSValue ret2;
long long n; long long n;
double d; double d;
int b; int b;
char *str; char *str;
uint8_t *blob; uint8_t *blob;
switch(type) { switch(type) {
case NOTA_BLOB: case NOTA_BLOB:
nota = nota_read_blob(&n, &blob, nota); nota = nota_read_blob(&n, (char**)&blob, nota);
*tmp = JS_NewArrayBufferCopy(js, blob, n); *tmp = JS_NewArrayBufferCopy(js, blob, n);
free(blob); free(blob);
break; break;
case NOTA_TEXT: case NOTA_TEXT:
nota = nota_read_text(&str, nota);
*tmp = JS_NewString(js, str);
/* TODO: Avoid malloc and free here */
free(str);
break;
case NOTA_ARR:
nota = nota_read_array(&n, nota);
*tmp = JS_NewArray(js);
for (int i = 0; i < n; i++) {
nota = js_do_nota_decode(js, &ret2, nota);
JS_SetPropertyInt64(js, *tmp, i, ret2);
}
break;
case NOTA_REC:
nota = nota_read_record(&n, nota);
*tmp = JS_NewObject(js);
for (int i = 0; i < n; i++) {
nota = nota_read_text(&str, nota); nota = nota_read_text(&str, nota);
nota = js_do_nota_decode(js, &ret2, nota); *tmp = JS_NewString(js, str);
JS_SetPropertyStr(js, *tmp, str, ret2);
free(str); free(str);
} break;
break; case NOTA_ARR:
case NOTA_INT: nota = nota_read_array(&n, nota);
nota = nota_read_int(&n, nota); *tmp = JS_NewArray(js);
*tmp = JS_NewInt64(js,n);
break;
case NOTA_SYM:
nota = nota_read_sym(&b, nota);
switch(b) {
case NOTA_NULL:
*tmp = JS_UNDEFINED;
break;
case NOTA_FALSE:
*tmp = JS_NewBool(js,0);
break;
case NOTA_TRUE:
*tmp = JS_NewBool(js,1);
break;
}
break;
default:
case NOTA_FLOAT:
nota = nota_read_float(&d, nota);
*tmp = JS_NewFloat64(js,d);
break;
}
return nota;
}
// Writers the JSValue v into the buffer of char *nota, returning a pointer to the next byte in nota to be written
char *js_do_nota_encode(JSContext *js, JSValue v, char *nota)
{
int tag = JS_VALUE_GET_TAG(v);
const char *str = NULL;
JSPropertyEnum *ptab;
uint32_t plen;
double nval;
JSValue val;
void *blob;
size_t bloblen;
switch(tag) {
case JS_TAG_FLOAT64:
case JS_TAG_INT:
case JS_TAG_BIG_DECIMAL:
case JS_TAG_BIG_INT:
case JS_TAG_BIG_FLOAT:
JS_ToFloat64(js, &nval, v);
return nota_write_number(nval, nota);
/* str = JS_ToCString(js,v);
nota = nota_write_decimal_str(str, nota);
JS_FreeCString(js,str);
return nota;
*/
case JS_TAG_STRING:
str = JS_ToCString(js, v);
nota = nota_write_text(str, nota);
JS_FreeCString(js, str);
return nota;
case JS_TAG_BOOL:
if (JS_VALUE_GET_BOOL(v)) return nota_write_sym(NOTA_TRUE, nota);
else
return nota_write_sym(NOTA_FALSE, nota);
case JS_TAG_UNDEFINED:
case JS_TAG_NULL:
return nota_write_sym(NOTA_NULL, nota);
case JS_TAG_OBJECT:
blob = JS_GetArrayBuffer(js,&bloblen, v);
if (blob)
return nota_write_blob(bloblen*8, blob, nota);
if (JS_IsArray(js, v)) {
JSValue lengthVal = JS_GetPropertyStr(js, v, "length");
int n;
JS_ToInt32(js, &n, lengthVal);
JS_FreeValue(js, lengthVal);
nota = nota_write_array(n, nota);
for (int i = 0; i < n; i++) { for (int i = 0; i < n; i++) {
JSValue elemVal = JS_GetPropertyUint32(js, v, i); nota = js_do_nota_decode(js, &ret2, nota);
nota = js_do_nota_encode(js, elemVal, nota); JS_SetPropertyInt64(js, *tmp, i, ret2);
JS_FreeValue(js, elemVal);
} }
return nota; break;
} case NOTA_REC:
nota = nota_read_record(&n, nota);
JS_GetOwnPropertyNames(js, &ptab, &plen, v, JS_GPN_ENUM_ONLY | JS_GPN_STRING_MASK); *tmp = JS_NewObject(js);
nota = nota_write_record(plen, nota); for (int i = 0; i < n; i++) {
nota = nota_read_text(&str, nota);
nota = js_do_nota_decode(js, &ret2, nota);
JS_SetPropertyStr(js, *tmp, str, ret2);
free(str);
}
break;
case NOTA_INT:
nota = nota_read_int(&n, nota);
*tmp = JS_NewInt64(js,n);
break;
case NOTA_SYM:
nota = nota_read_sym(&b, nota);
switch(b) {
case NOTA_NULL:
*tmp = JS_UNDEFINED;
break;
case NOTA_FALSE:
*tmp = JS_NewBool(js,0);
break;
case NOTA_TRUE:
*tmp = JS_NewBool(js,1);
break;
}
break;
default:
case NOTA_FLOAT:
nota = nota_read_float(&d, nota);
*tmp = JS_NewFloat64(js,d);
break;
}
for (int i = 0; i < plen; i++) { return nota;
val = JS_GetProperty(js,v,ptab[i].atom);
str = JS_AtomToCString(js, ptab[i].atom);
JS_FreeAtom(js, ptab[i].atom);
nota = nota_write_text(str, nota);
JS_FreeCString(js, str);
nota = js_do_nota_encode(js, val, nota);
JS_FreeValue(js,val);
}
js_free(js, ptab);
return nota;
default:
return nota;
}
return nota;
} }
JSValue js_nota_encode(JSContext *js, JSValue self, int argc, JSValue *argv) static void nota_encode_value(NotaEncodeContext *enc, JSValueConst val);
{
if (argc < 1)
JS_ThrowInternalError(js, "Expected at least one argument to encode.");
JSValue obj = argv[0]; static void encode_object_properties(NotaEncodeContext *enc, JSValueConst val)
char nota[1024*1024]; {
char *e = js_do_nota_encode(js, obj, nota); JSContext *ctx = enc->ctx;
return JS_NewArrayBufferCopy(js, (unsigned char*)nota, e-nota);
JSPropertyEnum *ptab;
uint32_t plen;
if (JS_GetOwnPropertyNames(ctx, &ptab, &plen, val, JS_GPN_ENUM_ONLY | JS_GPN_STRING_MASK) < 0) {
nota_write_sym(&enc->nb, NOTA_NULL);
return;
}
nota_write_record(&enc->nb, plen);
for (uint32_t i = 0; i < plen; i++) {
// property name
const char *propName = JS_AtomToCString(ctx, ptab[i].atom);
nota_write_text(&enc->nb, propName);
JS_FreeCString(ctx, propName);
// property value
JSValue propVal = JS_GetProperty(ctx, val, ptab[i].atom);
nota_encode_value(enc, propVal);
JS_FreeValue(ctx, propVal);
// free the atom
JS_FreeAtom(ctx, ptab[i].atom);
}
js_free(ctx, ptab);
}
/*
 * Encode a single JS value into enc->nb, recursing into arrays and objects.
 *
 *   numeric tags      -> converted to double and written with nota_write_number
 *   string            -> written with nota_write_text
 *   boolean           -> NOTA_TRUE / NOTA_FALSE symbol
 *   null / undefined  -> NOTA_NULL symbol
 *   ArrayBuffer       -> blob whose length is given in bits (bytes * 8)
 *   array             -> array header, then each element
 *   other object      -> record of its properties (encode_object_properties)
 *   anything else     -> NOTA_NULL symbol
 *
 * Containers are tracked through nota_stack_push/nota_stack_pop while they are
 * being encoded. Meeting a container that nota_stack_has reports sets
 * enc->cycle and returns without writing anything; the caller is expected to
 * test enc->cycle and discard the buffer.
 */
static void nota_encode_value(NotaEncodeContext *enc, JSValueConst val)
{
  JSContext *ctx = enc->ctx;
  int tag = JS_VALUE_GET_TAG(val);

  switch (tag) {
    case JS_TAG_INT:
    case JS_TAG_BIG_INT:
    case JS_TAG_FLOAT64:
    case JS_TAG_BIG_DECIMAL:
    case JS_TAG_BIG_FLOAT: {
      /* Initialised so a failed conversion can never emit an indeterminate value. */
      double d = 0.0;
      if (JS_ToFloat64(ctx, &d, val) < 0)
        JS_FreeValue(ctx, JS_GetException(ctx));
      nota_write_number(&enc->nb, d);
      return;
    }

    case JS_TAG_STRING: {
      const char *str = JS_ToCString(ctx, val);
      if (!str) {
        /* Conversion failed: encode as null rather than passing NULL to the writer. */
        JS_FreeValue(ctx, JS_GetException(ctx));
        nota_write_sym(&enc->nb, NOTA_NULL);
        return;
      }
      nota_write_text(&enc->nb, str);
      JS_FreeCString(ctx, str);
      return;
    }

    case JS_TAG_BOOL:
      nota_write_sym(&enc->nb, JS_VALUE_GET_BOOL(val) ? NOTA_TRUE : NOTA_FALSE);
      return;

    case JS_TAG_NULL:
    case JS_TAG_UNDEFINED:
      nota_write_sym(&enc->nb, NOTA_NULL);
      return;

    case JS_TAG_OBJECT: {
      size_t bufLen;
      void *bufData = JS_GetArrayBuffer(ctx, &bufLen, val);
      if (bufData) {
        /* Write as a blob of bits (bufLen * 8). */
        nota_write_blob(&enc->nb, (unsigned long long)bufLen * 8, (const char*)bufData);
        return;
      }
      /*
       * JS_GetArrayBuffer is used here as a type probe. In QuickJS it raises a
       * TypeError when the object is not an ArrayBuffer, so drop that exception
       * instead of leaving it pending for every ordinary object or array.
       */
      JS_FreeValue(ctx, JS_GetException(ctx));

      /* Arrays and plain objects share the same cycle guard. */
      if (nota_stack_has(enc, val)) {
        enc->cycle = 1;
        return; // bail out
      }

      nota_stack_push(enc, val);
      if (JS_IsArray(ctx, val)) {
        int arrLen = JS_ArrayLength(ctx, val);
        nota_write_array(&enc->nb, arrLen);
        for (int i = 0; i < arrLen; i++) {
          JSValue elemVal = JS_GetPropertyUint32(ctx, val, i);
          nota_encode_value(enc, elemVal);
          JS_FreeValue(ctx, elemVal);
        }
      } else {
        encode_object_properties(enc, val);
      }
      nota_stack_pop(enc);
      return;
    }

    default:
      nota_write_sym(&enc->nb, NOTA_NULL);
      return;
  }
}
/*
 * nota.encode(value): encode argv[0] and return the bytes as a new ArrayBuffer.
 *
 * Throws a TypeError when called without an argument and a ReferenceError when
 * the encoder flags a cycle in the value.
 */
static JSValue js_nota_encode(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv)
{
  if (argc < 1)
    return JS_ThrowTypeError(ctx, "nota.encode requires 1 argument");

  NotaEncodeContext enc_s, *enc = &enc_s;
  enc->ctx = ctx;
  enc->cycle = 0;
  enc->visitedStack = JS_NewArray(ctx); // empty array initially
  if (JS_IsException(enc->visitedStack))
    return JS_EXCEPTION; /* nothing else has been allocated yet */

  nota_buffer_init(&enc->nb, 128);
  nota_encode_value(enc, argv[0]);

  /* The visited stack is only needed while encoding. */
  JS_FreeValue(ctx, enc->visitedStack);

  JSValue ret;
  if (enc->cycle) {
    ret = JS_ThrowReferenceError(ctx, "Tried to encode something to nota with a cycle.");
  } else {
    /* Copy the used bytes out so the scratch buffer can be released below. */
    ret = JS_NewArrayBufferCopy(ctx, (uint8_t*)enc->nb.data, enc->nb.size);
  }

  /* Single release point for the scratch buffer on both outcomes. */
  nota_buffer_free(&enc->nb);
  return ret;
}
/*
 * nota.decode(buffer): decode the Nota data held in the ArrayBuffer argv[0].
 *
 * Returns undefined when no argument is given, when argv[0] does not yield
 * ArrayBuffer storage, or when the buffer is empty.
 *
 * NOTE(review): js_do_nota_decode is handed only the start pointer, not `len`,
 * so reads on truncated or malformed input are not bounded here.
 */
JSValue js_nota_decode(JSContext *js, JSValue self, int argc, JSValue *argv)
{
  if (argc < 1) return JS_UNDEFINED;

  size_t len;
  unsigned char *nota = JS_GetArrayBuffer(js, &len, argv[0]);
  if (!nota) {
    /*
     * QuickJS raises a TypeError when the value is not a usable ArrayBuffer.
     * This function reports that case as undefined, so do not leave the
     * exception pending next to a normal return value.
     */
    JS_FreeValue(js, JS_GetException(js));
    return JS_UNDEFINED;
  }
  if (len == 0) return JS_UNDEFINED; /* nothing to decode; avoid reading past an empty buffer */

  /* Pre-set so a decode path that stores nothing still returns a defined value. */
  JSValue ret = JS_UNDEFINED;
  js_do_nota_decode(js, &ret, (char*)nota);
  return ret;
}
static const JSCFunctionListEntry js_nota_funcs[] = { static const JSCFunctionListEntry js_nota_funcs[] = {
JS_CFUNC_DEF("encode", 1, js_nota_encode), JS_CFUNC_DEF("encode", 1, js_nota_encode),
JS_CFUNC_DEF("decode", 1, js_nota_decode), JS_CFUNC_DEF("decode", 1, js_nota_decode),
}; };
/*
 * Module initialiser passed to JS_NewCModule: installs the js_nota_funcs
 * exports on module `m`. Returns the status of JS_SetModuleExportList so a
 * failure to set the exports is reported instead of being masked as success.
 */
static int js_nota_init(JSContext *ctx, JSModuleDef *m) {
  return JS_SetModuleExportList(ctx, m, js_nota_funcs,
                                sizeof(js_nota_funcs)/sizeof(JSCFunctionListEntry));
}
/*
 * Build and return a fresh object carrying the nota functions (encode/decode).
 * As a side effect, stores the global `Number` property in the file-level
 * `number` variable.
 */
JSValue js_nota_use(JSContext *js)
{
  JSValue export = JS_NewObject(js);
  JS_SetPropertyFunctionList(js, export,
                             js_nota_funcs,
                             sizeof(js_nota_funcs)/sizeof(JSCFunctionListEntry));

  /* JS_GetGlobalObject returns a new reference; release it once Number is fetched. */
  JSValue global = JS_GetGlobalObject(js);
  number = JS_GetPropertyStr(js, global, "Number");
  JS_FreeValue(js, global);

  return export;
}
#ifdef JS_SHARED_LIBRARY #ifdef JS_SHARED_LIBRARY
@@ -203,6 +300,7 @@ JSValue js_nota_use(JSContext *js)
/*
 * Module entry point: creates the C module named `module_name` with
 * js_nota_init as its initialiser and declares the js_nota_funcs exports.
 * Returns NULL if the module could not be created.
 */
JSModuleDef *JS_INIT_MODULE(JSContext *ctx, const char *module_name) {
  JSModuleDef *m = JS_NewCModule(ctx, module_name, js_nota_init);
  if (!m) return NULL;
  JS_AddModuleExportList(ctx, m, js_nota_funcs,
                         sizeof(js_nota_funcs)/sizeof(JSCFunctionListEntry));
  return m;
}