#ifndef NOTA_H #define NOTA_H #include #include #include "kim.h" #define NOTA_BLOB 0x00 #define NOTA_TEXT 0x10 #define NOTA_ARR 0x20 #define NOTA_REC 0x30 #define NOTA_FLOAT 0x40 #define NOTA_INT 0x60 #define NOTA_SYM 0x70 #define NOTA_NULL 0x00 #define NOTA_FALSE 0x02 #define NOTA_TRUE 0x03 #define NOTA_INF 0x03 #define NOTA_PRIVATE 0x08 #define NOTA_SYSTEM 0x09 #define NOTA_CONT 0x80 #define NOTA_DATA 0x7f #define NOTA_INT_DATA 0x07 #define NOTA_INT_SIGN(CHAR) (CHAR & (1<<3)) #define NOTA_SIG_SIGN(CHAR) (CHAR & (1<<3)) #define NOTA_EXP_SIGN(CHAR) (CHAR & (1<<4)) #define NOTA_TYPE 0x70 #define NOTA_HEAD_DATA 0x0f #define CONTINUE(CHAR) (CHAR>>7) #define UTF8_DATA 0x3f static inline int nota_type(const char *nota) { return (*nota) & 0x70; } char *nota_read_blob(long long *len, char **blob, char *nota); char *nota_read_text(char **text, char *nota); char *nota_read_array(long long *len, char *nota); char *nota_read_record(long long *len, char *nota); char *nota_read_float(double *d, char *nota); char *nota_read_int(long long *n, char *nota); char *nota_read_sym(int *sym, char *nota); typedef struct NotaBuffer { char *data; size_t size; /* number of bytes used */ size_t capacity; /* allocated size of data */ } NotaBuffer; void nota_buffer_init(NotaBuffer *nb, size_t initial_capacity); void nota_buffer_free(NotaBuffer *nb); void nota_write_blob (NotaBuffer *nb, unsigned long long nbits, const char *data); void nota_write_text (NotaBuffer *nb, const char *s); void nota_write_array (NotaBuffer *nb, unsigned long long count); void nota_write_record(NotaBuffer *nb, unsigned long long count); void nota_write_number(NotaBuffer *nb, double n); void nota_write_sym (NotaBuffer *nb, int sym); #ifdef NOTA_IMPLEMENTATION #include #include #include #include #include static inline char *nota_skip(char *nota) { while (CONTINUE(*nota)) nota++; return nota + 1; } char *nota_read_num(long long *n, char *nota) { if (!n) return nota_skip(nota); unsigned char b = (unsigned char)*nota; long long result = b & NOTA_HEAD_DATA; nota++; while (b & NOTA_CONT) { b = (unsigned char)*nota++; result = (result << 7) | (b & NOTA_DATA); } *n = result; return nota; } /* Count how many bits of varint we need to encode n, with sb “special bits” in the first byte */ static inline int nota_bits(long long n, int sb) { if (n == 0) return sb; int bits = (sizeof(n)*CHAR_BIT) - __builtin_clzll(n); bits -= sb; int needed = ((bits + 6) / 7)*7 + sb; return needed; } static inline char *nota_continue_num(long long n, char *nota, int sb) { int bits = nota_bits(n, sb); bits -= sb; if (bits > 0) nota[0] |= NOTA_CONT; else nota[0] &= ~NOTA_CONT; int shex = (~0) << sb; nota[0] &= shex; /* clear sb bits */ nota[0] |= (~shex) & ((unsigned long long)n >> bits); int i = 1; while (bits > 0) { bits -= 7; int head = (bits == 0) ? 0 : NOTA_CONT; nota[i] = head | (NOTA_DATA & (n >> bits)); i++; } return ¬a[i]; } char *nota_read_blob(long long *len, char **blob, char *nota) { if (!len) return nota; nota = nota_read_num(len, nota); int bytes = (int)floor((*len + 7) / 8.0); *len = bytes; *blob = (char *)malloc(bytes); memcpy(*blob, nota, bytes); return nota + bytes; } char *nota_read_text(char **text, char *nota) { long long chars; nota = nota_read_num(&chars, nota); char utf[chars*4 + 1]; /* enough for wide chars + null */ char *pp = utf; kim_to_utf8(¬a, &pp, chars); *pp = 0; *text = strdup(utf); return nota; } char *nota_read_array(long long *len, char *nota) { if (!len) return nota; return nota_read_num(len, nota); } char *nota_read_record(long long *len, char *nota) { if (!len) return nota; return nota_read_num(len, nota); } char *nota_read_float(double *d, char *nota) { if (!d) { return nota_skip(nota); } int neg = NOTA_SIG_SIGN(*nota); int esign = NOTA_EXP_SIGN(*nota); long long e = (*nota) & NOTA_INT_DATA; while (CONTINUE(*nota)) { nota++; e = (e << 7) | ((*nota) & NOTA_DATA); } nota++; long long sig = (*nota) & NOTA_DATA; while (CONTINUE(*nota)) { nota++; sig = (sig << 7) | ((*nota) & NOTA_DATA); } nota++; if (neg) sig = -sig; if (esign) e = -e; *d = (double)sig * pow(10.0, (double)e); return nota; } char *nota_read_int(long long *n, char *nota) { if (!n) return nota_skip(nota); *n = 0; char *c = nota; *n |= (*c) & NOTA_INT_DATA; while (CONTINUE(*(c++))) { *n = (*n << 7) | (*c & NOTA_DATA); } /* if sign bit is set in the first byte, negative. */ if (NOTA_INT_SIGN(*nota)) *n = -*n; return c; } char *nota_read_sym(int *sym, char *nota) { if (sym) *sym = ((*nota) & 0x0f); return nota + 1; } static void nota_buffer_grow(NotaBuffer *nb, size_t min_add) { size_t needed = nb->size + min_add; if (needed <= nb->capacity) return; size_t new_cap = (nb->capacity == 0 ? 64 : nb->capacity * 2); while (new_cap < needed) { new_cap *= 2; } char *new_data = (char *)realloc(nb->data, new_cap); if (!new_data) { fprintf(stderr, "realloc failed in nota_buffer_grow\n"); abort(); } nb->data = new_data; nb->capacity = new_cap; } void nota_buffer_init(NotaBuffer *nb, size_t initial_capacity) { nb->data = NULL; nb->size = 0; nb->capacity = 0; if (initial_capacity > 0) { nb->data = (char *)malloc(initial_capacity); if (!nb->data) { fprintf(stderr, "malloc failed in nota_buffer_init\n"); abort(); } nb->capacity = initial_capacity; } } void nota_buffer_free(NotaBuffer *nb) { if (nb->data) free(nb->data); nb->data = NULL; nb->size = 0; nb->capacity = 0; } static char *nota_buffer_alloc(NotaBuffer *nb, size_t len) { nota_buffer_grow(nb, len); char *p = nb->data + nb->size; nb->size += len; return p; } static void nota_write_int_buf(NotaBuffer *nb, long long n); static void nota_write_float_buf(NotaBuffer *nb, double d); static void nota_write_int_or_float_buf(NotaBuffer *nb, double n) { if (n < (double)INT64_MIN || n > (double)INT64_MAX) { nota_write_float_buf(nb, n); return; } double ip; double frac = modf(n, &ip); if (fabs(frac) < 1e-14) nota_write_int_buf(nb, (long long)ip); else nota_write_float_buf(nb, n); } void nota_write_sym(NotaBuffer *nb, int sym) { char *p = nota_buffer_alloc(nb, 1); *p = NOTA_SYM | (sym & 0x0f); } void nota_write_blob(NotaBuffer *nb, unsigned long long nbits, const char *data) { unsigned long long bytes_len = (nbits + 7ULL) >> 3; char *p = nota_buffer_alloc(nb, 1 + 10 + bytes_len); p[0] = NOTA_BLOB; char *end = nota_continue_num(nbits, p, 4); size_t varint_used = (size_t)(end - p - 1); memcpy(end, data, (size_t)bytes_len); size_t total_used = 1 + varint_used + bytes_len; size_t allocated = 1 + 10 + bytes_len; nb->size -= (allocated - total_used); } void nota_write_text(NotaBuffer *nb, const char *s) { /* ASCII fast path: if all bytes < 0x80, KIM == UTF-8 and rune count == byte count */ size_t slen = strlen(s); const unsigned char *scan = (const unsigned char *)s; int is_ascii = 1; for (size_t k = 0; k < slen; k++) { if (scan[k] >= 0x80) { is_ascii = 0; break; } } if (is_ascii) { long long runes = (long long)slen; char *p = nota_buffer_alloc(nb, 1 + 10 + slen); p[0] = NOTA_TEXT; char *end = nota_continue_num(runes, p, 4); memcpy(end, s, slen); size_t used = (size_t)(end - p) + slen; size_t allocated = 1 + 10 + slen; nb->size -= (allocated - used); return; } /* Non-ASCII path: full UTF-8 decode + KIM encode */ long long runes = utf8_count(s); size_t max_kim = (size_t)(runes * 5); char *p = nota_buffer_alloc(nb, 1 + 10 + max_kim); p[0] = NOTA_TEXT; char *end = nota_continue_num(runes, p, 4); char *kim_out = end; const char *utf_in = s; while (*utf_in) { int codepoint = decode_utf8((char **)&utf_in); encode_kim(&kim_out, codepoint); } size_t used = (size_t)(kim_out - p); size_t allocated = 1 + 10 + max_kim; nb->size -= (allocated - used); } void nota_write_array(NotaBuffer *nb, unsigned long long count) { char *p = nota_buffer_alloc(nb, 10); p[0] = NOTA_ARR; char *end = nota_continue_num(count, p, 4); size_t used = (size_t)(end - p); nb->size -= (10 - used); } void nota_write_record(NotaBuffer *nb, unsigned long long count) { char *p = nota_buffer_alloc(nb, 10); p[0] = NOTA_REC; char *end = nota_continue_num(count, p, 4); size_t used = (size_t)(end - p); nb->size -= (10 - used); } void nota_write_number(NotaBuffer *nb, double n) { nota_write_int_or_float_buf(nb, n); } static void nota_write_int_buf(NotaBuffer *nb, long long n) { /* up to ~10 bytes for varint */ char *p = nota_buffer_alloc(nb, 10); char sign = 0; if (n < 0) { sign = 0x08; /* sign bit in the nibble */ n = -n; } p[0] = NOTA_INT | sign; char *end = nota_continue_num(n, p, 3); size_t used = (size_t)(end - p); nb->size -= (10 - used); } static const double nota_pow10_table[29] = { 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28 }; static void extract_mantissa_coefficient(double num, long *coefficient, long *exponent) { if (num == 0.0) { *coefficient = 0; *exponent = 0; return; } double absval = fabs(num); int sign = (num < 0) ? -1 : 1; /* Get decimal exponent via log10 */ int dec_exp = (int)floor(log10(absval)); /* Scale to extract 14-digit coefficient. We want coeff * 10^exp = absval, with coeff having up to 14 digits. So coeff = absval * 10^(13 - dec_exp), exp = dec_exp - 13 */ int shift = 13 - dec_exp; double scaled; if (shift >= 0 && shift <= 28) { scaled = absval * nota_pow10_table[shift]; } else if (shift < 0 && -shift <= 28) { scaled = absval / nota_pow10_table[-shift]; } else { scaled = absval * pow(10.0, (double)shift); } long long coeff = (long long)(scaled + 0.5); /* Correct off-by-one from log10 rounding */ if (coeff >= 100000000000000LL) { coeff = (coeff + 5) / 10; shift--; } else if (coeff < 10000000000000LL && coeff > 0) { coeff = (long long)(absval * pow(10.0, (double)(shift + 1)) + 0.5); shift++; } int exp_out = -shift; /* Strip trailing zeros */ while (coeff != 0 && coeff % 10 == 0) { coeff /= 10; exp_out++; } *coefficient = (long)(coeff * sign); *exponent = (long)exp_out; } static void nota_write_float_buf(NotaBuffer *nb, double d) { if (d == 0.0) { nota_write_int_buf(nb, 0); return; } long coef, exp; extract_mantissa_coefficient(d, &coef, &exp); if (coef == 0) { nota_write_int_buf(nb, 0); return; } int neg = (d < 0.0); if (exp == 0) { nota_write_int_buf(nb, neg ? -coef : coef); return; } char *p = nota_buffer_alloc(nb, 21); p[0] = NOTA_FLOAT; if (neg) p[0] |= (1 << 3); if (exp < 0) { p[0] |= (1 << 4); exp = -exp; } char *c = nota_continue_num(exp, p, 3); char *end = nota_continue_num(labs(coef), c, 7); size_t used = (size_t)(end - p); nb->size -= (21 - used); } #endif /* NOTA_IMPLEMENTATION */ #endif /* NOTA_H */