491 lines
12 KiB
C
Executable File
491 lines
12 KiB
C
Executable File
#ifndef NOTA_H
|
|
#define NOTA_H
|
|
|
|
#include <stddef.h>
|
|
#include <stdint.h>
|
|
#include "kim.h"
|
|
|
|
/* ---- Type tags: selected by NOTA_TYPE (bits 4-6) in a value's first byte ---- */
#define NOTA_BLOB 0x00
#define NOTA_TEXT 0x10
#define NOTA_ARR 0x20
#define NOTA_REC 0x30
#define NOTA_FLOAT 0x40
#define NOTA_INT 0x60
#define NOTA_SYM 0x70

/* ---- Symbol values (presumably the payload carried by a NOTA_SYM byte) ---- */
#define NOTA_NULL 0x00
#define NOTA_FALSE 0x02
#define NOTA_TRUE 0x03
/* NOTE(review): NOTA_INF has the same value as NOTA_TRUE - confirm this is intended. */
#define NOTA_INF 0x03
#define NOTA_PRIVATE 0x08
#define NOTA_SYSTEM 0x09

/* ---- Bit masks and per-byte helpers ---- */
#define NOTA_CONT 0x80       /* continuation flag: another varint byte follows */
#define NOTA_DATA 0x7f       /* payload bits of a continuation byte */
#define NOTA_INT_DATA 0x07   /* payload bits in the first byte of an int/float */

/* The macro arguments are fully parenthesized so that an expression such as
   NOTA_INT_SIGN(a | b) tests the whole expression, not just its last operand. */
#define NOTA_INT_SIGN(CHAR) ((CHAR) & (1<<3))
#define NOTA_SIG_SIGN(CHAR) ((CHAR) & (1<<3))
#define NOTA_EXP_SIGN(CHAR) ((CHAR) & (1<<4))

#define NOTA_TYPE 0x70       /* mask selecting the type tag */
#define NOTA_HEAD_DATA 0x0f  /* payload bits in the first byte of blob/text/array/record */

/* Non-zero when the continuation flag of CHAR is set.  The byte is inspected
   as unsigned char because plain char may be signed and right-shifting a
   negative value is implementation-defined. */
#define CONTINUE(CHAR) (((unsigned char)(CHAR)) >> 7)

#define UTF8_DATA 0x3f
|
|
|
|
/* Return the type tag (the bits selected by mask 0x70) of the value whose
   first byte is at `nota`. */
static inline int nota_type(const char *nota)
{
    const char head = nota[0];
    return head & 0x70;
}
|
|
|
|
/*
 * Decoding API.  Every reader takes a pointer to the first byte of an encoded
 * value and returns a pointer just past what it consumed.
 */

/* Read the raw variable-length count stored in a value's head byte (low 4
   bits) plus continuation bytes.  With n == NULL the number is skipped. */
char *nota_read_num(long long *n, char *nota);

/* Read a blob.  *blob receives a malloc'd copy of the payload (caller frees)
   and *len its size in BYTES.  With len == NULL nothing is read and `nota`
   is returned unchanged. */
char *nota_read_blob(long long *len, char **blob, char *nota);

/* Read a text value.  *text receives a heap-allocated, NUL-terminated UTF-8
   string (caller frees). */
char *nota_read_text(char **text, char *nota);

/* Read an array header; *len receives the count stored in the header.
   With len == NULL nothing is read and `nota` is returned unchanged. */
char *nota_read_array(long long *len, char *nota);

/* Read a record header; *len receives the count stored in the header.
   With len == NULL nothing is read and `nota` is returned unchanged. */
char *nota_read_record(long long *len, char *nota);

/* Read a decimal floating-point value (coefficient * 10^exponent) into *d. */
char *nota_read_float(double *d, char *nota);

/* Read a signed integer into *n.  With n == NULL the value is skipped. */
char *nota_read_int(long long *n, char *nota);

/* Read a one-byte symbol; *sym (if non-NULL) receives its low nibble. */
char *nota_read_sym(int *sym, char *nota);
|
|
|
|
/* Growable byte buffer that the nota_write_* functions append encoded
   values to.  Set up with nota_buffer_init, release with nota_buffer_free. */
typedef struct NotaBuffer {
    char *data;      /* heap storage; NULL while nothing is allocated */
    size_t size;     /* number of bytes used */
    size_t capacity; /* allocated size of data */
} NotaBuffer;
|
|
|
|
/*
 * Encoding API.  Each nota_write_* call appends one encoded item to `nb`,
 * growing the buffer as required.
 */

/* Reset `nb` to empty and, when initial_capacity > 0, pre-allocate that many
   bytes.  Aborts the process if the allocation fails. */
void nota_buffer_init(NotaBuffer *nb, size_t initial_capacity);

/* Release the storage owned by `nb` and reset it to the empty state. */
void nota_buffer_free(NotaBuffer *nb);

/* Append a blob of `nbits` bits; (nbits + 7) / 8 bytes are copied from `data`. */
void nota_write_blob(NotaBuffer *nb, unsigned long long nbits, const char *data);

/* Append the NUL-terminated string `s` as a text value. */
void nota_write_text(NotaBuffer *nb, const char *s);

/* Append an array header carrying `count`; the elements are written by
   subsequent calls. */
void nota_write_array(NotaBuffer *nb, unsigned long long count);

/* Append a record header carrying `count`; the members are written by
   subsequent calls. */
void nota_write_record(NotaBuffer *nb, unsigned long long count);

/* Append a number, encoded as an integer or as a decimal float. */
void nota_write_number(NotaBuffer *nb, double n);

/* Append a one-byte symbol; only the low 4 bits of `sym` are stored. */
void nota_write_sym(NotaBuffer *nb, int sym);
|
|
|
|
#ifdef NOTA_IMPLEMENTATION
|
|
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include <math.h>
|
|
#include <limits.h>
|
|
|
|
static inline char *nota_skip(char *nota)
|
|
{
|
|
while (CONTINUE(*nota))
|
|
nota++;
|
|
return nota + 1;
|
|
}
|
|
|
|
char *nota_read_num(long long *n, char *nota)
|
|
{
|
|
if (!n)
|
|
return nota_skip(nota);
|
|
|
|
unsigned char b = (unsigned char)*nota;
|
|
long long result = b & NOTA_HEAD_DATA;
|
|
nota++;
|
|
|
|
while (b & NOTA_CONT) {
|
|
b = (unsigned char)*nota++;
|
|
result = (result << 7) | (b & NOTA_DATA);
|
|
}
|
|
|
|
*n = result;
|
|
return nota;
|
|
}
|
|
|
|
/* Count how many bits of varint we need to encode n, with sb "special bits"
   available in the first byte.

   The result is always sb + 7*k: sb when n fits in the first byte, plus 7
   for every continuation byte required.  n is treated as an unsigned 64-bit
   pattern, so a negative n counts as 64 significant bits. */
static inline int nota_bits(long long n, int sb)
{
    if (n == 0) {
        return sb;
    }

    unsigned long long v = (unsigned long long)n;
    int width; /* position of the highest set bit, 1-based */

#if defined(__GNUC__) || defined(__clang__)
    width = (int)(sizeof v * CHAR_BIT) - __builtin_clzll(v);
#else
    /* Portable fallback for compilers without the GCC builtin. */
    width = 0;
    while (v != 0) {
        width++;
        v >>= 1;
    }
#endif

    /* Bits that do not fit in the first byte, rounded up to whole 7-bit
       groups.  When the value fits, `extra` is in [-6, 0] and rounds to 0. */
    int extra = width - sb;
    return ((extra + 6) / 7) * 7 + sb;
}
|
|
|
|
static inline char *nota_continue_num(long long n, char *nota, int sb)
|
|
{
|
|
int bits = nota_bits(n, sb);
|
|
bits -= sb;
|
|
|
|
if (bits > 0)
|
|
nota[0] |= NOTA_CONT;
|
|
else
|
|
nota[0] &= ~NOTA_CONT;
|
|
|
|
int shex = (~0) << sb;
|
|
nota[0] &= shex; /* clear sb bits */
|
|
nota[0] |= (~shex) & ((unsigned long long)n >> bits);
|
|
|
|
int i = 1;
|
|
while (bits > 0) {
|
|
bits -= 7;
|
|
int head = (bits == 0) ? 0 : NOTA_CONT;
|
|
nota[i] = head | (NOTA_DATA & (n >> bits));
|
|
i++;
|
|
}
|
|
|
|
return ¬a[i];
|
|
}
|
|
|
|
/* Decode a blob.
 *
 * len   receives the payload size in BYTES (the encoded header holds a bit
 *       count, which is rounded up).  When NULL nothing is read and `nota`
 *       is returned unchanged.
 *       NOTE(review): the other readers skip the value when their output
 *       pointer is NULL; this one does not advance - confirm callers rely on it.
 * blob  receives a malloc'd copy of the payload that the caller must free();
 *       may be NULL to step over the payload without copying.
 * nota  first byte of the encoded blob.
 * Returns a pointer just past the payload.  Aborts if allocation fails.
 */
char *nota_read_blob(long long *len, char **blob, char *nota)
{
    if (!len) {
        return nota;
    }

    nota = nota_read_num(len, nota);

    /* Round bits up to bytes with integer arithmetic; a negative count can
       only come from corrupt input and is treated as empty. */
    long long bits = (*len < 0) ? 0 : *len;
    size_t bytes = (size_t)(bits / 8) + ((bits % 8) != 0 ? 1u : 0u);
    *len = (long long)bytes;

    if (blob) {
        /* Allocate at least one byte so *blob is always a valid pointer. */
        char *copy = (char *)malloc(bytes != 0 ? bytes : 1);
        if (!copy) {
            fprintf(stderr, "malloc failed in nota_read_blob\n");
            abort();
        }
        if (bytes != 0) {
            memcpy(copy, nota, bytes);
        }
        *blob = copy;
    }

    return nota + bytes;
}
|
|
|
|
/* Decode a text value into a NUL-terminated UTF-8 string.
 *
 * text  receives a heap-allocated string that the caller must free(); may be
 *       NULL to step over the value without keeping it.
 * nota  first byte of the encoded text.
 * Returns a pointer just past the encoded characters.
 *
 * The header holds the character count; the characters are converted with
 * kim_to_utf8 (from kim.h).  The scratch buffer lives on the heap rather than
 * in a variable-length array because the count comes from the input and could
 * otherwise overflow the stack.  Aborts on an impossible length or when
 * allocation fails.
 */
char *nota_read_text(char **text, char *nota)
{
    long long chars;
    nota = nota_read_num(&chars, nota);

    if (chars < 0 || (unsigned long long)chars > (SIZE_MAX - 1) / 4) {
        fprintf(stderr, "invalid text length in nota_read_text\n");
        abort();
    }

    /* Up to 4 UTF-8 bytes per character, plus the terminator. */
    char *utf = (char *)malloc((size_t)chars * 4 + 1);
    if (!utf) {
        fprintf(stderr, "malloc failed in nota_read_text\n");
        abort();
    }

    char *pp = utf;
    kim_to_utf8(&nota, &pp, chars);
    *pp = 0;

    if (!text) {
        free(utf);
        return nota;
    }

    /* Trim to the C-string length (what strdup of the buffer would yield);
       if shrinking fails, the original allocation is still valid. */
    char *fit = (char *)realloc(utf, strlen(utf) + 1);
    *text = fit ? fit : utf;
    return nota;
}
|
|
|
|
/* Read an array header.  *len receives the count stored in the header and
   the returned pointer addresses the byte after the header.  With
   len == NULL nothing is consumed and `nota` is returned unchanged. */
char *nota_read_array(long long *len, char *nota)
{
    return len ? nota_read_num(len, nota) : nota;
}
|
|
|
|
/* Read a record header.  *len receives the count stored in the header and
   the returned pointer addresses the byte after the header.  With
   len == NULL nothing is consumed and `nota` is returned unchanged. */
char *nota_read_record(long long *len, char *nota)
{
    return len ? nota_read_num(len, nota) : nota;
}
|
|
|
|
char *nota_read_float(double *d, char *nota)
|
|
{
|
|
if (!d) {
|
|
return nota_skip(nota);
|
|
}
|
|
|
|
int neg = NOTA_SIG_SIGN(*nota);
|
|
int esign = NOTA_EXP_SIGN(*nota);
|
|
|
|
long long e = (*nota) & NOTA_INT_DATA;
|
|
while (CONTINUE(*nota)) {
|
|
nota++;
|
|
e = (e << 7) | ((*nota) & NOTA_DATA);
|
|
}
|
|
nota++;
|
|
|
|
long long sig = (*nota) & NOTA_DATA;
|
|
while (CONTINUE(*nota)) {
|
|
nota++;
|
|
sig = (sig << 7) | ((*nota) & NOTA_DATA);
|
|
}
|
|
nota++;
|
|
|
|
if (neg) sig = -sig;
|
|
if (esign) e = -e;
|
|
|
|
*d = (double)sig * pow(10.0, (double)e);
|
|
return nota;
|
|
}
|
|
|
|
char *nota_read_int(long long *n, char *nota)
|
|
{
|
|
if (!n) return nota_skip(nota);
|
|
|
|
*n = 0;
|
|
char *c = nota;
|
|
*n |= (*c) & NOTA_INT_DATA;
|
|
while (CONTINUE(*(c++))) {
|
|
*n = (*n << 7) | (*c & NOTA_DATA);
|
|
}
|
|
|
|
/* if sign bit is set in the first byte, negative. */
|
|
if (NOTA_INT_SIGN(*nota)) *n = -*n;
|
|
|
|
return c;
|
|
}
|
|
|
|
/* Decode a one-byte symbol.
 *
 * sym   when non-NULL, receives the low 4 bits of the byte at `nota`.
 * nota  the symbol byte.
 * Returns a pointer to the byte after the symbol.
 */
char *nota_read_sym(int *sym, char *nota)
{
    if (sym) {
        *sym = (*nota) & 0x0f;
    }
    return nota + 1;
}
|
|
|
|
/* Ensure `nb` can hold at least `min_add` more bytes beyond nb->size.
 *
 * Capacity starts at 64 bytes (or twice the current capacity) and doubles
 * until it suffices.  Aborts the process if the required size does not fit
 * in size_t or if realloc fails.
 */
static void nota_buffer_grow(NotaBuffer *nb, size_t min_add)
{
    if (min_add > SIZE_MAX - nb->size) {
        fprintf(stderr, "size overflow in nota_buffer_grow\n");
        abort();
    }

    size_t needed = nb->size + min_add;
    if (needed <= nb->capacity) {
        return;
    }

    size_t new_cap;
    if (nb->capacity == 0) {
        new_cap = 64;
    } else if (nb->capacity > SIZE_MAX / 2) {
        new_cap = needed; /* doubling would wrap around */
    } else {
        new_cap = nb->capacity * 2;
    }

    while (new_cap < needed) {
        if (new_cap > SIZE_MAX / 2) {
            /* Doubling would wrap (and could loop forever); take the exact size. */
            new_cap = needed;
            break;
        }
        new_cap *= 2;
    }

    char *new_data = (char *)realloc(nb->data, new_cap);
    if (!new_data) {
        fprintf(stderr, "realloc failed in nota_buffer_grow\n");
        abort();
    }

    nb->data = new_data;
    nb->capacity = new_cap;
}
|
|
|
|
/* Put `nb` into the empty state and, when initial_capacity is non-zero,
   pre-allocate that many bytes.  Prints a message and aborts the process if
   the allocation fails. */
void nota_buffer_init(NotaBuffer *nb, size_t initial_capacity)
{
    nb->size = 0;
    nb->capacity = 0;
    nb->data = NULL;

    if (initial_capacity == 0) {
        return;
    }

    char *storage = (char *)malloc(initial_capacity);
    if (storage == NULL) {
        fprintf(stderr, "malloc failed in nota_buffer_init\n");
        abort();
    }

    nb->data = storage;
    nb->capacity = initial_capacity;
}
|
|
|
|
/* Release the storage owned by `nb` and return it to the empty state, so it
   can be reused or freed again safely. */
void nota_buffer_free(NotaBuffer *nb)
{
    free(nb->data); /* free(NULL) is a no-op, so no guard is needed */
    nb->capacity = 0;
    nb->size = 0;
    nb->data = NULL;
}
|
|
|
|
/* Reserve `len` bytes at the end of `nb` (growing it if necessary), count
   them as used, and return a pointer to the first reserved byte.  The
   pointer is only valid until the buffer is grown again. */
static char *nota_buffer_alloc(NotaBuffer *nb, size_t len)
{
    nota_buffer_grow(nb, len);

    const size_t start = nb->size;
    nb->size = start + len;
    return nb->data + start;
}
|
|
|
|
static void nota_write_int_buf(NotaBuffer *nb, long long n);
static void nota_write_float_buf(NotaBuffer *nb, double d);

/* Append `n` using the integer encoding when it is (nearly) integral and fits
 * in a long long, and the decimal-float encoding otherwise.
 *
 * - The range test is -2^63 <= n < 2^63.  (double)INT64_MAX rounds up to 2^63,
 *   so a "<= INT64_MAX" test would admit 2^63 and overflow the cast.
 * - A fractional part below 1e-14 is treated as rounding noise only when the
 *   integral part is non-zero; tiny values such as 1e-15 are real data and
 *   must not collapse to integer 0.
 * - NaN fails every comparison and therefore goes to the float path.
 */
static void nota_write_int_or_float_buf(NotaBuffer *nb, double n)
{
    const double lower = -9223372036854775808.0; /* -2^63, exactly representable */
    const double upper = 9223372036854775808.0;  /*  2^63, first value out of range */

    if (n >= lower && n < upper) {
        double whole;
        double frac = modf(n, &whole);
        int integral = (frac == 0.0) || (whole != 0.0 && fabs(frac) < 1e-14);
        if (integral) {
            nota_write_int_buf(nb, (long long)whole);
            return;
        }
    }

    nota_write_float_buf(nb, n);
}
|
|
|
|
/* Append a one-byte symbol: the NOTA_SYM tag combined with the low 4 bits
   of `sym`. */
void nota_write_sym(NotaBuffer *nb, int sym)
{
    const int payload = sym & 0x0f;
    char *slot = nota_buffer_alloc(nb, 1);
    slot[0] = NOTA_SYM | payload;
}
|
|
|
|
/* Append a blob: a NOTA_BLOB header carrying the length in bits, followed by
 * the payload bytes.
 *
 * nb     destination buffer.
 * nbits  payload length in bits; ceil(nbits / 8) bytes are copied.
 * data   payload bytes; may be NULL when nbits is 0.
 */
void nota_write_blob(NotaBuffer *nb, unsigned long long nbits, const char *data)
{
    /* Round up without computing nbits + 7, which could wrap around. */
    const size_t payload = (size_t)(nbits / 8 + ((nbits % 8) != 0 ? 1u : 0u));

    /* Reserve the worst case (head byte + 10 length bytes + payload), then
       hand the unused tail back once the real header size is known. */
    const size_t reserved = 1 + 10 + payload;
    char *p = nota_buffer_alloc(nb, reserved);

    p[0] = NOTA_BLOB;
    char *body = nota_continue_num((long long)nbits, p, 4);

    if (payload != 0) {
        /* memcpy must not be given a NULL source, even for zero bytes. */
        memcpy(body, data, payload);
    }

    const size_t used = (size_t)(body - p) + payload;
    nb->size -= reserved - used;
}
|
|
|
|
/* Append the NUL-terminated string `s` as a text value: a NOTA_TEXT header
 * carrying the character count, followed by the characters.
 *
 * Pure-ASCII strings are copied verbatim (one byte per character).  Any
 * other string is decoded with decode_utf8 and re-encoded with encode_kim
 * (both from kim.h), one code point at a time.
 *
 * In both cases the worst-case space is reserved up front and the unused
 * tail is returned to the buffer afterwards.
 */
void nota_write_text(NotaBuffer *nb, const char *s)
{
    const size_t nbytes = strlen(s);
    const unsigned char *bytes = (const unsigned char *)s;

    /* Index of the first byte >= 0x80, or nbytes if there is none. */
    size_t first_high = 0;
    while (first_high < nbytes && bytes[first_high] < 0x80) {
        first_high++;
    }

    if (first_high == nbytes) {
        /* ASCII fast path: character count == byte count. */
        const size_t reserved = 1 + 10 + nbytes;
        char *head = nota_buffer_alloc(nb, reserved);

        head[0] = NOTA_TEXT;
        char *body = nota_continue_num((long long)nbytes, head, 4);
        memcpy(body, s, nbytes);

        const size_t written = (size_t)(body - head) + nbytes;
        nb->size -= reserved - written;
        return;
    }

    /* Non-ASCII path: decode each UTF-8 code point and emit it as KIM. */
    long long runes = utf8_count(s);
    const size_t reserved = 1 + 10 + (size_t)(runes * 5);
    char *head = nota_buffer_alloc(nb, reserved);

    head[0] = NOTA_TEXT;
    char *cursor = nota_continue_num(runes, head, 4);

    const char *in = s;
    while (*in != '\0') {
        int codepoint = decode_utf8((char **)&in);
        encode_kim(&cursor, codepoint);
    }

    const size_t written = (size_t)(cursor - head);
    nb->size -= reserved - written;
}
|
|
|
|
/* Append an array header: the NOTA_ARR tag with `count` encoded as a
   variable-length number (4 bits in the first byte).  Ten bytes are reserved
   and the unused remainder is returned to the buffer. */
void nota_write_array(NotaBuffer *nb, unsigned long long count)
{
    const size_t reserved = 10;
    char *head = nota_buffer_alloc(nb, reserved);

    head[0] = NOTA_ARR;
    char *tail = nota_continue_num((long long)count, head, 4);

    nb->size -= reserved - (size_t)(tail - head);
}
|
|
|
|
/* Append a record header: the NOTA_REC tag with `count` encoded as a
   variable-length number (4 bits in the first byte).  Ten bytes are reserved
   and the unused remainder is returned to the buffer. */
void nota_write_record(NotaBuffer *nb, unsigned long long count)
{
    const size_t reserved = 10;
    char *head = nota_buffer_alloc(nb, reserved);

    head[0] = NOTA_REC;
    char *tail = nota_continue_num((long long)count, head, 4);

    nb->size -= reserved - (size_t)(tail - head);
}
|
|
|
|
/* Public entry point for writing a number; the choice between the integer
   and the decimal-float encoding is made by nota_write_int_or_float_buf. */
void nota_write_number(NotaBuffer *nb, double n)
{
    nota_write_int_or_float_buf(nb, n);
    return;
}
|
|
|
|
/* Append a signed integer: the NOTA_INT tag, a sign flag in bit 3, and the
 * magnitude as a variable-length number with 3 bits in the first byte.
 * Ten bytes (the maximum for a 64-bit magnitude) are reserved and the unused
 * remainder is returned to the buffer.
 */
static void nota_write_int_buf(NotaBuffer *nb, long long n)
{
    char *p = nota_buffer_alloc(nb, 10);

    /* Take the magnitude in unsigned arithmetic: "-n" overflows (undefined
       behaviour) when n is the most negative long long. */
    unsigned long long magnitude = (unsigned long long)n;
    char head = NOTA_INT;
    if (n < 0) {
        head |= 0x08; /* sign bit in the nibble */
        magnitude = 0ULL - magnitude;
    }
    p[0] = head;

    char *end = nota_continue_num((long long)magnitude, p, 3);

    const size_t used = (size_t)(end - p);
    nb->size -= 10 - used;
}
|
|
|
|
/* Powers of ten 10^0 .. 10^28, indexed by exponent. */
static const double nota_pow10_table[29] = {
    1e0,  1e1,  1e2,  1e3,
    1e4,  1e5,  1e6,  1e7,
    1e8,  1e9,  1e10, 1e11,
    1e12, 1e13, 1e14, 1e15,
    1e16, 1e17, 1e18, 1e19,
    1e20, 1e21, 1e22, 1e23,
    1e24, 1e25, 1e26, 1e27,
    1e28
};
|
|
|
|
static void extract_mantissa_coefficient(double num, long *coefficient, long *exponent)
|
|
{
|
|
if (num == 0.0) {
|
|
*coefficient = 0;
|
|
*exponent = 0;
|
|
return;
|
|
}
|
|
|
|
double absval = fabs(num);
|
|
int sign = (num < 0) ? -1 : 1;
|
|
|
|
/* Get decimal exponent via log10 */
|
|
int dec_exp = (int)floor(log10(absval));
|
|
|
|
/* Scale to extract 14-digit coefficient.
|
|
We want coeff * 10^exp = absval, with coeff having up to 14 digits.
|
|
So coeff = absval * 10^(13 - dec_exp), exp = dec_exp - 13 */
|
|
int shift = 13 - dec_exp;
|
|
double scaled;
|
|
if (shift >= 0 && shift <= 28) {
|
|
scaled = absval * nota_pow10_table[shift];
|
|
} else if (shift < 0 && -shift <= 28) {
|
|
scaled = absval / nota_pow10_table[-shift];
|
|
} else {
|
|
scaled = absval * pow(10.0, (double)shift);
|
|
}
|
|
|
|
long long coeff = (long long)(scaled + 0.5);
|
|
|
|
/* Correct off-by-one from log10 rounding */
|
|
if (coeff >= 100000000000000LL) {
|
|
coeff = (coeff + 5) / 10;
|
|
shift--;
|
|
} else if (coeff < 10000000000000LL && coeff > 0) {
|
|
coeff = (long long)(absval * pow(10.0, (double)(shift + 1)) + 0.5);
|
|
shift++;
|
|
}
|
|
|
|
int exp_out = -shift;
|
|
|
|
/* Strip trailing zeros */
|
|
while (coeff != 0 && coeff % 10 == 0) {
|
|
coeff /= 10;
|
|
exp_out++;
|
|
}
|
|
|
|
*coefficient = (long)(coeff * sign);
|
|
*exponent = (long)exp_out;
|
|
}
|
|
|
|
/* Append `d` in the decimal-float encoding: coefficient * 10^exponent.
 *
 * Layout: first byte = NOTA_FLOAT | coefficient sign (bit 3) | exponent sign
 * (bit 4) | top 3 bits of the exponent magnitude; the exponent continues as
 * 7-bit groups, followed by the coefficient magnitude as its own
 * variable-length number (7 bits in its first byte).
 *
 * Values whose decimal exponent turns out to be 0 (and zero itself) are
 * written with the shorter integer encoding instead.
 */
static void nota_write_float_buf(NotaBuffer *nb, double d)
{
    if (d == 0.0) {
        nota_write_int_buf(nb, 0);
        return;
    }

    long coef;
    long dec_exp; /* not named "exp", to avoid shadowing exp() from <math.h> */
    extract_mantissa_coefficient(d, &coef, &dec_exp);

    if (coef == 0) {
        nota_write_int_buf(nb, 0);
        return;
    }

    if (dec_exp == 0) {
        /* coef already carries the sign of d; negating it again for negative
           input would write the value with the wrong sign. */
        nota_write_int_buf(nb, coef);
        return;
    }

    const int neg = (d < 0.0);

    /* Worst case: 1 head byte plus generous room for both numbers. */
    char *p = nota_buffer_alloc(nb, 21);

    char head = NOTA_FLOAT;
    if (neg) {
        head |= (1 << 3);
    }
    if (dec_exp < 0) {
        head |= (1 << 4);
        dec_exp = -dec_exp;
    }
    p[0] = head;

    char *c = nota_continue_num(dec_exp, p, 3);
    char *end = nota_continue_num(labs(coef), c, 7);

    const size_t used = (size_t)(end - p);
    nb->size -= 21 - used;
}
|
|
|
|
#endif /* NOTA_IMPLEMENTATION */
|
|
|
|
#endif /* NOTA_H */
|