#ifndef NOTA_H #define NOTA_H #include #include #include "kim.h" #define NOTA_BLOB 0x00 #define NOTA_TEXT 0x10 #define NOTA_ARR 0x20 #define NOTA_REC 0x30 #define NOTA_FLOAT 0x40 #define NOTA_INT 0x60 #define NOTA_SYM 0x70 #define NOTA_NULL 0x00 #define NOTA_FALSE 0x02 #define NOTA_TRUE 0x03 #define NOTA_INF 0x03 #define NOTA_PRIVATE 0x08 #define NOTA_SYSTEM 0x09 #define NOTA_CONT 0x80 #define NOTA_DATA 0x7f #define NOTA_INT_DATA 0x07 #define NOTA_INT_SIGN(CHAR) (CHAR & (1<<3)) #define NOTA_SIG_SIGN(CHAR) (CHAR & (1<<3)) #define NOTA_EXP_SIGN(CHAR) (CHAR & (1<<4)) #define NOTA_TYPE 0x70 #define NOTA_HEAD_DATA 0x0f #define CONTINUE(CHAR) (CHAR>>7) #define UTF8_DATA 0x3f static inline int nota_type(const char *nota) { return (*nota) & 0x70; } char *nota_read_blob(long long *len, char **blob, char *nota); char *nota_read_text(char **text, char *nota); char *nota_read_array(long long *len, char *nota); char *nota_read_record(long long *len, char *nota); char *nota_read_float(double *d, char *nota); char *nota_read_int(long long *n, char *nota); char *nota_read_sym(int *sym, char *nota); typedef struct NotaBuffer { char *data; size_t size; /* number of bytes used */ size_t capacity; /* allocated size of data */ } NotaBuffer; void nota_buffer_init(NotaBuffer *nb, size_t initial_capacity); void nota_buffer_free(NotaBuffer *nb); void nota_write_blob (NotaBuffer *nb, unsigned long long nbits, const char *data); void nota_write_text (NotaBuffer *nb, const char *s); void nota_write_array (NotaBuffer *nb, unsigned long long count); void nota_write_record(NotaBuffer *nb, unsigned long long count); void nota_write_number(NotaBuffer *nb, double n); void nota_write_sym (NotaBuffer *nb, int sym); #ifdef NOTA_IMPLEMENTATION #include #include #include #include #include static inline char *nota_skip(char *nota) { while (CONTINUE(*nota)) nota++; return nota + 1; } char *nota_read_num(long long *n, char *nota) { if (!n) return nota_skip(nota); unsigned char b = (unsigned char)*nota; long long result = b & NOTA_HEAD_DATA; nota++; while (b & NOTA_CONT) { b = (unsigned char)*nota++; result = (result << 7) | (b & NOTA_DATA); } *n = result; return nota; } /* Count how many bits of varint we need to encode n, with sb “special bits” in the first byte */ static inline int nota_bits(long long n, int sb) { if (n == 0) return sb; int bits = (sizeof(n)*CHAR_BIT) - __builtin_clzll(n); bits -= sb; int needed = ((bits + 6) / 7)*7 + sb; return needed; } static inline char *nota_continue_num(long long n, char *nota, int sb) { int bits = nota_bits(n, sb); bits -= sb; if (bits > 0) nota[0] |= NOTA_CONT; else nota[0] &= ~NOTA_CONT; int shex = (~0) << sb; nota[0] &= shex; /* clear sb bits */ nota[0] |= (~shex) & ((unsigned long long)n >> bits); int i = 1; while (bits > 0) { bits -= 7; int head = (bits == 0) ? 0 : NOTA_CONT; nota[i] = head | (NOTA_DATA & (n >> bits)); i++; } return ¬a[i]; } char *nota_read_blob(long long *len, char **blob, char *nota) { if (!len) return nota; nota = nota_read_num(len, nota); int bytes = (int)floor((*len + 7) / 8.0); *len = bytes; *blob = (char *)malloc(bytes); memcpy(*blob, nota, bytes); return nota + bytes; } char *nota_read_text(char **text, char *nota) { long long chars; nota = nota_read_num(&chars, nota); char utf[chars*4 + 1]; /* enough for wide chars + null */ char *pp = utf; kim_to_utf8(¬a, &pp, chars); *pp = 0; *text = strdup(utf); return nota; } char *nota_read_array(long long *len, char *nota) { if (!len) return nota; return nota_read_num(len, nota); } char *nota_read_record(long long *len, char *nota) { if (!len) return nota; return nota_read_num(len, nota); } char *nota_read_float(double *d, char *nota) { if (!d) { return nota_skip(nota); } int neg = NOTA_SIG_SIGN(*nota); int esign = NOTA_EXP_SIGN(*nota); long long e = (*nota) & NOTA_INT_DATA; while (CONTINUE(*nota)) { nota++; e = (e << 7) | ((*nota) & NOTA_DATA); } nota++; long long sig = (*nota) & NOTA_DATA; while (CONTINUE(*nota)) { nota++; sig = (sig << 7) | ((*nota) & NOTA_DATA); } nota++; if (neg) sig = -sig; if (esign) e = -e; *d = (double)sig * pow(10.0, (double)e); return nota; } char *nota_read_int(long long *n, char *nota) { if (!n) return nota_skip(nota); *n = 0; char *c = nota; *n |= (*c) & NOTA_INT_DATA; while (CONTINUE(*(c++))) { *n = (*n << 7) | (*c & NOTA_DATA); } /* if sign bit is set in the first byte, negative. */ if (NOTA_INT_SIGN(*nota)) *n = -*n; return c; } char *nota_read_sym(int *sym, char *nota) { if (sym) *sym = ((*nota) & 0x0f); return nota + 1; } static void nota_buffer_grow(NotaBuffer *nb, size_t min_add) { size_t needed = nb->size + min_add; if (needed <= nb->capacity) return; size_t new_cap = (nb->capacity == 0 ? 64 : nb->capacity * 2); while (new_cap < needed) { new_cap *= 2; } char *new_data = (char *)realloc(nb->data, new_cap); if (!new_data) { fprintf(stderr, "realloc failed in nota_buffer_grow\n"); abort(); } nb->data = new_data; nb->capacity = new_cap; } void nota_buffer_init(NotaBuffer *nb, size_t initial_capacity) { nb->data = NULL; nb->size = 0; nb->capacity = 0; if (initial_capacity > 0) { nb->data = (char *)malloc(initial_capacity); if (!nb->data) { fprintf(stderr, "malloc failed in nota_buffer_init\n"); abort(); } nb->capacity = initial_capacity; } } void nota_buffer_free(NotaBuffer *nb) { if (nb->data) free(nb->data); nb->data = NULL; nb->size = 0; nb->capacity = 0; } static char *nota_buffer_alloc(NotaBuffer *nb, size_t len) { nota_buffer_grow(nb, len); char *p = nb->data + nb->size; nb->size += len; return p; } static void nota_write_int_buf(NotaBuffer *nb, long long n); static void nota_write_float_buf(NotaBuffer *nb, double d); static void nota_write_int_or_float_buf(NotaBuffer *nb, double n) { if (n < (double)INT64_MIN || n > (double)INT64_MAX) { nota_write_float_buf(nb, n); return; } double ip; double frac = modf(n, &ip); if (fabs(frac) < 1e-14) nota_write_int_buf(nb, (long long)ip); else nota_write_float_buf(nb, n); } void nota_write_sym(NotaBuffer *nb, int sym) { char *p = nota_buffer_alloc(nb, 1); *p = NOTA_SYM | (sym & 0x0f); } void nota_write_blob(NotaBuffer *nb, unsigned long long nbits, const char *data) { unsigned long long bytes_len = (nbits + 7ULL) >> 3; char *p = nota_buffer_alloc(nb, 1 + 10 + bytes_len); p[0] = NOTA_BLOB; char *end = nota_continue_num(nbits, p, 4); size_t varint_used = (size_t)(end - p - 1); memcpy(end, data, (size_t)bytes_len); size_t total_used = 1 + varint_used + bytes_len; size_t allocated = 1 + 10 + bytes_len; nb->size -= (allocated - total_used); } void nota_write_text(NotaBuffer *nb, const char *s) { long long runes = utf8_count(s); size_t max_kim = (size_t)(runes * 5); char *p = nota_buffer_alloc(nb, 1 + 10 + max_kim); p[0] = NOTA_TEXT; char *end = nota_continue_num(runes, p, 4); char *kim_out = end; const char *utf_in = s; while (*utf_in) { int codepoint = decode_utf8((char **)&utf_in); encode_kim(&kim_out, codepoint); } size_t used = (size_t)(kim_out - p); size_t allocated = 1 + 10 + max_kim; nb->size -= (allocated - used); } void nota_write_array(NotaBuffer *nb, unsigned long long count) { char *p = nota_buffer_alloc(nb, 10); p[0] = NOTA_ARR; char *end = nota_continue_num(count, p, 4); size_t used = (size_t)(end - p); nb->size -= (10 - used); } void nota_write_record(NotaBuffer *nb, unsigned long long count) { char *p = nota_buffer_alloc(nb, 10); p[0] = NOTA_REC; char *end = nota_continue_num(count, p, 4); size_t used = (size_t)(end - p); nb->size -= (10 - used); } void nota_write_number(NotaBuffer *nb, double n) { nota_write_int_or_float_buf(nb, n); } static void nota_write_int_buf(NotaBuffer *nb, long long n) { /* up to ~10 bytes for varint */ char *p = nota_buffer_alloc(nb, 10); char sign = 0; if (n < 0) { sign = 0x08; /* sign bit in the nibble */ n = -n; } p[0] = NOTA_INT | sign; char *end = nota_continue_num(n, p, 3); size_t used = (size_t)(end - p); nb->size -= (10 - used); } static void extract_mantissa_coefficient(double num, long *coefficient, long *exponent) { if (num == 0.0) { *coefficient = 0; *exponent = 0; return; } /* Round to 12 decimal places to avoid floating artifacts. */ double rounded = floor(fabs(num) * 1e12 + 0.5) / 1e12; if (num < 0) { rounded = -rounded; } char buf[64]; snprintf(buf, sizeof(buf), "%.14g", rounded); char *exp_pos = strpbrk(buf, "eE"); long exp_from_sci = 0; if (exp_pos) { exp_from_sci = atol(exp_pos + 1); *exp_pos = '\0'; } char *dec_point = strchr(buf, '.'); int digits_after_decimal = 0; if (dec_point) { digits_after_decimal = (int)strlen(dec_point + 1); memmove(dec_point, dec_point + 1, strlen(dec_point)); } long long coeff_ll = atoll(buf); *coefficient = (long)coeff_ll; *exponent = exp_from_sci - digits_after_decimal; } static void nota_write_float_buf(NotaBuffer *nb, double d) { if (d == 0.0) { nota_write_int_buf(nb, 0); return; } long coef, exp; extract_mantissa_coefficient(d, &coef, &exp); if (coef == 0) { nota_write_int_buf(nb, 0); return; } int neg = (d < 0.0); if (exp == 0) { nota_write_int_buf(nb, neg ? -coef : coef); return; } char *p = nota_buffer_alloc(nb, 21); p[0] = NOTA_FLOAT; if (neg) p[0] |= (1 << 3); if (exp < 0) { p[0] |= (1 << 4); exp = -exp; } char *c = nota_continue_num(exp, p, 3); char *end = nota_continue_num(labs(coef), c, 7); size_t used = (size_t)(end - p); nb->size -= (21 - used); } #endif /* NOTA_IMPLEMENTATION */ #endif /* NOTA_H */