Files
cell/source/nota.h
John Alanbrook af0996f6ab
All checks were successful
Build and Deploy / build-linux (push) Successful in 1m11s
Build and Deploy / build-windows (CLANG64) (push) Successful in 10m2s
Build and Deploy / package-dist (push) Successful in 8s
Build and Deploy / deploy-gitea (push) Successful in 5s
Build and Deploy / deploy-itch (push) Successful in 9s
nota write decimal numbers via a string in the qjs_nota implementation, solving windows fp error
2025-02-24 23:15:58 -06:00

633 lines
17 KiB
C
Executable File

#ifndef NOTA_H
#define NOTA_H
#include <stddef.h>
#include <stdint.h>
/* Nota type values. The type occupies bits 4-6 of a value's first byte
   (see NOTA_TYPE below). */
#define NOTA_BLOB 0x00
#define NOTA_TEXT 0x10
#define NOTA_ARR 0x20
#define NOTA_REC 0x30
#define NOTA_FLOAT 0x40
#define NOTA_INT 0x60
#define NOTA_SYM 0x70
/* Presumably the symbol codes stored in the low nibble of a NOTA_SYM byte. */
#define NOTA_NULL 0x00
#define NOTA_FALSE 0x02
#define NOTA_TRUE 0x03
#define NOTA_INF 0x03 /* NOTE(review): same value as NOTA_TRUE - confirm this is intended */
#define NOTA_PRIVATE 0x08
#define NOTA_SYSTEM 0x09
/* Some internal constants/macros (used in varint logic, etc.) */
#define NOTA_CONT 0x80     /* continuation flag: another varint byte follows */
#define NOTA_DATA 0x7f     /* payload bits of a continuation byte */
#define NOTA_INT_DATA 0x07 /* payload bits in the first byte of an int or float */
/* The sign tests take an arbitrary expression, so the argument is
   parenthesized: `a | b & mask` would otherwise bind as `a | (b & mask)`. */
#define NOTA_INT_SIGN(CHAR) ((CHAR) & (1<<3))
#define NOTA_SIG_SIGN(CHAR) ((CHAR) & (1<<3))
#define NOTA_EXP_SIGN(CHAR) ((CHAR) & (1<<4))
#define NOTA_TYPE 0x70      /* mask selecting the type bits of a first byte */
#define NOTA_HEAD_DATA 0x0f /* payload bits in the first byte of blob/text/array/record */
/* Non-zero when bit 7 (the continuation flag) of CHAR is set. */
#define CONTINUE(CHAR) ((CHAR)>>7)
#define UTF8_DATA 0x3f
/* Return the type bits (mask 0x70) of the byte that nota points at. */
static inline int nota_type(const char *nota)
{
  const int head = *nota;
  return head & 0x70;
}
/* Readers. Each takes a pointer to the first byte of an encoded value and
   returns a pointer into the same buffer for the caller to continue from. */

/* Read a blob. The encoded length is a bit count; *len receives the byte
   count (rounded up) and *blob a malloc'd copy of those bytes, which the
   caller presumably has to free(). If len is NULL, nota is returned as is. */
char *nota_read_blob(long long *len, char **blob, char *nota);
/* Read a text value. *text receives a heap-allocated, NUL-terminated UTF-8
   string (converted with kim_to_utf8) that the caller presumably has to
   free(). */
char *nota_read_text(char **text, char *nota);
/* Read an array header: *len receives the count stored in the header.
   If len is NULL, nota is returned as is. */
char *nota_read_array(long long *len, char *nota);
/* Read a record header: *len receives the count stored in the header.
   If len is NULL, nota is returned as is. */
char *nota_read_record(long long *len, char *nota);
/* Read a float: *d receives coefficient * 10^exponent.
   If d is NULL nothing is decoded. */
char *nota_read_float(double *d, char *nota);
/* Read a signed integer into *n. If n is NULL the varint is skipped. */
char *nota_read_int(long long *n, char *nota);
/* Read a symbol: *sym (when non-NULL) receives the low 4 bits of the byte.
   Always returns nota + 1. */
char *nota_read_sym(int *sym, char *nota);
/* Growable byte buffer that the nota_write_* functions append to. */
struct NotaBuffer {
  char *data;      /* heap storage; NULL while nothing is allocated */
  size_t size;     /* number of bytes used */
  size_t capacity; /* allocated size of data */
};
typedef struct NotaBuffer NotaBuffer;
/* Initialize a NotaBuffer with a given initial capacity.
   A capacity of 0 allocates nothing; a failed allocation aborts the process. */
void nota_buffer_init(NotaBuffer *nb, size_t initial_capacity);
/* Free the buffer's internal memory. (Does NOT free nb itself.) */
void nota_buffer_free(NotaBuffer *nb);
/* Writers. Each appends one encoded item to the end of nb, growing it as
   needed. */

/* Append a blob: a header carrying nbits followed by ceil(nbits / 8) bytes
   copied from data. */
void nota_write_blob (NotaBuffer *nb, unsigned long long nbits, const char *data);
/* Append a text value built from the NUL-terminated string s
   (presumably UTF-8; it is decoded with decode_utf8). */
void nota_write_text (NotaBuffer *nb, const char *s);
/* Append an array header holding count. Only the header is written here. */
void nota_write_array (NotaBuffer *nb, unsigned long long count);
/* Append a record header holding count. Only the header is written here. */
void nota_write_record(NotaBuffer *nb, unsigned long long count);
/* Append a number, encoded as an integer or as a decimal float. */
void nota_write_number(NotaBuffer *nb, double n);
/* Append a one-byte symbol; only the low 4 bits of sym are stored. */
void nota_write_sym (NotaBuffer *nb, int sym);
#ifdef NOTA_IMPLEMENTATION
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <limits.h>
#include "kim.h"
/* -------------------------------------------------------
HELPER: step over one varint
Returns the address of the byte after the varint's last byte.
------------------------------------------------------- */
static inline char *nota_skip(char *nota)
{
  char *cursor = nota;
  for (; CONTINUE(*cursor); cursor++) {
    /* continuation flag set: this varint has at least one more byte */
  }
  return cursor + 1;
}
/* -------------------------------------------------------
HELPER: decode one varint
The first byte contributes its low 4 bits (NOTA_HEAD_DATA); every
following byte contributes 7 bits, most significant group first.
Stores the value in *n and returns the address after the varint.
When n is NULL the varint is only skipped.
------------------------------------------------------- */
char *nota_read_num(long long *n, char *nota)
{
  if (n == NULL)
    return nota_skip(nota);

  unsigned char *cursor = (unsigned char *)nota;
  unsigned char byte = *cursor++;
  long long value = byte & NOTA_HEAD_DATA;
  while ((byte & NOTA_CONT) != 0) {
    byte = *cursor++;
    value = (value << 7) | (byte & NOTA_DATA);
  }
  *n = value;
  return (char *)cursor;
}
/* Number of payload bits a varint needs to hold n when the first byte
   offers sb payload bits and every further byte offers 7.
   The result is always sb plus a multiple of 7. */
static inline int nota_bits(long long n, int sb)
{
  if (n == 0)
    return sb;

  const int width = (int)(sizeof n * CHAR_BIT);
  const int significant = width - __builtin_clzll(n);
  /* Bits that do not fit in the first byte (may be negative). */
  const int spill = significant - sb;
  const int groups = (spill + 6) / 7;
  return groups * 7 + sb;
}
/* Write n as a varint starting at nota[0].
   nota[0] must already hold the caller's type/sign bits; only its
   continuation flag and its low sb bits are rewritten here. Every following
   byte carries 7 payload bits, most significant group first.
   Returns the address just past the last byte written.
   All bit manipulation is done on unsigned values: the previous
   `(~0) << sb` left-shifted a negative int, which is undefined behaviour. */
static inline char *nota_continue_num(long long n, char *nota, int sb)
{
  const unsigned long long value = (unsigned long long)n;
  const unsigned int low_mask = (1u << sb) - 1u; /* the sb payload bits of byte 0 */
  int bits = nota_bits(n, sb) - sb;              /* payload bits left for later bytes */

  unsigned char head = (unsigned char)nota[0];
  if (bits > 0)
    head |= NOTA_CONT;
  else
    head &= (unsigned char)~NOTA_CONT;
  head &= (unsigned char)~low_mask; /* clear sb bits */
  head |= (unsigned char)(low_mask & (value >> bits));
  nota[0] = (char)head;

  int i = 1;
  while (bits > 0) {
    bits -= 7;
    const unsigned char more = (bits == 0) ? 0 : NOTA_CONT;
    nota[i] = (char)(more | (NOTA_DATA & (value >> bits)));
    i++;
  }
  return &nota[i];
}
/* Read a blob.
   The header stores a bit count; *len receives the byte count (rounded up)
   and *blob a malloc'd copy of the payload, which the caller must free().
   - len == NULL: nothing is read and nota is returned unchanged.
   - blob == NULL: the payload is stepped over without being copied.
   - A negative (corrupt) length is treated as an empty blob.
   Returns the address just past the payload. Aborts if allocation fails,
   like the buffer functions in this file. */
char *nota_read_blob(long long *len, char **blob, char *nota)
{
  if (!len) return nota;
  nota = nota_read_num(len, nota);

  /* Integer round-up; no floating point and no truncation to int. */
  size_t bytes = 0;
  if (*len > 0)
    bytes = (size_t)(((unsigned long long)*len + 7ULL) >> 3);
  *len = (long long)bytes;

  if (blob) {
    /* malloc(0) may legitimately return NULL, so never ask for 0 bytes. */
    char *copy = (char *)malloc(bytes ? bytes : 1);
    if (!copy) {
      fprintf(stderr, "malloc failed in nota_read_blob\n");
      abort();
    }
    memcpy(copy, nota, bytes);
    *blob = copy;
  }
  return nota + bytes;
}
/* Read a text value.
   The header stores the character count; the characters are converted to
   UTF-8 by kim_to_utf8. *text receives a heap-allocated, NUL-terminated
   string that the caller must free(). If text is NULL the value is decoded
   and discarded.
   The scratch space is taken from the heap: the previous variable-length
   array was sized by the encoded length and could overflow the stack.
   Returns the read position as advanced by kim_to_utf8. Aborts if the
   length is unrepresentable or allocation fails. */
char *nota_read_text(char **text, char *nota)
{
  long long chars;
  nota = nota_read_num(&chars, nota);
  if (chars < 0) chars = 0; /* corrupt length: treat as empty */

  if ((unsigned long long)chars > (SIZE_MAX - 1) / 4) {
    fprintf(stderr, "text too large in nota_read_text\n");
    abort();
  }
  /* Up to 4 UTF-8 bytes per character, plus the terminator. */
  char *utf = (char *)malloc((size_t)chars * 4 + 1);
  if (!utf) {
    fprintf(stderr, "malloc failed in nota_read_text\n");
    abort();
  }
  char *pp = utf;
  kim_to_utf8(&nota, &pp, chars);
  *pp = 0;

  if (!text) {
    free(utf);
    return nota;
  }
  /* Give back the unused tail; keep the larger block if shrinking fails. */
  char *fit = (char *)realloc(utf, (size_t)(pp - utf) + 1);
  *text = fit ? fit : utf;
  return nota;
}
/* Read an array header: *len receives the count stored in it and the
   address after the header is returned.
   NOTE(review): with len == NULL the pointer is returned without advancing,
   unlike nota_read_int - confirm callers expect that. */
char *nota_read_array(long long *len, char *nota)
{
  return len ? nota_read_num(len, nota) : nota;
}
/* Read a record header: *len receives the count stored in it and the
   address after the header is returned.
   NOTE(review): with len == NULL the pointer is returned without advancing,
   unlike nota_read_int - confirm callers expect that. */
char *nota_read_record(long long *len, char *nota)
{
  return len ? nota_read_num(len, nota) : nota;
}
/* Read a float.
   Layout: an exponent varint whose first byte also holds the exponent sign
   (bit 4), the coefficient sign (bit 3) and 3 exponent bits, followed by a
   separate coefficient varint with 7 payload bits in every byte.
   *d receives coefficient * 10^exponent.
   If d is NULL the value is skipped; both varints have to be stepped over
   (skipping only one left the caller pointing at the coefficient).
   Returns the address just past the coefficient. */
char *nota_read_float(double *d, char *nota)
{
  if (!d) {
    return nota_skip(nota_skip(nota));
  }
  int neg = NOTA_SIG_SIGN(*nota);
  int esign = NOTA_EXP_SIGN(*nota);

  /* Exponent magnitude. */
  long long e = (*nota) & NOTA_INT_DATA;
  while (CONTINUE(*nota)) {
    nota++;
    e = (e << 7) | ((*nota) & NOTA_DATA);
  }
  nota++;

  /* Coefficient magnitude. */
  long long sig = (*nota) & NOTA_DATA;
  while (CONTINUE(*nota)) {
    nota++;
    sig = (sig << 7) | ((*nota) & NOTA_DATA);
  }
  nota++;

  if (neg) sig = -sig;
  if (esign) e = -e;
  *d = (double)sig * pow(10.0, (double)e);
  return nota;
}
/* Read a signed integer.
   The first byte holds the sign (bit 3) and the top 3 magnitude bits; each
   continuation byte adds 7 more. *n receives the signed value.
   When n is NULL the varint is only skipped.
   Returns the address just past the integer. */
char *nota_read_int(long long *n, char *nota)
{
  if (n == NULL)
    return nota_skip(nota);

  char *cursor = nota;
  long long magnitude = (*cursor) & NOTA_INT_DATA;
  for (;;) {
    const int more = CONTINUE(*cursor);
    cursor++;
    if (!more)
      break;
    magnitude = (magnitude << 7) | (*cursor & NOTA_DATA);
  }
  /* The sign flag lives in the first byte, not the last one read. */
  *n = NOTA_INT_SIGN(*nota) ? -magnitude : magnitude;
  return cursor;
}
/* Read a one-byte symbol: *sym (when non-NULL) receives the low 4 bits of
   the byte. Always returns the address of the next byte. */
char *nota_read_sym(int *sym, char *nota)
{
  if (sym != NULL) {
    const int head = *nota;
    *sym = head & 0x0f;
  }
  return &nota[1];
}
/* Make sure nb can take min_add more bytes past nb->size.
   Capacity starts at 64 and doubles until it is large enough.
   Aborts if the required size does not fit in size_t or if realloc fails.
   The size arithmetic is checked: previously `size + min_add` could wrap,
   and doubling could wrap to 0 and spin forever. */
static void nota_buffer_grow(NotaBuffer *nb, size_t min_add)
{
  if (min_add > SIZE_MAX - nb->size) {
    fprintf(stderr, "size overflow in nota_buffer_grow\n");
    abort();
  }
  size_t needed = nb->size + min_add;
  if (needed <= nb->capacity) return;

  size_t new_cap = (nb->capacity == 0 ? 64 : nb->capacity);
  while (new_cap < needed) {
    if (new_cap > SIZE_MAX / 2) {
      /* Doubling would wrap; settle for exactly what is required. */
      new_cap = needed;
      break;
    }
    new_cap *= 2;
  }
  char *new_data = (char *)realloc(nb->data, new_cap);
  if (!new_data) {
    fprintf(stderr, "realloc failed in nota_buffer_grow\n");
    abort();
  }
  nb->data = new_data;
  nb->capacity = new_cap;
}
/* Reset nb to an empty buffer and, when initial_capacity is non-zero,
   pre-allocate that many bytes. Aborts if the allocation fails. */
void nota_buffer_init(NotaBuffer *nb, size_t initial_capacity)
{
  nb->size = 0;
  nb->capacity = 0;
  nb->data = NULL;
  if (initial_capacity == 0)
    return;

  char *storage = (char *)malloc(initial_capacity);
  if (storage == NULL) {
    fprintf(stderr, "malloc failed in nota_buffer_init\n");
    abort();
  }
  nb->data = storage;
  nb->capacity = initial_capacity;
}
/* Release the buffer's storage and leave nb empty. nb itself is not freed. */
void nota_buffer_free(NotaBuffer *nb)
{
  free(nb->data); /* free(NULL) is a no-op, so no guard is needed */
  nb->capacity = 0;
  nb->size = 0;
  nb->data = NULL;
}
/* Reserve len bytes at the end of the buffer, growing it if necessary, and
   return a pointer to the first reserved byte. nb->size already counts the
   reserved bytes when this returns. */
static char *nota_buffer_alloc(NotaBuffer *nb, size_t len)
{
  nota_buffer_grow(nb, len);
  const size_t start = nb->size;
  nb->size = start + len;
  return nb->data + start;
}
static void nota_write_int_buf(NotaBuffer *nb, long long n);
static void nota_write_float_buf(NotaBuffer *nb, double d);
/* Append n, choosing the encoding:
   - strictly inside the int64 range with a fractional part below 1e-14:
     written as an integer;
   - everything else (including NaN and the range limits): handed to
     nota_write_float_buf.
   The range test is strict on purpose. (double)INT64_MAX rounds up to 2^63,
   which does not fit in a long long, so converting it is undefined; and
   -2^63 cannot be negated by the integer writer. */
static void nota_write_int_or_float_buf(NotaBuffer *nb, double n)
{
  if (!(n > (double)INT64_MIN && n < (double)INT64_MAX)) {
    nota_write_float_buf(nb, n);
    return;
  }
  double ip;
  double frac = modf(n, &ip);
  if (fabs(frac) < 1e-14)
    nota_write_int_buf(nb, (long long)ip);
  else
    nota_write_float_buf(nb, n);
}
/* Append a one-byte symbol: NOTA_SYM combined with the low 4 bits of sym. */
void nota_write_sym(NotaBuffer *nb, int sym)
{
  char *slot = nota_buffer_alloc(nb, 1);
  slot[0] = (char)(NOTA_SYM | (sym & 0x0f));
}
/* Append a blob: a header carrying the bit count nbits, then
   ceil(nbits / 8) bytes copied from data.
   Space for the largest possible header is reserved first; the unused part
   is handed back once the real header length is known. */
void nota_write_blob(NotaBuffer *nb, unsigned long long nbits, const char *data)
{
  const unsigned long long payload = (nbits + 7ULL) >> 3;
  const size_t reserved = 1 + 10 + payload;

  char *head = nota_buffer_alloc(nb, reserved);
  head[0] = NOTA_BLOB;
  char *body = nota_continue_num(nbits, head, 4);
  memcpy(body, data, (size_t)payload);

  const size_t written = (size_t)(body - head) + payload;
  nb->size -= (reserved - written);
}
/* Append a text value: a header carrying the character count reported by
   utf8_count, then every character of s re-encoded with encode_kim.
   Five bytes per character are reserved up front; whatever is not used is
   handed back at the end. */
void nota_write_text(NotaBuffer *nb, const char *s)
{
  const long long runes = utf8_count(s);
  const size_t reserved = 1 + 10 + (size_t)(runes * 5);

  char *head = nota_buffer_alloc(nb, reserved);
  head[0] = NOTA_TEXT;
  char *out = nota_continue_num(runes, head, 4);

  for (const char *in = s; *in != '\0'; ) {
    int codepoint = decode_utf8((char **)&in);
    encode_kim(&out, codepoint);
  }
  nb->size -= (reserved - (size_t)(out - head));
}
/* Append an array header holding count. Only the header is written here.
   Ten bytes are reserved and the unused remainder is handed back. */
void nota_write_array(NotaBuffer *nb, unsigned long long count)
{
  enum { RESERVED = 10 };
  char *head = nota_buffer_alloc(nb, RESERVED);
  head[0] = NOTA_ARR;
  char *tail = nota_continue_num(count, head, 4);
  nb->size -= (RESERVED - (size_t)(tail - head));
}
/* Append a record header holding count. Only the header is written here.
   Ten bytes are reserved and the unused remainder is handed back. */
void nota_write_record(NotaBuffer *nb, unsigned long long count)
{
  enum { RESERVED = 10 };
  char *head = nota_buffer_alloc(nb, RESERVED);
  head[0] = NOTA_REC;
  char *tail = nota_continue_num(count, head, 4);
  nb->size -= (RESERVED - (size_t)(tail - head));
}
/* Append the decimal digit d (0-9) to *acc.
   Returns 1 on success; returns 0 and leaves *acc untouched when
   *acc * 10 + d would not fit in a long long. */
static int nota_push_digit(long long *acc, int d)
{
  if (*acc > (LLONG_MAX - d) / 10) return 0;
  *acc = *acc * 10 + d;
  return 1;
}
/* Parse a decimal number written as text and append it to nb.
   Accepted shape: [+|-] digits [. digits] [(e|E) [+|-] digits].
   Parsing stops at the first character that does not fit this shape.

   - No digits at all: the integer 0 is written.
   - No fractional digits and a resulting exponent of 0 ("123", "123.",
     "123e0"): written as an integer.
   - Anything else: written as a float, coefficient * 10^exponent.

   The coefficient is kept in a long long. Digits that no longer fit are
   dropped (truncation, not rounding): a dropped integer digit raises the
   exponent by one, a dropped fraction digit is simply ignored. The
   exponent saturates instead of overflowing. Earlier versions let both
   values overflow, which is undefined behaviour. */
void nota_write_number_str(NotaBuffer *nb, const char *str)
{
  /* 1) Sign */
  int negative = 0;
  if (*str == '+') {
    str++;
  }
  else if (*str == '-') {
    negative = 1;
    str++;
  }

  /* 2) Integer part */
  long long coefficient = 0;
  int got_digits = 0;
  int coefficient_full = 0;          /* sticky: once set, no digit is accepted */
  long long dropped_int_digits = 0;  /* integer digits that did not fit */
  while (*str >= '0' && *str <= '9') {
    got_digits = 1;
    if (coefficient_full || !nota_push_digit(&coefficient, *str - '0')) {
      coefficient_full = 1;
      dropped_int_digits++;
    }
    str++;
  }

  /* 3) Fractional part */
  long long fraction_digits = 0;     /* fraction digits actually stored */
  if (*str == '.') {
    str++;
    while (*str >= '0' && *str <= '9') {
      got_digits = 1;
      if (!coefficient_full && nota_push_digit(&coefficient, *str - '0'))
        fraction_digits++;
      else
        coefficient_full = 1;
      str++;
    }
  }

  /* 4) Exponent part */
  int exponent_negative = 0;
  long long exponent_val = 0;
  if (*str == 'e' || *str == 'E') {
    /* Far below LLONG_MAX so that the adjustments in step 6 cannot overflow. */
    const long long exponent_limit = LLONG_MAX / 4;
    int exponent_full = 0;
    str++;
    if (*str == '+') {
      str++;
    }
    else if (*str == '-') {
      exponent_negative = 1;
      str++;
    }
    while (*str >= '0' && *str <= '9') {
      if (exponent_full || !nota_push_digit(&exponent_val, *str - '0'))
        exponent_full = 1;
      str++;
    }
    if (exponent_full || exponent_val > exponent_limit)
      exponent_val = exponent_limit;
  }

  /* 5) Nothing numeric was found: store 0. */
  if (!got_digits) {
    nota_write_int_buf(nb, 0);
    return;
  }

  /* 6) Fold the digit bookkeeping into the exponent. */
  if (exponent_negative) {
    exponent_val = -exponent_val;
  }
  long long final_exponent = exponent_val - fraction_digits + dropped_int_digits;

  /* 7) Integer when the value is exactly the coefficient. */
  if (fraction_digits == 0 && final_exponent == 0) {
    nota_write_int_buf(nb, negative ? -coefficient : coefficient);
    return;
  }

  /* 8) Float: head byte with both sign flags, exponent varint (3 bits in
        the head byte), then coefficient varint (7 bits per byte).
        At most 10 + 10 bytes are needed; 21 are reserved. */
  {
    char *p = nota_buffer_alloc(nb, 21);
    p[0] = NOTA_FLOAT;
    if (negative) {
      p[0] |= (1 << 3); /* coefficient sign */
    }
    if (final_exponent < 0) {
      p[0] |= (1 << 4); /* exponent sign */
      final_exponent = -final_exponent;
    }
    char *c = nota_continue_num(final_exponent, p, 3);
    char *end = nota_continue_num(coefficient, c, 7);
    /* Hand back the part of the reservation that was not used. */
    size_t used = (size_t)(end - p);
    nb->size -= (21 - used);
  }
}
/* Append the number n to nb. This is a thin public wrapper: the choice
   between the integer and float encodings is made by
   nota_write_int_or_float_buf. */
void nota_write_number(NotaBuffer *nb, double n)
{
nota_write_int_or_float_buf(nb, n);
}
/* Write an integer in varint form (with sign bit).
   The head byte is NOTA_INT plus the sign flag (0x08) and the top 3
   magnitude bits; the rest of the magnitude follows 7 bits per byte.
   The magnitude is computed in unsigned arithmetic because `-n` is
   undefined for LLONG_MIN. */
static void nota_write_int_buf(NotaBuffer *nb, long long n)
{
  /* 3 bits + 9 * 7 bits covers all 64 bits: at most 10 bytes. */
  char *p = nota_buffer_alloc(nb, 10);
  char sign = 0;
  unsigned long long magnitude = (unsigned long long)n;
  if (n < 0) {
    sign = 0x08; /* sign bit in the nibble */
    magnitude = 0ULL - magnitude;
  }
  p[0] = NOTA_INT | sign;
  /* nota_continue_num only looks at the bit pattern of its argument. */
  char *end = nota_continue_num((long long)magnitude, p, 3);
  size_t used = (size_t)(end - p);
  nb->size -= (10 - used);
}
/* Split |num| into decimal parts so that |num| ~= *coefficient * 10^*exponent.
   *coefficient is always non-negative; the caller applies the sign.
   The value is formatted with "%.14g", so at most 14 significant digits
   survive. Zero, infinities and NaN all yield a coefficient of 0.
   Both outputs are long long: `long` is 32 bits on LLP64 targets such as
   64-bit Windows and would truncate a 14-digit coefficient. */
static void extract_mantissa_coefficient(double num, long long *coefficient, long long *exponent)
{
  *coefficient = 0;
  *exponent = 0;
  if (num == 0.0) {
    return;
  }
  double magnitude = fabs(num);
  /* Round to 12 decimal places to avoid floating artifacts. Values of 1e15
     and above have nothing to round at that scale, and multiplying them
     could overflow to infinity near the top of the double range. */
  if (magnitude < 1e15) {
    magnitude = floor(magnitude * 1e12 + 0.5) / 1e12;
  }
  char buf[64];
  snprintf(buf, sizeof(buf), "%.14g", magnitude);

  /* Cut off a scientific-notation suffix, remembering its value. */
  long long exp_from_sci = 0;
  char *exp_pos = strpbrk(buf, "eE");
  if (exp_pos) {
    exp_from_sci = strtoll(exp_pos + 1, NULL, 10);
    *exp_pos = '\0';
  }

  /* Remove the decimal point so the digits read as one integer. */
  long long digits_after_decimal = 0;
  char *dec_point = strchr(buf, '.');
  if (dec_point) {
    digits_after_decimal = (long long)strlen(dec_point + 1);
    /* strlen(dec_point) counts the '.', so the terminator moves as well. */
    memmove(dec_point, dec_point + 1, strlen(dec_point));
  }
  *coefficient = strtoll(buf, NULL, 10);
  *exponent = exp_from_sci - digits_after_decimal;
}
/* Append d as a decimal float: a head byte with NOTA_FLOAT and both sign
   flags, the exponent varint (3 bits in the head byte), then the
   coefficient varint (7 bits per byte).
   Values whose coefficient comes out as 0 are written as the integer 0,
   and values whose decimal exponent is 0 are written as plain integers.
   The sign is applied exactly once, from d itself; previously a negative
   coefficient was negated a second time on the integer path. */
static void nota_write_float_buf(NotaBuffer *nb, double d)
{
  if (d == 0.0) {
    nota_write_int_buf(nb, 0);
    return;
  }
  long long coef, expo;
  extract_mantissa_coefficient(d, &coef, &expo);
  if (coef == 0) {
    nota_write_int_buf(nb, 0);
    return;
  }
  int neg = (d < 0.0);
  if (expo == 0) {
    nota_write_int_buf(nb, neg ? -coef : coef);
    return;
  }
  /* At most 10 bytes for the exponent and 10 for the coefficient. */
  char *p = nota_buffer_alloc(nb, 21);
  p[0] = NOTA_FLOAT;
  if (neg) p[0] |= (1 << 3);
  if (expo < 0) {
    p[0] |= (1 << 4);
    expo = -expo;
  }
  char *c = nota_continue_num(expo, p, 3);
  char *end = nota_continue_num(coef, c, 7);
  /* Hand back the part of the reservation that was not used. */
  size_t used = (size_t)(end - p);
  nb->size -= (21 - used);
}
#endif /* NOTA_IMPLEMENTATION */
#endif /* NOTA_H */