Fix nota implementation; add nota test suite

This commit is contained in:
2025-02-23 16:06:40 -06:00
parent fb10c63882
commit 7ea79c8ced
7 changed files with 494 additions and 143 deletions

View File

@@ -2,16 +2,16 @@
#define NOTA_H
#define NOTA_BLOB 0x00 // C 0 0 0
#define NOTA_TEXT 0x10 //
#define NOTA_ARR 0x20 // 0 1 0
#define NOTA_TEXT 0x10 // C 0 0 1
#define NOTA_ARR 0x20 // C 0 1 0
#define NOTA_REC 0x30 // C 0 1 1
#define NOTA_FLOAT 0x40 // C 1 0
#define NOTA_INT 0x60 // C 1 1 0
#define NOTA_SYM 0x70 // C 1 1 1
#define NOTA_FALSE 0x00
#define NOTA_TRUE 0x01
#define NOTA_NULL 0x02
#define NOTA_NULL 0x00
#define NOTA_FALSE 0x02
#define NOTA_TRUE 0x03
#define NOTA_INF 0x03 // NOTE(review): same value as NOTA_TRUE (0x03) above, so the two symbols are indistinguishable once encoded; confirm whether NOTA_INF is still meant to exist
#define NOTA_PRIVATE 0x08
#define NOTA_SYSTEM 0x09
@@ -23,7 +23,7 @@ int nota_type(char *nota);
// Pass NULL into the read in variable to skip over it
char *nota_read_blob(long long *len, char *nota);
char *nota_read_blob(long long *len, char **blob, char *nota);
// ALLOCATES! Uses strdup to return it via the text pointer
char *nota_read_text(char **text, char *nota);
char *nota_read_array(long long *len, char *nota);
@@ -32,7 +32,7 @@ char *nota_read_float(double *d, char *nota);
char *nota_read_int(long long *l, char *nota);
char *nota_read_sym(int *sym, char *nota);
char *nota_write_blob(unsigned long long n, char *nota);
char *nota_write_blob(unsigned long long n, char *data, char *nota);
char *nota_write_text(const char *s, char *nota);
char *nota_write_array(unsigned long long n, char *nota);
char *nota_write_record(unsigned long long n, char *nota);
@@ -84,7 +84,7 @@ char *nota_read_num(long long *n, char *nota)
*n |= (*nota) & NOTA_HEAD_DATA;
while (CONTINUE(*(nota++)))
*n = (*n<<7) | (*nota) & NOTA_DATA;
*n = (((*n<<7) | *nota) & NOTA_DATA);
return nota;
}
@@ -170,99 +170,276 @@ char *nota_write_int(long long n, char *nota)
#include <stdio.h>
#include <math.h>
/*
 * Split a double into a base-10 integer coefficient and exponent such that
 *   num ~= coefficient * 10^exponent
 *
 * num         - value to decompose
 * coefficient - receives the signed decimal digits of num
 *               (at most 14 significant digits)
 * exponent    - receives the power of ten to apply to coefficient
 *
 * Zero and non-finite input (inf, NaN) produce coefficient 0, exponent 0.
 */
void extract_mantissa_coefficient(double num, long *coefficient, long *exponent)
{
  *coefficient = 0;
  *exponent = 0;

  if (num == 0.0 || !isfinite(num))
    return;

  // "%.14g" limits the output to 14 *significant* digits, which hides
  // binary-fraction noise (98.599999999999994 prints as 98.6) without
  // erasing small magnitudes the way rounding to a fixed number of
  // decimal places would.
  char buf[64];
  snprintf(buf, sizeof buf, "%.14g", num);

  // Peel off a scientific-notation suffix ("e-15", "E+20") if present.
  long sci_exponent = 0;
  char *exp_pos = strpbrk(buf, "eE");
  if (exp_pos) {
    sci_exponent = strtol(exp_pos + 1, NULL, 10);
    *exp_pos = '\0';
  }

  // Drop the decimal point, remembering how many digits followed it.
  long fraction_digits = 0;
  char *dec_point = strchr(buf, '.');
  if (dec_point) {
    fraction_digits = (long)strlen(dec_point + 1);
    // strlen(dec_point) counts the '.', so this also moves the terminator.
    memmove(dec_point, dec_point + 1, strlen(dec_point));
  }

  // What is left is a plain (possibly signed) integer.
  *coefficient = (long)strtoll(buf, NULL, 10);

  // Every digit that sat right of the decimal point lowers the exponent by one.
  *exponent = sci_exponent - fraction_digits;
}
/*
 * Encode a number given as decimal text (e.g. "-12.50", "3e8") into nota.
 *
 * decimal - NUL-terminated text: optional leading '-', digits with an
 *           optional '.', optional 'e'/'E' exponent with optional sign.
 *           Characters that are neither digits nor '.' are ignored while
 *           scanning the coefficient.
 * nota    - destination buffer
 *
 * Text with neither a '.' nor an exponent is written with nota_write_int,
 * as is any value whose coefficient is zero. Everything else is written as
 * a NOTA_FLOAT header followed by the exponent and then the coefficient.
 *
 * Returns whatever the final nota_write_int / nota_continue_num call returns.
 */
char *nota_write_decimal_str(const char *decimal, char *nota)
{
  int negative = (decimal[0] == '-');
  const char *cursor = negative ? decimal + 1 : decimal;

  long coef = 0;
  long exp = 0;
  int seen_point = 0;

  // Coefficient: accumulate digits until the exponent marker or end of text.
  for (; *cursor != '\0' && *cursor != 'e' && *cursor != 'E'; cursor++) {
    char ch = *cursor;
    if (ch == '.') {
      seen_point = 1;
    } else if (ch >= '0' && ch <= '9') {
      coef = coef * 10 + (ch - '0');
      if (seen_point)
        exp -= 1; // a fractional digit shifts the value one place right
    }
  }

  // Optional explicit exponent.
  int seen_exponent = (*cursor == 'e' || *cursor == 'E');
  if (seen_exponent) {
    cursor++;

    int exponent_sign = 1;
    if (*cursor == '-') {
      exponent_sign = -1;
      cursor++;
    } else if (*cursor == '+') {
      cursor++;
    }

    long written_exp = 0;
    for (; *cursor >= '0' && *cursor <= '9'; cursor++)
      written_exp = written_exp * 10 + (*cursor - '0');

    exp += exponent_sign * written_exp;
  }

  // Plain integers take the integer encoding.
  if (!seen_point && !seen_exponent)
    return nota_write_int(negative ? -coef : coef, nota);

  // Fold trailing zeros of the coefficient back into a negative exponent.
  for (; coef > 0 && exp < 0 && coef % 10 == 0; exp++)
    coef /= 10;

  if (coef == 0)
    return nota_write_int(0, nota);

  int exp_negative = (exp < 0);
  if (exp_negative)
    exp = -exp;

  // Header byte: type, exponent sign in bit 4, number sign in bit 3.
  nota[0] = NOTA_FLOAT | (exp_negative << 4) | (negative << 3);

  char *after_exp = nota_continue_num(exp, nota, 3);
  return nota_continue_num(coef, after_exp, 7);
}
/*
 * Encode a double into nota as a decimal coefficient/exponent pair.
 *
 * n    - value to encode
 * nota - destination buffer
 *
 * The value is decomposed with extract_mantissa_coefficient. When the
 * resulting exponent is zero the value is written with nota_write_int;
 * otherwise a NOTA_FLOAT header is written, followed by the exponent
 * magnitude and then the coefficient magnitude.
 *
 * Returns whatever the final nota_write_int / nota_continue_num call returns.
 */
char *nota_write_float(double n, char *nota)
{
  long coef;
  long exp;
  extract_mantissa_coefficient(n, &coef, &exp);

  int neg = (n < 0);

  // extract_mantissa_coefficient parses the sign along with the digits, so
  // coef may already be negative. Work with its magnitude and apply the sign
  // exactly once; multiplying a negative coef by -1 would flip the value
  // positive.
  long magnitude = labs(coef);

  // Store integer if exponent is zero
  if (exp == 0)
    return nota_write_int(neg ? -magnitude : magnitude, nota);

  int exp_negative = (exp < 0);

  nota[0] = NOTA_FLOAT;
  nota[0] |= exp_negative << 4; // exponent sign bit
  nota[0] |= neg << 3;          // number sign bit

  char *c = nota_continue_num(labs(exp), nota, 3);
  return nota_continue_num(magnitude, c, 7);
}
/*
 * Decode a float from nota and render it as text of the form
 * "<coefficient>.00e<exponent>", for example "986.00e-1".
 *
 * d    - receives a malloc'd, NUL-terminated string that the caller must
 *        free; set to NULL if allocation fails. Pass NULL to step over the
 *        encoded value without producing a string.
 * nota - points at the first byte of the encoded float
 *
 * Returns the position just past the encoded float in every case.
 */
char *nota_read_float_str(char **d, char *nota)
{
  // Both sign flags live in the first byte; read them before advancing.
  int neg = NOTA_SIG_SIGN(*nota);
  int esign = NOTA_EXP_SIGN(*nota);

  // Exponent: low bits of the first byte, then 7 bits per continuation byte.
  long long e = (*nota) & NOTA_INT_DATA;
  while (CONTINUE(*nota)) {
    nota++;
    e = (e << 7) | ((*nota) & NOTA_DATA);
  }
  nota++; // step past the last exponent byte

  // Coefficient: 7 bits per byte.
  long long sig = (*nota) & NOTA_DATA;
  while (CONTINUE(*nota)) {
    nota++;
    sig = (sig << 7) | ((*nota) & NOTA_DATA);
  }
  nota++; // step past the last coefficient byte

  if (neg) sig = -sig;
  if (esign) e = -e;

  if (!d)
    return nota;

  // Format straight from the integers. Going through
  // (sig * 10^e) / 10^e in floating point yields NaN once 10^e overflows or
  // underflows, and loses digits for large coefficients.
  int len = snprintf(NULL, 0, "%lld.00e%lld", sig, e);
  if (len < 0) {
    *d = NULL;
    return nota;
  }

  char *result = malloc((size_t)len + 1);
  if (!result) {
    *d = NULL;
    return nota; // position is still advanced on failure
  }

  snprintf(result, (size_t)len + 1, "%lld.00e%lld", sig, e);
  *d = result;
  return nota;
}
/*
 * Decode a float from nota into a double.
 *
 * d    - receives coefficient * 10^exponent; pass NULL to skip the value
 *        (delegates to nota_skip)
 * nota - points at the first byte of the encoded float
 *
 * Returns the position just past the encoded float.
 */
char *nota_read_float(double *d, char *nota)
{
  // Largest power of ten that a double represents exactly.
  enum { EXACT_POW10_MAX = 22 };

  // If the caller passed NULL for d, just skip over the float encoding
  if (!d)
    return nota_skip(nota);

  // Both sign flags live in the first byte; read them before advancing.
  int neg = NOTA_SIG_SIGN(*nota);
  int esign = NOTA_EXP_SIGN(*nota);

  // Exponent magnitude: low bits of the first byte, then 7 bits per
  // continuation byte.
  long long e = (*nota) & NOTA_INT_DATA;
  while (CONTINUE(*nota)) {
    nota++;
    e = (e << 7) | ((*nota) & NOTA_DATA);
  }
  nota++; // step past the last exponent byte

  // Coefficient magnitude, same variable-length style.
  long long sig = (*nota) & NOTA_DATA;
  while (CONTINUE(*nota)) {
    nota++;
    sig = (sig << 7) | ((*nota) & NOTA_DATA);
  }
  nota++; // step past the last coefficient byte

  if (neg) sig = -sig;

  if (esign && e <= EXACT_POW10_MAX) {
    // 10^-k is not exactly representable, so multiplying by it adds an
    // error (986 * 0.1 != 98.6). Dividing by the exact 10^k keeps it to a
    // single rounding.
    *d = (double)sig / pow(10.0, (double)e);
  } else {
    *d = (double)sig * pow(10.0, esign ? -(double)e : (double)e);
  }

  return nota;
}
/*
 * Encode a double, choosing the integer encoding when the value is
 * (very nearly) a whole number that fits in a long long, and the float
 * encoding otherwise.
 *
 * n    - value to encode
 * nota - destination buffer
 *
 * Returns whatever nota_write_int / nota_write_float returns.
 */
char *nota_write_number(double n, char *nota)
{
  // (double)INT64_MAX rounds up to 2^63, which does NOT fit in a long long,
  // so the upper bound must be exclusive; -2^63 itself does fit.
  if (n < (double)INT64_MIN || n >= (double)INT64_MAX)
    return nota_write_float(n, nota);

  double int_part;
  double frac = modf(n, &int_part);

  // NOTE(review): this is an absolute tolerance, so any |n| < 1e-14 is
  // written as the integer 0; confirm that is intended.
  if (fabs(frac) < 1e-14)
    return nota_write_int((long long)int_part, nota);

  // NaN also lands here: every comparison above is false for it.
  return nota_write_float(n, nota);
}
char *nota_read_int(long long *n, char *nota)
@@ -274,7 +451,7 @@ char *nota_read_int(long long *n, char *nota)
char *c = nota;
*n |= (*c) & NOTA_INT_DATA; /* first three bits */
while (CONTINUE(*(c++)))
*n = (*n<<7) | (*c) & NOTA_DATA;
*n = (*n<<7) | (*c & NOTA_DATA);
if (NOTA_INT_SIGN(*nota)) *n *= -1;
@@ -282,10 +459,15 @@ char *nota_read_int(long long *n, char *nota)
}
/* n is the number of bits */
char *nota_write_blob(unsigned long long n, char *nota)
char *nota_write_blob(unsigned long long n, char *data, char *nota)
{
nota[0] = NOTA_BLOB;
return nota_continue_num(n, nota, 4);
nota = nota_continue_num(n, nota, 4);
int bytes = floor((n+7)/8);
for (int i = 0; i < bytes; i++)
nota[i] = data[i];
return nota+bytes;
}
char *nota_write_array(unsigned long long n, char *nota)
@@ -306,10 +488,17 @@ char *nota_read_record(long long *len, char *nota)
return nota_read_num(len, nota);
}
/*
 * Read a blob from nota. ALLOCATES: the payload is copied into a malloc'd
 * buffer that the caller must free.
 *
 * len  - receives the payload length in BYTES (the encoded bit count rounded
 *        up to whole bytes); may be NULL
 * blob - receives the malloc'd copy of the payload, or NULL if allocation
 *        fails (malloc may also return NULL for an empty blob); may be NULL
 *        to skip the copy
 * nota - points at the first byte of the encoded blob
 *
 * Returns the position just past the blob payload, including when len
 * and/or blob are NULL, so that passing NULL really does skip the value.
 */
char *nota_read_blob(long long *len, char **blob, char *nota)
{
  long long bits = 0;
  nota = nota_read_num(&bits, nota);

  // Round up to whole bytes in integer arithmetic.
  long long bytes = bits / 8 + (bits % 8 != 0);

  if (len)
    *len = bytes;

  if (blob) {
    *blob = malloc((size_t)bytes);
    if (*blob)
      memcpy(*blob, nota, (size_t)bytes);
  }

  return nota + bytes;
}
char *nota_write_record(unsigned long long n, char *nota)
@@ -326,7 +515,7 @@ char *nota_write_sym(int sym, char *nota)
/*
 * Read a symbol from nota.
 *
 * sym  - receives the low four bits of the byte at nota; pass NULL to skip
 *        over the symbol without reading it
 * nota - points at the encoded symbol byte
 *
 * Returns the position one byte past the symbol.
 */
char *nota_read_sym(int *sym, char *nota)
{
  // Test the pointer, not the pointee: the store must happen even when
  // *sym is currently 0, and a NULL sym must never be dereferenced.
  if (sym)
    *sym = (*nota) & 0x0f;

  return nota + 1;
}