/*
 * nota.h (cell/source/nota.h)
 *
 * Single-header encoder/decoder for the Nota binary format. Define
 * NOTA_IMPLEMENTATION before including this header in exactly one
 * translation unit to compile the function bodies.
 */

#ifndef NOTA_H
#define NOTA_H
// Type tags. They occupy bits 4-6 of a value's first byte; the "C" in the
// bit diagrams is the top bit (0x80), which marks that more bytes of the
// leading number follow.
#define NOTA_BLOB 0x00 // C 0 0 0
#define NOTA_TEXT 0x10 // C 0 0 1
#define NOTA_ARR 0x20 // C 0 1 0
#define NOTA_REC 0x30 // C 0 1 1
// Floats only fix two type bits: the implementation stores the exponent sign
// in bit 4, so an encoded float header can be 0x4x or 0x5x.
#define NOTA_FLOAT 0x40 // C 1 0
#define NOTA_INT 0x60 // C 1 1 0
#define NOTA_SYM 0x70 // C 1 1 1
// Symbol values; presumably the `sym` argument of nota_write_sym / the value
// returned by nota_read_sym (which keeps the low 4 bits).
#define NOTA_NULL 0x00
#define NOTA_FALSE 0x02
#define NOTA_TRUE 0x03
// NOTE(review): NOTA_INF has the same value as NOTA_TRUE - confirm this is intended.
#define NOTA_INF 0x03
#define NOTA_PRIVATE 0x08
#define NOTA_SYSTEM 0x09
// Returns the first byte at *nota masked with 0x70, for comparison with the
// NOTA_ type tags above.
int nota_type(char *nota);
// Functions take a pointer to a buffer *nota, read or write the value, and then return a pointer to the next byte of the stream
// Pass NULL into the read in variable to skip over it
// ALLOCATES! *blob receives a malloc'd copy of the payload and *len its size
// in BYTES (the stored bit count rounded up to whole bytes).
char *nota_read_blob(long long *len, char **blob, char *nota);
// ALLOCATES! *text receives a heap-allocated, NUL-terminated UTF-8 string;
// release it with free().
char *nota_read_text(char **text, char *nota);
// Read only the element count from an array / record header; the elements
// themselves follow in the stream and are read by further calls.
char *nota_read_array(long long *len, char *nota);
char *nota_read_record(long long *len, char *nota);
// *d receives mantissa * 10^exponent.
char *nota_read_float(double *d, char *nota);
char *nota_read_int(long long *l, char *nota);
// *sym receives the low 4 bits of the byte at *nota.
char *nota_read_sym(int *sym, char *nota);
// n is the payload length in BITS; the bytes needed to hold n bits are copied
// from data.
char *nota_write_blob(unsigned long long n, char *data, char *nota);
char *nota_write_text(const char *s, char *nota);
// Write only the header carrying the element count n; the caller writes the
// elements afterwards.
char *nota_write_array(unsigned long long n, char *nota);
char *nota_write_record(unsigned long long n, char *nota);
// Writes n as an integer when it is within long long range and its fractional
// part is below 1e-14, otherwise as a decimal float.
char *nota_write_number(double n, char *nota);
char *nota_write_sym(int sym, char *nota);
// Prints the bytes of the leading variable-length number at *nota as hex,
// followed by a newline.
void print_nota_hex(char *nota);
#ifdef NOTA_IMPLEMENTATION
#include "stdio.h"
#include "math.h"
#include "string.h"
#include "stdlib.h"
#include "limits.h"
#include "kim.h"
/* Wire-format masks used by the implementation. */
#define NOTA_CONT 0x80     /* continuation bit: another byte of this number follows */
#define NOTA_DATA 0x7f     /* payload bits of a continuation byte */
#define NOTA_INT_DATA 0x07 /* payload bits in the first byte of an int / float exponent */
/* Sign flags in a value's first byte. The argument is parenthesised so that
   an expression such as `a | b` is masked as a whole. */
#define NOTA_INT_SIGN(CHAR) ((CHAR) & (1<<3))
#define NOTA_SIG_SIGN(CHAR) ((CHAR) & (1<<3))
#define NOTA_EXP_SIGN(CHAR) ((CHAR) & (1<<4))
#define NOTA_TYPE 0x70      /* type tag bits of the first byte */
#define NOTA_HEAD_DATA 0x0f /* payload bits in the first byte of a length header */
/* 1 when the continuation bit of CHAR is set, else 0. The byte is viewed as
   unsigned so the result does not depend on the signedness of plain char. */
#define CONTINUE(CHAR) (((unsigned char)(CHAR)) >> 7)
#define UTF8_DATA 0x3f
/* define this to use native string instead of kim. Bytes are encoded instead of runes */
#define NOTA_UTF8
/* Return the type-tag bits (mask NOTA_TYPE) of the first byte at nota.
   NOTE(review): the float writers put the exponent sign in bit 4, so a float
   with a negative exponent yields 0x50 rather than NOTA_FLOAT - confirm that
   callers account for this. */
int nota_type(char *nota)
{
  int head = nota[0];
  return head & NOTA_TYPE;
}
/* Step over one variable-length number: consume bytes up to and including
   the first one whose continuation bit is clear, and return the address of
   the byte after it. */
char *nota_skip(char *nota)
{
  for (;;) {
    char current = *nota++;
    if (!CONTINUE(current))
      return nota;
  }
}
/* Read a length-style number: 4 payload bits in the first byte, then 7 bits
   in each continuation byte (most significant first).
   n    - receives the value; pass NULL to skip the number without decoding.
   nota - first byte of the number.
   Returns the address of the byte after the number. */
char *nota_read_num(long long *n, char *nota)
{
  if (!n)
    return nota_skip(nota);

  unsigned char b = (unsigned char)*nota++;
  /* Accumulate at full 64-bit width: the result is handed back as a
     long long, and an unsigned accumulator keeps the shifts well defined. */
  unsigned long long result = b & NOTA_HEAD_DATA;

  /* The continuation bit of the byte just consumed says whether another
     7-bit chunk follows. */
  while (CONTINUE(b)) {
    b = (unsigned char)*nota++;
    result = (result << 7) | (b & NOTA_DATA);
  }

  *n = (long long)result;
  return nota;
}
// Total payload bits needed to store n when the first byte carries sb bits
// and every further byte carries 7: sb plus a whole number of 7-bit chunks.
// Zero needs only the first byte.
int nota_bits(long long n, int sb)
{
  if (n == 0)
    return sb;

  // Position of the highest set bit, counted from 1.
  int significant = (int)(sizeof(n) * CHAR_BIT) - __builtin_clzll(n);
  // Bits that do not fit in the first byte (zero or negative when all fit).
  int overflow = significant - sb;
  int chunks = (overflow + 6) / 7;
  return chunks * 7 + sb;
}
// Write n as a variable-length number starting at nota[0].
//   n    - value to store; its bit pattern is treated as an unsigned magnitude.
//   nota - output; bits of nota[0] above the low sb bits (other than the
//          continuation bit) are preserved, so the caller sets the type and
//          sign bits before calling.
//   sb   - number of payload bits available in the first byte.
// Returns the address of the byte after the last one written.
char *nota_continue_num(long long n, char *nota, int sb)
{
  // Shift an unsigned copy so a set top bit is never sign-extended into the
  // payload.
  unsigned long long value = (unsigned long long)n;
  // Payload bits carried by continuation bytes (a multiple of 7).
  int bits = nota_bits(n, sb) - sb;
  // Low sb bits of the first byte; built from an unsigned constant because
  // left-shifting a negative int is undefined.
  unsigned int head_mask = (1u << sb) - 1u;

  unsigned int head = (unsigned char)nota[0];
  head &= ~(head_mask | (unsigned int)NOTA_CONT);
  if (bits > 0)
    head |= NOTA_CONT;
  head |= head_mask & (unsigned int)(value >> bits);
  nota[0] = (char)head;

  int i = 1;
  while (bits > 0) {
    bits -= 7;
    unsigned int chunk = (unsigned int)(value >> bits) & NOTA_DATA;
    // Every chunk except the last announces a successor.
    nota[i++] = (char)((bits == 0 ? 0 : NOTA_CONT) | chunk);
  }
  return &nota[i];
}
/* Print the bytes of the leading variable-length number at nota as
   two-digit upper-case hex, each followed by a space, then a newline.
   Printing stops after the first byte whose continuation bit is clear. */
void print_nota_hex(char *nota)
{
  unsigned char byte;
  do {
    byte = (unsigned char)*nota++;
    printf("%02X ", byte);
  } while (CONTINUE(byte));
  printf("\n");
}
/* Encode n as a NOTA_INT: the sign goes in bit 3 of the first byte and the
   magnitude follows as a variable-length number with 3 bits in that byte.
   Returns the address of the byte after the encoded integer.
   NOTE(review): the most negative long long cannot be negated; callers
   should not pass it. */
char *nota_write_int(long long n, char *nota)
{
  char head = NOTA_INT;
  if (n < 0) {
    head |= 0x08;
    n = -n;
  }
  *nota = head;
  return nota_continue_num(n, nota, 3);
}
#define NOTA_DBL_PREC 6
#define xstr(s) str(s)
#define str(s) #s
#include <stdio.h>
#include <math.h>
/* Split num into a signed decimal coefficient and a power-of-ten exponent so
   that num is approximately coefficient * 10^exponent.
   The value is rounded to 12 decimal places and then to 14 significant
   digits, which hides binary-fraction noise such as 98.599999999999994.
   Zero, infinities and NaN are reported as coefficient 0, exponent 0.
   coefficient - receives the signed digits (sign of num included).
   exponent    - receives the decimal exponent. */
void extract_mantissa_coefficient(double num, long *coefficient, long *exponent)
{
  if (num == 0.0 || !isfinite(num)) {
    *coefficient = 0;
    *exponent = 0;
    return;
  }

  /* 1) Round half up to 12 decimal places. From 2^53 upward the scaled value
        has no fractional part left to round, and for very large inputs the
        scaling itself overflows to infinity, so those values are kept as they
        are. Below 2^53 the scaled value fits a long long, which makes the
        truncating cast a safe floor. */
  double magnitude = fabs(num);
  double scaled = magnitude * 1e12;
  double rounded = magnitude;
  if (scaled < 9007199254740992.0)
    rounded = (double)(long long)(scaled + 0.5) / 1e12;
  if (num < 0)
    rounded = -rounded;

  /* 2) Print with 14 significant digits. */
  char buf[64];
  snprintf(buf, sizeof(buf), "%.14g", rounded);

  /* 3) Cut off a scientific-notation suffix, remembering its value. */
  long sci_exp = 0;
  char *e_mark = strpbrk(buf, "eE");
  if (e_mark) {
    sci_exp = strtol(e_mark + 1, NULL, 10);
    *e_mark = '\0';
  }

  /* 4) Drop the decimal point, counting the digits that followed it. */
  long fraction_digits = 0;
  char *point = strchr(buf, '.');
  if (point) {
    size_t tail = strlen(point + 1);
    fraction_digits = (long)tail;
    memmove(point, point + 1, tail + 1); /* tail digits plus the terminator */
  }

  /* 5) What remains is an optionally signed integer. */
  *coefficient = (long)strtoll(buf, NULL, 10);

  /* 6) Every digit moved across the decimal point lowers the exponent by one. */
  *exponent = sci_exp - fraction_digits;
}
/* Encode a decimal literal such as "-12.50" or "3e-4".
   Digits before an 'e'/'E' form the coefficient; every digit after a '.'
   lowers the exponent by one; characters that are neither digits nor '.'
   are ignored. A literal with neither '.' nor an exponent is written as an
   integer, as is any literal whose coefficient is zero; everything else is
   written as a NOTA_FLOAT.
   Returns the address of the byte after the encoded value. */
char *nota_write_decimal_str(const char *decimal, char *nota)
{
  int negative = (decimal[0] == '-');
  if (negative)
    decimal++;

  long mantissa = 0;
  long exponent = 0;
  int saw_point = 0;
  int saw_exponent = 0;
  const char *p = decimal;

  /* Coefficient: everything up to the exponent marker or end of string. */
  for (; *p && *p != 'e' && *p != 'E'; p++) {
    if (*p == '.') {
      saw_point = 1;
      continue;
    }
    if (*p < '0' || *p > '9')
      continue;
    mantissa = mantissa * 10 + (*p - '0');
    if (saw_point)
      exponent--;
  }

  /* Optional exponent: marker, optional sign, digits. */
  if (*p == 'e' || *p == 'E') {
    saw_exponent = 1;
    p++;
    int direction = 1;
    if (*p == '-') {
      direction = -1;
      p++;
    } else if (*p == '+') {
      p++;
    }
    long written = 0;
    for (; *p >= '0' && *p <= '9'; p++)
      written = written * 10 + (*p - '0');
    exponent += direction * written;
  }

  if (!saw_point && !saw_exponent)
    return nota_write_int(mantissa * (negative ? -1 : 1), nota);

  /* Fold trailing zeros of the coefficient back into a negative exponent. */
  while (mantissa > 0 && mantissa % 10 == 0 && exponent < 0) {
    mantissa /= 10;
    exponent++;
  }

  if (mantissa == 0)
    return nota_write_int(0, nota);

  /* Header: type tag, exponent sign in bit 4, coefficient sign in bit 3. */
  char head = NOTA_FLOAT;
  if (exponent < 0) {
    head |= 1 << 4;
    exponent = -exponent;
  }
  if (negative)
    head |= 1 << 3;
  nota[0] = head;

  char *after_exponent = nota_continue_num(exponent, nota, 3);
  return nota_continue_num(mantissa, after_exponent, 7);
}
/* Encode n as a decimal float: exponent magnitude (3 bits in the first byte)
   followed by the coefficient magnitude (7 bits per byte), with the exponent
   sign in bit 4 and the number sign in bit 3 of the first byte. Values whose
   decimal exponent is zero are written as integers instead.
   Returns the address of the byte after the encoded value. */
char *nota_write_float(double n, char *nota)
{
  long coef;
  long exp;
  extract_mantissa_coefficient(n, &coef, &exp);

  /* The coefficient already carries the sign of n, so it is passed through
     unchanged; applying the sign a second time would turn negative values
     positive. */
  if (exp == 0)
    return nota_write_int(coef, nota);

  char head = NOTA_FLOAT;
  if (exp < 0)
    head |= 1 << 4;
  if (n < 0)
    head |= 1 << 3;
  nota[0] = head;

  char *c = nota_continue_num(labs(exp), nota, 3);
  return nota_continue_num(labs(coef), c, 7);
}
/* Decode a float and render it as text of the form "<mantissa>e<exponent>",
   with the mantissa printed to two decimal places (e.g. "123.00e-2").
   d    - receives a malloc'd string the caller must free(), or NULL when
          allocation fails; pass NULL to skip the value.
   nota - first byte of the encoded float.
   Returns the address of the byte after the float in every case. */
char *nota_read_float_str(char **d, char *nota)
{
  /* Sign bits live in the first byte. */
  int neg = NOTA_SIG_SIGN(*nota);
  int esign = NOTA_EXP_SIGN(*nota);

  /* Exponent: 3 bits in the first byte, then 7 per continuation byte. */
  long long e = (*nota) & NOTA_INT_DATA;
  while (CONTINUE(*nota)) {
    nota++;
    e = (e << 7) | ((*nota) & NOTA_DATA);
  }
  nota++;

  /* Mantissa: 7 bits per byte. */
  long long sig = (*nota) & NOTA_DATA;
  while (CONTINUE(*nota)) {
    nota++;
    sig = (sig << 7) | ((*nota) & NOTA_DATA);
  }
  nota++;

  if (!d)
    return nota;

  if (neg) sig = -sig;
  if (esign) e = -e;

  /* Print the mantissa directly. Scaling it by 10^e and back again is
     lossy and turns into inf/nan once pow() overflows or underflows. */
  int len = snprintf(NULL, 0, "%.2fe%lld", (double)sig, e);
  char *result = (len < 0) ? NULL : malloc((size_t)len + 1);
  if (!result) {
    *d = NULL;
    return nota;
  }
  snprintf(result, (size_t)len + 1, "%.2fe%lld", (double)sig, e);

  *d = result;
  return nota;
}
/* Decode a float as mantissa * 10^exponent.
   d    - receives the value; pass NULL to skip the float.
   nota - first byte of the encoded float.
   Returns the address of the byte after the float. */
char *nota_read_float(double *d, char *nota)
{
  /* A float is two consecutive variable-length numbers (exponent, then
     mantissa); both must be stepped over when skipping. */
  if (!d)
    return nota_skip(nota_skip(nota));

  /* Sign bits live in the first byte. */
  int neg = NOTA_SIG_SIGN(*nota);
  int esign = NOTA_EXP_SIGN(*nota);

  /* Exponent: 3 bits in the first byte, then 7 per continuation byte. */
  long long e = (*nota) & NOTA_INT_DATA;
  while (CONTINUE(*nota)) {
    nota++;
    e = (e << 7) | ((*nota) & NOTA_DATA);
  }
  nota++;

  /* Mantissa: 7 bits per byte. */
  long long sig = (*nota) & NOTA_DATA;
  while (CONTINUE(*nota)) {
    nota++;
    sig = (sig << 7) | ((*nota) & NOTA_DATA);
  }
  nota++;

  if (neg) sig = -sig;
  if (esign) e = -e;

  *d = (double)sig * pow(10.0, (double)e);
  return nota;
}
/* Encode n in the most compact fitting form: as an integer when it lies
   strictly between -2^63 and 2^63 and its fractional part is below 1e-14,
   otherwise as a decimal float.
   Returns the address of the byte after the encoded value. */
char *nota_write_number(double n, char *nota)
{
  /* 2^63 as a double. Both bounds are excluded: 2^63 does not fit in a
     long long, and -2^63 cannot be negated by the integer writer. */
  const double limit = -(double)LLONG_MIN;
  if (n <= -limit || n >= limit)
    return nota_write_float(n, nota);

  double int_part;
  double frac = modf(n, &int_part);
  if (fabs(frac) < 1e-14)
    return nota_write_int((long long)int_part, nota);
  return nota_write_float(n, nota);
}
/* Decode a NOTA_INT: 3 magnitude bits in the first byte, 7 in each
   continuation byte, sign taken from bit 3 of the first byte.
   n    - receives the value; pass NULL to skip the integer.
   Returns the address of the byte after the integer. */
char *nota_read_int(long long *n, char *nota)
{
  if (!n)
    return nota_skip(nota);

  char *c = nota;
  long long value = (*c) & NOTA_INT_DATA;
  while (CONTINUE(*c)) {
    c++;
    value = (value << 7) | (*c & NOTA_DATA);
  }
  c++; /* step past the final byte */

  if (NOTA_INT_SIGN(*nota))
    value = -value;
  *n = value;
  return c;
}
/* Write a blob: a NOTA_BLOB header carrying the length n in BITS, followed
   by the bytes needed to hold n bits, copied from data.
   Returns the address of the byte after the payload. */
char *nota_write_blob(unsigned long long n, char *data, char *nota)
{
  nota[0] = NOTA_BLOB;
  nota = nota_continue_num(n, nota, 4);

  /* Round up to whole bytes in integer arithmetic; written this way the sum
     cannot wrap for n close to the maximum. */
  size_t bytes = (size_t)(n / 8 + (n % 8 != 0));
  if (bytes)
    memcpy(nota, data, bytes);
  return nota + bytes;
}
/* Write an array header holding the element count n (4 bits in the first
   byte). The elements themselves are written by the caller afterwards.
   Returns the address of the byte after the header. */
char *nota_write_array(unsigned long long n, char *nota)
{
  *nota = NOTA_ARR;
  char *after_header = nota_continue_num(n, nota, 4);
  return after_header;
}
/* Read the element count from an array header.
   len - receives the count; pass NULL to step over the header without
         decoding it.
   Returns the address of the byte after the header (the first element). */
char *nota_read_array(long long *len, char *nota)
{
  /* nota_read_num skips the number when len is NULL, so the stream always
     advances past the header. */
  return nota_read_num(len, nota);
}
/* Read the entry count from a record header.
   len - receives the count; pass NULL to step over the header without
         decoding it.
   Returns the address of the byte after the header (the first entry). */
char *nota_read_record(long long *len, char *nota)
{
  /* nota_read_num skips the number when len is NULL, so the stream always
     advances past the header. */
  return nota_read_num(len, nota);
}
/* Read a blob.
   len  - receives the payload size in BYTES (the stored bit count rounded
          up); pass NULL to skip the whole blob without allocating.
   blob - receives a malloc'd copy of the payload that the caller must
          free(); set to NULL if allocation fails. May be NULL when only the
          length is wanted.
   Returns the address of the byte after the payload in every case. */
char *nota_read_blob(long long *len, char **blob, char *nota)
{
  long long bits = 0;
  nota = nota_read_num(&bits, nota);
  if (bits < 0)
    bits = 0; /* malformed header: treat as empty rather than walk backwards */

  long long bytes = bits / 8 + (bits % 8 != 0);

  if (len) {
    *len = bytes;
    if (blob) {
      *blob = malloc((size_t)bytes);
      if (*blob)
        memcpy(*blob, nota, (size_t)bytes);
    }
  }
  return nota + bytes;
}
/* Write a record header holding the entry count n (4 bits in the first
   byte). The entries themselves are written by the caller afterwards.
   Returns the address of the byte after the header. */
char *nota_write_record(unsigned long long n, char *nota)
{
  *nota = NOTA_REC;
  char *after_header = nota_continue_num(n, nota, 4);
  return after_header;
}
/* Write a one-byte symbol: the NOTA_SYM tag OR-ed with sym.
   Returns the address of the byte after it. */
char *nota_write_sym(int sym, char *nota)
{
  nota[0] = NOTA_SYM | sym;
  return &nota[1];
}
/* Read a one-byte symbol.
   sym - receives the low 4 bits of the byte; may be NULL to skip it.
   Returns the address of the byte after the symbol. */
char *nota_read_sym(int *sym, char *nota)
{
  char *next = nota + 1;
  if (!sym)
    return next;
  *sym = nota[0] & 0x0f;
  return next;
}
/* Read a text value and convert it to a NUL-terminated UTF-8 string.
   text - receives a heap-allocated string the caller must free(); set to
          NULL if allocation fails. Pass NULL to skip the text.
   Returns the address of the byte after the text. If allocation fails the
   characters cannot be walked, so the returned pointer is just past the
   length header. */
char *nota_read_text(char **text, char *nota)
{
  long long chars = 0;
  nota = nota_read_num(&chars, nota);
  if (chars < 0)
    chars = 0;

  /* Worst case is 4 UTF-8 bytes per character, plus one byte for the
     terminator. A heap buffer is used because the size comes from the
     stream and may be zero or very large. */
  char *utf = malloc((size_t)chars * 4 + 1);
  if (!utf) {
    if (text)
      *text = NULL;
    return nota;
  }

  char *pp = utf;
  kim_to_utf8(&nota, &pp, chars);
  *pp = 0;

  if (!text) {
    free(utf);
    return nota;
  }

  /* Give back the unused tail; if shrinking fails the original buffer is
     still valid and is returned as is. */
  char *fit = realloc(utf, (size_t)(pp - utf) + 1);
  *text = fit ? fit : utf;
  return nota;
}
/* Write a text value: a NOTA_TEXT header carrying the count returned by
   utf8_count(s), followed by the characters as emitted by utf8_to_kim.
   Returns the address of the byte after the encoded text. */
char *nota_write_text(const char *s, char *nota)
{
  *nota = NOTA_TEXT;
  long long count = utf8_count(s);
  char *out = nota_continue_num(count, nota, 4);
  utf8_to_kim(&s, &out);
  return out;
}
#endif
#endif