547 lines
13 KiB
C
Executable File
547 lines
13 KiB
C
Executable File
#ifndef NOTA_H
|
||
#define NOTA_H
|
||
|
||
#define NOTA_BLOB 0x00 // C 0 0 0
|
||
#define NOTA_TEXT 0x10 // C 0 0 1
|
||
#define NOTA_ARR 0x20 // C 0 1 0
|
||
#define NOTA_REC 0x30 // C 0 1 1
|
||
#define NOTA_FLOAT 0x40 // C 1 0
|
||
#define NOTA_INT 0x60 // C 1 1 0
|
||
#define NOTA_SYM 0x70 // C 1 1 1
|
||
|
||
#define NOTA_NULL 0x00
|
||
#define NOTA_FALSE 0x02
|
||
#define NOTA_TRUE 0x03
|
||
#define NOTA_INF 0x03
|
||
#define NOTA_PRIVATE 0x08
|
||
#define NOTA_SYSTEM 0x09
|
||
|
||
// Returns the NOTA_* type tag encoded in the byte at *nota
|
||
int nota_type(char *nota);
|
||
|
||
// Functions take a pointer to a buffer *nota, read or write the value, and then return a pointer to the next byte of the stream
|
||
|
||
// Pass NULL as the output pointer of a read function to skip over the value without reading it
|
||
|
||
char *nota_read_blob(long long *len, char **blob, char *nota);
|
||
// ALLOCATES! The decoded string is returned through *text via strdup; the caller must free() it
|
||
char *nota_read_text(char **text, char *nota);
|
||
char *nota_read_array(long long *len, char *nota);
|
||
char *nota_read_record(long long *len, char *nota);
|
||
char *nota_read_float(double *d, char *nota);
|
||
char *nota_read_int(long long *l, char *nota);
|
||
char *nota_read_sym(int *sym, char *nota);
|
||
|
||
char *nota_write_blob(unsigned long long n, char *data, char *nota);
|
||
char *nota_write_text(const char *s, char *nota);
|
||
char *nota_write_array(unsigned long long n, char *nota);
|
||
char *nota_write_record(unsigned long long n, char *nota);
|
||
char *nota_write_number(double n, char *nota);
|
||
char *nota_write_sym(int sym, char *nota);
|
||
|
||
void print_nota_hex(char *nota);
|
||
|
||
#ifdef NOTA_IMPLEMENTATION
|
||
|
||
#include "stdio.h"
|
||
#include "math.h"
|
||
#include "string.h"
|
||
#include "stdlib.h"
|
||
#include "limits.h"
|
||
#include "kim.h"
|
||
|
||
/* Bit layout helpers for a single encoded byte.
   Every macro argument is parenthesized so that compound expressions such as
   NOTA_INT_SIGN(a | b) are tested as a whole. */
#define NOTA_CONT 0x80                         /* continuation flag: more bytes follow */
#define NOTA_DATA 0x7f                         /* payload bits of a continuation byte */
#define NOTA_INT_DATA 0x07                     /* payload bits in an int/float head byte */
#define NOTA_INT_SIGN(CHAR) ((CHAR) & (1<<3))  /* nonzero when the integer is negative */
#define NOTA_SIG_SIGN(CHAR) ((CHAR) & (1<<3))  /* nonzero when the float coefficient is negative */
#define NOTA_EXP_SIGN(CHAR) ((CHAR) & (1<<4))  /* nonzero when the float exponent is negative */
#define NOTA_TYPE 0x70                         /* type bits of a head byte */
#define NOTA_HEAD_DATA 0x0f                    /* payload bits in a blob/text/array/record head byte */
/* 1 when the continuation flag is set, 0 otherwise. Tests the bit with a mask
   instead of right-shifting a possibly negative char. */
#define CONTINUE(CHAR) (((CHAR) & NOTA_CONT) != 0)
|
||
|
||
#define UTF8_DATA 0x3f
|
||
|
||
/* define this to use native string instead of kim. Bytes are encoded instead of runes */
|
||
#define NOTA_UTF8
|
||
|
||
int nota_type(char *nota) { return *nota & NOTA_TYPE; }
|
||
|
||
/* Step over one variable-length number: advance while the continuation flag
   is set and return the address just past the first byte without it. */
char *nota_skip(char *nota)
{
  for (; CONTINUE(*nota); nota++)
    ;

  return nota + 1;
}
|
||
|
||
char *nota_read_num(long long *n, char *nota)
|
||
{
|
||
if (!n)
|
||
return nota_skip(nota);
|
||
|
||
*n = 0;
|
||
*n |= (*nota) & NOTA_HEAD_DATA;
|
||
|
||
while (CONTINUE(*(nota++)))
|
||
*n = (((*n<<7) | *nota) & NOTA_DATA);
|
||
|
||
return nota;
|
||
}
|
||
|
||
// Number of payload bits needed to store n when the first byte offers sb
// bits and every further byte offers 7. The result is sb plus a whole
// number of 7-bit groups; zero needs only the sb bits of the first byte.
int nota_bits(long long n, int sb)
{
  if (n == 0)
    return sb;

  int significant = (int)(sizeof n * CHAR_BIT) - __builtin_clzll(n);
  int beyond_head = significant - sb;
  int groups = (beyond_head + 6) / 7;

  return groups * 7 + sb;
}
|
||
|
||
// write a number from n into *nota, with sb bits in the first char
|
||
char *nota_continue_num(long long n, char *nota, int sb)
|
||
{
|
||
int bits = nota_bits(n, sb);
|
||
bits -= sb;
|
||
if (bits > 0)
|
||
nota[0] |= NOTA_CONT;
|
||
else
|
||
nota[0] &= ~NOTA_CONT;
|
||
|
||
int shex = (~0) << sb;
|
||
nota[0] &= shex; /* clear shex bits */
|
||
nota[0] |= (~shex) & (n>>bits);
|
||
|
||
int i = 1;
|
||
while (bits > 0) {
|
||
bits -= 7;
|
||
int head = bits == 0 ? 0 : NOTA_CONT;
|
||
nota[i] = head | (NOTA_DATA & (n >> bits));
|
||
i++;
|
||
}
|
||
|
||
return ¬a[i];
|
||
}
|
||
|
||
|
||
/* Print the bytes of one variable-length number starting at *nota as
   two-digit uppercase hex values separated by spaces, followed by a newline.
   Printing stops after the first byte whose continuation flag is clear. */
void print_nota_hex(char *nota)
{
  do {
    printf("%02X ", (unsigned char)(*nota));
  } while (CONTINUE(*(nota++)));

  printf("\n");
}
|
||
|
||
char *nota_write_int(long long n, char *nota)
|
||
{
|
||
char sign = 0;
|
||
|
||
if (n < 0) {
|
||
sign = 0x08;
|
||
n *= -1;
|
||
}
|
||
|
||
*nota = NOTA_INT | sign;
|
||
|
||
return nota_continue_num(n, nota, 3);
|
||
}
|
||
|
||
#define NOTA_DBL_PREC 6
|
||
#define xstr(s) str(s)
|
||
#define str(s) #s
|
||
|
||
#include <stdio.h>
|
||
#include <math.h>
|
||
|
||
void extract_mantissa_coefficient(double num, long *coefficient, long *exponent)
|
||
{
|
||
if (num == 0.0) {
|
||
*coefficient = 0;
|
||
*exponent = 0;
|
||
return;
|
||
}
|
||
|
||
// Optional: handle sign separately if you want 'coefficient' always positive.
|
||
// For simplicity, let's just let atol(...) parse the sign if it's there.
|
||
|
||
// 1) Slightly round the number to avoid too many FP trailing digits:
|
||
// Example: Round to 12 decimal places.
|
||
double rounded = floor(fabs(num) * 1e12 + 0.5) / 1e12;
|
||
if (num < 0) {
|
||
rounded = -rounded;
|
||
}
|
||
|
||
// 2) Convert to string with fewer digits of precision so we do NOT get
|
||
// the long binary-fraction expansions (like 98.599999999999994).
|
||
char buf[64];
|
||
snprintf(buf, sizeof(buf), "%.14g", rounded);
|
||
|
||
// 3) Look for scientific notation
|
||
char *exp_pos = strpbrk(buf, "eE");
|
||
long exp_from_sci = 0;
|
||
if (exp_pos) {
|
||
exp_from_sci = atol(exp_pos + 1);
|
||
*exp_pos = '\0'; // Truncate the exponent part from the string
|
||
}
|
||
|
||
// 4) Find decimal point
|
||
char *dec_point = strchr(buf, '.');
|
||
int digits_after_decimal = 0;
|
||
|
||
if (dec_point) {
|
||
digits_after_decimal = (int)strlen(dec_point + 1);
|
||
// Remove the '.' by shifting the remainder left
|
||
memmove(dec_point, dec_point + 1, strlen(dec_point));
|
||
}
|
||
|
||
// 5) Now the string is just an integer (possibly signed), so parse it
|
||
long long coeff_ll = atoll(buf); // support up to 64-bit range
|
||
*coefficient = (long)coeff_ll;
|
||
|
||
// 6) The final decimal exponent is whatever came from 'e/E'
|
||
// minus however many digits we removed by removing the decimal point.
|
||
*exponent = exp_from_sci - digits_after_decimal;
|
||
}
|
||
|
||
char *nota_write_decimal_str(const char *decimal, char *nota)
|
||
{
|
||
// Handle negative sign
|
||
int neg = (decimal[0] == '-');
|
||
if (neg) decimal++; // Skip the '-' if present
|
||
|
||
// Parse integer part
|
||
long coef = 0;
|
||
long exp = 0;
|
||
int decimal_point_seen = 0;
|
||
const char *ptr = decimal;
|
||
int has_exponent = 0;
|
||
|
||
// First pass: calculate coefficient up to 'e' or 'E'
|
||
while (*ptr && *ptr != 'e' && *ptr != 'E') {
|
||
if (*ptr == '.') {
|
||
decimal_point_seen = 1;
|
||
ptr++;
|
||
continue;
|
||
}
|
||
|
||
if (*ptr >= '0' && *ptr <= '9') {
|
||
coef = coef * 10 + (*ptr - '0');
|
||
if (decimal_point_seen) {
|
||
exp--; // Each digit after decimal point decreases exponent
|
||
}
|
||
}
|
||
ptr++;
|
||
}
|
||
|
||
// Parse exponent part if present
|
||
if (*ptr == 'e' || *ptr == 'E') {
|
||
has_exponent = 1;
|
||
ptr++; // Skip 'e' or 'E'
|
||
|
||
int exp_sign = 1;
|
||
if (*ptr == '-') {
|
||
exp_sign = -1;
|
||
ptr++;
|
||
} else if (*ptr == '+') {
|
||
ptr++;
|
||
}
|
||
|
||
long explicit_exp = 0;
|
||
while (*ptr >= '0' && *ptr <= '9') {
|
||
explicit_exp = explicit_exp * 10 + (*ptr - '0');
|
||
ptr++;
|
||
}
|
||
exp += exp_sign * explicit_exp;
|
||
}
|
||
|
||
// If no decimal point and no exponent, treat as integer
|
||
if (!decimal_point_seen && !has_exponent) {
|
||
return nota_write_int(coef * (neg ? -1 : 1), nota);
|
||
}
|
||
|
||
// Remove trailing zeros from coefficient
|
||
while (coef > 0 && coef % 10 == 0 && exp < 0) {
|
||
coef /= 10;
|
||
exp++;
|
||
}
|
||
|
||
// Handle zero case
|
||
if (coef == 0) {
|
||
return nota_write_int(0, nota);
|
||
}
|
||
|
||
// Set up the notation format similar to nota_write_float
|
||
int expsign = exp < 0 ? ~0 : 0;
|
||
exp = llabs(exp);
|
||
|
||
nota[0] = NOTA_FLOAT;
|
||
nota[0] |= (expsign & 1) << 4; // Exponent sign bit
|
||
nota[0] |= (neg & 1) << 3; // Number sign bit
|
||
|
||
char *c = nota_continue_num(exp, nota, 3);
|
||
return nota_continue_num(coef, c, 7);
|
||
}
|
||
|
||
char *nota_write_float(double n, char *nota)
|
||
{
|
||
int neg = (n < 0);
|
||
long coef;
|
||
long exp;
|
||
extract_mantissa_coefficient(n, &coef, &exp);
|
||
|
||
// Store integer if exponent is zero
|
||
if (exp == 0)
|
||
return nota_write_int(coef * (neg ? -1 : 1), nota);
|
||
|
||
int expsign = exp < 0 ? ~0 : 0;
|
||
exp = labs(exp);
|
||
|
||
nota[0] = NOTA_FLOAT;
|
||
nota[0] |= (expsign & 1) << 4;
|
||
nota[0] |= (neg & 1) << 3;
|
||
|
||
char *c = nota_continue_num(exp, nota, 3);
|
||
return nota_continue_num(labs(coef), c, 7);
|
||
}
|
||
|
||
char *nota_read_float_str(char **d, char *nota)
|
||
{
|
||
// Extract sign bits from the first byte
|
||
int neg = NOTA_SIG_SIGN(*nota); // bit 3 => mantissa sign
|
||
int esign = NOTA_EXP_SIGN(*nota); // bit 4 => exponent sign
|
||
|
||
// Read the exponent’s lower 3 bits from the first byte
|
||
long long e = (*nota) & NOTA_INT_DATA; // NOTA_INT_DATA = 0x07
|
||
|
||
// Count exponent bytes and accumulate value
|
||
int e_bytes = 1;
|
||
while (CONTINUE(*nota)) {
|
||
nota++;
|
||
e = (e << 7) | ((*nota) & NOTA_DATA); // NOTA_DATA = 0x7F
|
||
e_bytes++;
|
||
}
|
||
|
||
// Move past the last exponent byte
|
||
nota++;
|
||
|
||
// Read the mantissa
|
||
long long sig = (*nota) & NOTA_DATA;
|
||
int sig_bytes = 1;
|
||
while (CONTINUE(*nota)) {
|
||
nota++;
|
||
sig = (sig << 7) | ((*nota) & NOTA_DATA);
|
||
sig_bytes++;
|
||
}
|
||
|
||
// Move past the last mantissa byte
|
||
nota++;
|
||
|
||
// Apply sign bits
|
||
if (neg) sig = -sig;
|
||
if (esign) e = -e;
|
||
|
||
// Calculate digits in mantissa (sig) and exponent (e)
|
||
int sig_digits = (sig == 0) ? 1 : (int)log10(llabs(sig)) + 1;
|
||
int e_digits = (e == 0) ? 1 : (int)log10(llabs(e)) + 1;
|
||
|
||
// Calculate total string size:
|
||
// - Mantissa: sign (1), digits, decimal (1), 2 decimal places
|
||
// - Exponent: 'e', sign (1), digits
|
||
// - Null terminator (1)
|
||
int size = 1 + sig_digits + 1 + 2 + 1 + 1 + e_digits + 1;
|
||
if (neg) size++; // Extra space for negative mantissa
|
||
if (esign) size++; // Extra space for negative exponent
|
||
|
||
// Allocate the string
|
||
char *result = (char *)malloc(size);
|
||
if (!result) {
|
||
*d = NULL;
|
||
return nota; // Return current position even on failure
|
||
}
|
||
|
||
// Format the string as "xey" (e.g., "1.23e4")
|
||
double value = (double)sig * pow(10.0, (double)e);
|
||
snprintf(result, size, "%.*fe%lld", 2, value/pow(10.0, (double)e), e);
|
||
|
||
// Set the output pointer and return
|
||
*d = result;
|
||
return nota;
|
||
}
|
||
|
||
char *nota_read_float(double *d, char *nota)
|
||
{
|
||
// If the caller passed NULL for d, just skip over the float encoding
|
||
if (!d) {
|
||
return nota_skip(nota);
|
||
}
|
||
|
||
// Extract sign bits from the first byte
|
||
int neg = NOTA_SIG_SIGN(*nota); // bit 3 => mantissa sign
|
||
int esign = NOTA_EXP_SIGN(*nota); // bit 4 => exponent sign
|
||
|
||
// Read the exponent’s lower 3 bits from the first byte
|
||
long long e = (*nota) & NOTA_INT_DATA; // NOTA_INT_DATA = 0x07
|
||
|
||
// While the continuation bit is set, advance and accumulate exponent
|
||
while (CONTINUE(*nota)) {
|
||
nota++;
|
||
e = (e << 7) | ((*nota) & NOTA_DATA); // NOTA_DATA = 0x7F
|
||
}
|
||
|
||
// Move past the last exponent byte
|
||
nota++;
|
||
|
||
// Now read the mantissa in the same variable-length style
|
||
long long sig = (*nota) & NOTA_DATA;
|
||
while (CONTINUE(*nota)) {
|
||
nota++;
|
||
sig = (sig << 7) | ((*nota) & NOTA_DATA);
|
||
}
|
||
|
||
// Move past the last mantissa byte
|
||
nota++;
|
||
|
||
// Apply sign bits
|
||
if (neg) sig = -sig;
|
||
if (esign) e = -e;
|
||
|
||
// Finally compute the double value: mantissa * 10^exponent
|
||
*d = (double)sig * pow(10.0, (double)e);
|
||
|
||
// Return the pointer to wherever we ended
|
||
return nota;
|
||
}
|
||
|
||
/* Encode n, choosing the integer form when n is integral to within 1e-14
   and its magnitude is below 2^63, and the float form otherwise.
   Returns the address of the byte following the encoded value. */
char *nota_write_number(double n, char *nota)
{
  /* 2^63 as a double. Values of exactly +/-2^63 are routed to the float form:
     +2^63 does not fit in a long long and the magnitude of -2^63 cannot be
     negated by the integer writer. */
  const double limit = -(double)LLONG_MIN;

  if (n <= -limit || n >= limit)
    return nota_write_float(n, nota);

  double int_part;
  double frac = modf(n, &int_part);

  if (fabs(frac) < 1e-14)
    return nota_write_int((long long)int_part, nota);

  return nota_write_float(n, nota);
}
|
||
|
||
char *nota_read_int(long long *n, char *nota)
|
||
{
|
||
if (!n)
|
||
return nota_skip(nota);
|
||
|
||
*n = 0;
|
||
char *c = nota;
|
||
*n |= (*c) & NOTA_INT_DATA; /* first three bits */
|
||
while (CONTINUE(*(c++)))
|
||
*n = (*n<<7) | (*c & NOTA_DATA);
|
||
|
||
if (NOTA_INT_SIGN(*nota)) *n *= -1;
|
||
|
||
return c;
|
||
}
|
||
|
||
/* n is the number of bits */
|
||
char *nota_write_blob(unsigned long long n, char *data, char *nota)
|
||
{
|
||
nota[0] = NOTA_BLOB;
|
||
nota = nota_continue_num(n, nota, 4);
|
||
int bytes = floor((n+7)/8);
|
||
for (int i = 0; i < bytes; i++)
|
||
nota[i] = data[i];
|
||
|
||
return nota+bytes;
|
||
}
|
||
|
||
char *nota_write_array(unsigned long long n, char *nota)
|
||
{
|
||
nota[0] = NOTA_ARR;
|
||
return nota_continue_num(n, nota, 4);
|
||
}
|
||
|
||
/* Read an array head.
   len:  receives the element count; pass NULL to step over the head
         without reading it.
   nota: points at the array's head byte.
   Returns the address of the byte following the head (the first element). */
char *nota_read_array(long long *len, char *nota)
{
  /* nota_read_num skips the number itself when len is NULL, so the stream
     position advances in both cases. */
  return nota_read_num(len, nota);
}
|
||
|
||
/* Read a record head.
   len:  receives the count stored in the head; pass NULL to step over the
         head without reading it.
   nota: points at the record's head byte.
   Returns the address of the byte following the head. */
char *nota_read_record(long long *len, char *nota)
{
  /* nota_read_num skips the number itself when len is NULL, so the stream
     position advances in both cases. */
  return nota_read_num(len, nota);
}
|
||
|
||
/* Read a blob. ALLOCATES: the copy stored in *blob must be free()d by the caller.
   len:  receives the blob size in BYTES (the stored bit count rounded up).
         Pass NULL to skip the blob entirely; *blob is then left untouched.
   blob: receives a malloc'd copy of the bytes, or NULL when allocation fails
         or the blob is empty. May be NULL to obtain only the length.
   nota: points at the blob's head byte.
   Returns the address of the byte following the blob data. */
char *nota_read_blob(long long *len, char **blob, char *nota)
{
  long long bits = 0;
  nota = nota_read_num(&bits, nota);

  /* Integer arithmetic at full width; no truncation to int. */
  long long bytes = (bits + 7) / 8;

  if (len) {
    *len = bytes;

    if (blob) {
      *blob = bytes > 0 ? malloc((size_t)bytes) : NULL;
      if (*blob)
        memcpy(*blob, nota, (size_t)bytes);
    }
  }

  return nota + bytes;
}
|
||
|
||
char *nota_write_record(unsigned long long n, char *nota)
|
||
{
|
||
nota[0] = NOTA_REC;
|
||
return nota_continue_num(n, nota, 4);
|
||
}
|
||
|
||
char *nota_write_sym(int sym, char *nota)
|
||
{
|
||
*nota = NOTA_SYM | sym;
|
||
return nota+1;
|
||
}
|
||
|
||
/* Read a one-byte symbol.
   sym:  receives the low four bits of the byte; pass NULL to skip it.
   nota: points at the symbol byte.
   Returns the address of the byte following the symbol. */
char *nota_read_sym(int *sym, char *nota)
{
  char *next = nota + 1;

  if (!sym)
    return next;

  *sym = nota[0] & 0x0f;
  return next;
}
|
||
|
||
/* Read a text value and convert it to a NUL-terminated UTF-8 string.
   ALLOCATES: the string stored in *text comes from strdup and must be
   free()d by the caller.
   text: receives the string, or NULL when allocation fails. Pass NULL to
         decode and discard the text.
   nota: points at the text's head byte.
   Returns the address of the byte following the text. If the scratch buffer
   cannot be allocated, the returned address is just past the length prefix
   because the characters could not be decoded. */
char *nota_read_text(char **text, char *nota)
{
  long long chars = 0;
  nota = nota_read_num(&chars, nota);

  if (text)
    *text = NULL;

  if (chars < 0)
    return nota;

  /* Worst case is four UTF-8 bytes per character, plus the terminator.
     Heap storage keeps a long text from exhausting the stack and stays
     valid for empty text. */
  char *utf = malloc((size_t)chars * 4 + 1);
  if (!utf)
    return nota;

  char *pp = utf;
  kim_to_utf8(&nota, &pp, chars);
  *pp = 0;

  if (text)
    *text = strdup(utf);

  free(utf);
  return nota;
}
|
||
|
||
char *nota_write_text(const char *s, char *nota)
|
||
{
|
||
nota[0] = NOTA_TEXT;
|
||
long long n = utf8_count(s);
|
||
nota = nota_continue_num(n,nota,4);
|
||
utf8_to_kim(&s, ¬a);
|
||
return nota;
|
||
}
|
||
|
||
#endif
|
||
#endif
|