add utf8 and kim text encoder/decoders

This commit is contained in:
2025-06-07 23:35:19 -05:00
parent d039e2cfe6
commit cbda7dfbc9
12 changed files with 518 additions and 105 deletions

View File

@@ -295,7 +295,7 @@ src += [
'anim.c', 'config.c', 'datastream.c','font.c','HandmadeMath.c','jsffi.c','model.c', 'anim.c', 'config.c', 'datastream.c','font.c','HandmadeMath.c','jsffi.c','model.c',
'render.c','simplex.c','spline.c', 'transform.c','cell.c', 'wildmatch.c', 'render.c','simplex.c','spline.c', 'transform.c','cell.c', 'wildmatch.c',
'sprite.c', 'rtree.c', 'qjs_nota.c', 'qjs_soloud.c', 'qjs_sdl.c', 'qjs_sdl_input.c', 'qjs_sdl_video.c', 'qjs_sdl_surface.c', 'qjs_math.c', 'qjs_geometry.c', 'qjs_transform.c', 'qjs_sprite.c', 'qjs_io.c', 'qjs_fd.c', 'qjs_os.c', 'qjs_actor.c', 'sprite.c', 'rtree.c', 'qjs_nota.c', 'qjs_soloud.c', 'qjs_sdl.c', 'qjs_sdl_input.c', 'qjs_sdl_video.c', 'qjs_sdl_surface.c', 'qjs_math.c', 'qjs_geometry.c', 'qjs_transform.c', 'qjs_sprite.c', 'qjs_io.c', 'qjs_fd.c', 'qjs_os.c', 'qjs_actor.c',
'qjs_qr.c', 'qjs_wota.c', 'monocypher.c', 'qjs_blob.c', 'qjs_crypto.c', 'qjs_time.c', 'qjs_http.c', 'qjs_rtree.c', 'qjs_spline.c', 'qjs_js.c', 'qjs_debug.c', 'picohttpparser.c', 'qjs_miniz.c', 'timer.c', 'qjs_socket.c' 'qjs_qr.c', 'qjs_wota.c', 'monocypher.c', 'qjs_blob.c', 'qjs_crypto.c', 'qjs_time.c', 'qjs_http.c', 'qjs_rtree.c', 'qjs_spline.c', 'qjs_js.c', 'qjs_debug.c', 'picohttpparser.c', 'qjs_miniz.c', 'timer.c', 'qjs_socket.c', 'qjs_kim.c', 'qjs_utf8.c'
] ]
# quirc src # quirc src
src += [ src += [

View File

@@ -3,6 +3,7 @@
/* -------- helper functions ----------------------------------------- */ /* -------- helper functions ----------------------------------------- */
var blob = use('blob') var blob = use('blob')
var utf8 = use('utf8')
// Convert number to string with given radix // Convert number to string with given radix
function to_radix(num, radix) { function to_radix(num, radix) {
@@ -179,113 +180,43 @@ function text() {
} }
// Default: interpret as UTF-8 text // Default: interpret as UTF-8 text
var byte_count = Math.floor(bit_length / 8); // Use the utf8 module to decode the blob
var bytes = []; return utf8.decode(arg);
// Read bytes from the blob
for (var i = 0; i < byte_count; i++) {
var byte_val = 0;
for (var j = 0; j < 8; j++) {
var bit_pos = i * 8 + j;
var bit = arg.read_logical(bit_pos);
if (bit) byte_val |= (1 << j);
}
bytes.push(byte_val);
}
// Convert bytes to UTF-8 string
var result = "";
var i = 0;
while (i < bytes.length) {
var b1 = bytes[i];
var codepoint;
var nextI;
if (b1 < 0x80) {
// 1-byte ASCII
codepoint = b1;
nextI = i + 1;
} else if (b1 < 0xC0) {
// Invalid start byte, treat as replacement character
codepoint = 0xFFFD;
nextI = i + 1;
} else if (b1 < 0xE0) {
// 2-byte sequence
if (i + 1 < bytes.length && (bytes[i + 1] & 0xC0) === 0x80) {
codepoint = ((b1 & 0x1F) << 6) | (bytes[i + 1] & 0x3F);
nextI = i + 2;
} else {
codepoint = 0xFFFD;
nextI = i + 1;
}
} else if (b1 < 0xF0) {
// 3-byte sequence
if (i + 2 < bytes.length &&
(bytes[i + 1] & 0xC0) === 0x80 &&
(bytes[i + 2] & 0xC0) === 0x80) {
codepoint = ((b1 & 0x0F) << 12) |
((bytes[i + 1] & 0x3F) << 6) |
(bytes[i + 2] & 0x3F);
nextI = i + 3;
} else {
codepoint = 0xFFFD;
nextI = i + 1;
}
} else if (b1 < 0xF8) {
// 4-byte sequence
if (i + 3 < bytes.length &&
(bytes[i + 1] & 0xC0) === 0x80 &&
(bytes[i + 2] & 0xC0) === 0x80 &&
(bytes[i + 3] & 0xC0) === 0x80) {
codepoint = ((b1 & 0x07) << 18) |
((bytes[i + 1] & 0x3F) << 12) |
((bytes[i + 2] & 0x3F) << 6) |
(bytes[i + 3] & 0x3F);
nextI = i + 4;
} else {
codepoint = 0xFFFD;
nextI = i + 1;
}
} else {
// Invalid start byte
codepoint = 0xFFFD;
nextI = i + 1;
}
// Convert codepoint to string
if (codepoint <= 0xFFFF) {
result += String.fromCharCode(codepoint);
} else if (codepoint <= 0x10FFFF) {
// Convert to surrogate pair for JavaScript
codepoint -= 0x10000;
result += String.fromCharCode(0xD800 + (codepoint >> 10));
result += String.fromCharCode(0xDC00 + (codepoint & 0x3FF));
} else {
result += String.fromCharCode(0xFFFD); // Replacement character
}
i = nextI;
}
return result;
} }
// Handle array conversion // Handle array conversion
if (Array.isArray(arg)) { if (Array.isArray(arg)) {
var separator = arguments[1] || ""; var separator = arguments[1] || "";
var result = "";
// Check if all items are valid codepoints
var all_codepoints = true;
for (var i = 0; i < arg.length; i++) { for (var i = 0; i < arg.length; i++) {
if (i > 0) result += separator;
var item = arg[i]; var item = arg[i];
if (typeof item === 'number' && item >= 0 && item <= 0x10FFFF && item === Math.floor(item)) { if (!(typeof item === 'number' && item >= 0 && item <= 0x10FFFF && item === Math.floor(item))) {
// Unicode codepoint all_codepoints = false;
result += String.fromCharCode(item); break;
} else {
result += String(item);
} }
} }
return result;
if (all_codepoints && separator === "") {
// Use utf8 module to convert codepoints to string
return utf8.from_codepoints(arg);
} else {
// General array to string conversion
var result = "";
for (var i = 0; i < arg.length; i++) {
if (i > 0) result += separator;
var item = arg[i];
if (typeof item === 'number' && item >= 0 && item <= 0x10FFFF && item === Math.floor(item)) {
// Single codepoint - use utf8 module
result += utf8.from_codepoints([item]);
} else {
result += String(item);
}
}
return result;
}
} }
// Handle number conversion // Handle number conversion

View File

@@ -53,6 +53,8 @@
#include "qjs_debug.h" #include "qjs_debug.h"
#include "qjs_sdl_surface.h" #include "qjs_sdl_surface.h"
#include "qjs_sdl.h" #include "qjs_sdl.h"
#include "qjs_kim.h"
#include "qjs_utf8.h"
#ifndef NSTEAM #ifndef NSTEAM
#include "qjs_steam.h" #include "qjs_steam.h"
#endif #endif
@@ -1554,6 +1556,8 @@ void ffi_load(JSContext *js)
arrput(rt->module_registry, MISTLINE(http)); arrput(rt->module_registry, MISTLINE(http));
arrput(rt->module_registry, MISTLINE(crypto)); arrput(rt->module_registry, MISTLINE(crypto));
arrput(rt->module_registry, MISTLINE(miniz)); arrput(rt->module_registry, MISTLINE(miniz));
arrput(rt->module_registry, MISTLINE(kim));
arrput(rt->module_registry, MISTLINE(utf8));
// power user // power user
arrput(rt->module_registry, MISTLINE(js)); arrput(rt->module_registry, MISTLINE(js));

View File

@@ -10,17 +10,17 @@ void kim_to_utf8(char **kim, char **utf, int runes);
// Return the number of runes in a utf8 string // Return the number of runes in a utf8 string
int utf8_count(const char *utf8); int utf8_count(const char *utf8);
int decode_utf8(char **s);
void encode_utf8(char **s, int code);
void encode_kim(char **s, int code);
int decode_kim(char **s);
#ifdef KIM_IMPLEMENTATION #ifdef KIM_IMPLEMENTATION
#define KIM_CONT 0x80 #define KIM_CONT 0x80
#define KIM_DATA 0x7f #define KIM_DATA 0x7f
#define CONTINUE(CHAR) (CHAR>>7) #define CONTINUE(CHAR) (CHAR>>7)
int decode_utf8(char **s);
void encode_utf8(char **s, int code);
static void encode_kim(char **s, int code);
int decode_kim(char **s);
static inline int utf8_bytes(char c) static inline int utf8_bytes(char c)
{ {
int bytes = __builtin_clz(~(c)); int bytes = __builtin_clz(~(c));
@@ -70,7 +70,7 @@ void encode_utf8(char **s, int rune) {
} }
// write and advance s with rune in kim // write and advance s with rune in kim
static inline void encode_kim(char **s, int rune) void encode_kim(char **s, int rune)
{ {
if (rune < KIM_CONT) { if (rune < KIM_CONT) {
**s = 0 | (KIM_DATA & rune); **s = 0 | (KIM_DATA & rune);

View File

@@ -3,6 +3,7 @@
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
#include "kim.h"
/* Nota type nibble values */ /* Nota type nibble values */
#define NOTA_BLOB 0x00 #define NOTA_BLOB 0x00

82
source/qjs_kim.c Normal file
View File

@@ -0,0 +1,82 @@
#include "qjs_kim.h"
#include "qjs_blob.h"
#include "jsffi.h"
#include <string.h>
#include <stdlib.h>
#define KIM_IMPLEMENTATION
#include "kim.h"
// kim.encode(str): convert a JS string into a stoned blob holding its Kim encoding.
// Returns an exception value if the argument cannot be converted to a string
// or the scratch buffer cannot be allocated.
JSC_CCALL(kim_encode,
  const char *utf8_str = JS_ToCString(js, argv[0]);
  if (!utf8_str) return JS_EXCEPTION;

  // Count runes to size the kim buffer
  int rune_count = utf8_count(utf8_str);

  // Worst case: 5 bytes per rune. The extra byte keeps the request non-zero
  // for the empty string so a NULL result always means allocation failure.
  size_t kim_size = (size_t)rune_count * 5 + 1;
  char *kim_buffer = malloc(kim_size);
  if (!kim_buffer) {
    JS_FreeCString(js, utf8_str);
    return JS_ThrowOutOfMemory(js);
  }
  char *kim_ptr = kim_buffer;

  // utf8_to_kim receives the ADDRESS of the source pointer so it can advance it.
  // Hand it a scratch cursor: JS_FreeCString must get back the exact pointer
  // that JS_ToCString returned.
  const char *utf8_cursor = utf8_str;
  long long runes_encoded;
  utf8_to_kim(&utf8_cursor, &kim_ptr, &runes_encoded);

  // Bytes actually written by the encoder
  size_t actual_size = (size_t)(kim_ptr - kim_buffer);

  // The blob takes its own copy so the scratch buffer can be released
  ret = js_new_blob_stoned_copy(js, kim_buffer, actual_size);

  free(kim_buffer);
  JS_FreeCString(js, utf8_str);
)
// kim.decode(blob): convert a blob of Kim-encoded bytes back into a JS string.
// Throws a TypeError when the argument yields no blob data.
JSC_CCALL(kim_decode,
  size_t kim_len;
  void *kim_data = js_get_blob_data(js, &kim_len, argv[0]);
  if (!kim_data) return JS_ThrowTypeError(js, "Expected blob");

  // Count COMPLETE runes without decoding: every rune ends on a byte whose
  // continuation bit (KIM_CONT) is clear. A truncated trailing rune is simply
  // not counted so the decoder below never walks past the end of the blob.
  const unsigned char *kim_bytes = kim_data;
  int rune_count = 0;
  for (size_t i = 0; i < kim_len; i++) {
    if (!(kim_bytes[i] & KIM_CONT)) rune_count++;
  }

  // Allocate UTF-8 buffer (worst case: 4 bytes per kim byte)
  size_t utf8_size = kim_len * 4;
  char *utf8_buffer = malloc(utf8_size + 1);
  if (!utf8_buffer) return JS_ThrowOutOfMemory(js);
  char *utf8_ptr = utf8_buffer;

  // kim_to_utf8 advances the cursors it is given. They are locals so no copy
  // of the blob contents is needed.
  char *kim_ptr = kim_data;
  kim_to_utf8(&kim_ptr, &utf8_ptr, rune_count);

  // Pass the explicit length so an encoded U+0000 does not cut the string short
  ret = JS_NewStringLen(js, utf8_buffer, (size_t)(utf8_ptr - utf8_buffer));

  free(utf8_buffer);
)
// Functions exported by the kim module: encode and decode.
// NOTE(review): the last MIST_FUNC_DEF argument is presumably the declared
// argument count - confirm against the macro definition.
static const JSCFunctionListEntry js_kim_funcs[] = {
MIST_FUNC_DEF(kim, encode, 1),
MIST_FUNC_DEF(kim, decode, 1),
};
// Build the object handed out for the kim module: a fresh object
// populated with every entry of js_kim_funcs.
JSValue js_kim_use(JSContext *js)
{
  JSValue exports = JS_NewObject(js);
  JS_SetPropertyFunctionList(js, exports, js_kim_funcs, countof(js_kim_funcs));
  return exports;
}

8
source/qjs_kim.h Normal file
View File

@@ -0,0 +1,8 @@
#ifndef QJS_KIM_H
#define QJS_KIM_H
#include "cell.h"
// Returns a new object carrying the kim module functions (encode, decode).
JSValue js_kim_use(JSContext*);
#endif

211
source/qjs_utf8.c Normal file
View File

@@ -0,0 +1,211 @@
#include "qjs_utf8.h"
#include "qjs_blob.h"
#include "jsffi.h"
#include <string.h>
#include <stdlib.h>
#include "kim.h"
// utf8.codepoints(str): return an array with one integer per decoded rune
JSC_CCALL(utf8_codepoints,
  const char *text = JS_ToCString(js, argv[0]);
  if (!text) return JS_EXCEPTION;

  JSValue out = JS_NewArray(js);

  // decode_utf8 advances the cursor past each rune it returns
  char *cursor = (char*)text;
  for (int slot = 0; *cursor; slot++) {
    int rune = decode_utf8(&cursor);
    JS_SetPropertyUint32(js, out, slot, JS_NewInt32(js, rune));
  }

  JS_FreeCString(js, text);
  ret = out;
)
// utf8.from_codepoints(array): build a string from an array of codepoints.
// Elements are converted with JS_ToInt32; a failed conversion propagates the
// exception. Values outside 0..0x10FFFF become U+FFFD so the encoder never
// receives something that does not fit the 4-bytes-per-rune buffer.
JSC_CCALL(utf8_from_codepoints,
  int len = JS_ArrayLength(js, argv[0]);
  if (len < 0) len = 0;

  // Allocate buffer (worst case: 4 bytes per codepoint + 1 so it is never empty)
  char *buffer = malloc((size_t)len * 4 + 1);
  if (!buffer) return JS_ThrowOutOfMemory(js);
  char *ptr = buffer;

  for (int i = 0; i < len; i++) {
    JSValue val = JS_GetPropertyUint32(js, argv[0], i);
    int codepoint = 0;
    int failed = JS_ToInt32(js, &codepoint, val);
    JS_FreeValue(js, val);
    if (failed) {
      free(buffer);
      return JS_EXCEPTION;
    }
    if (codepoint < 0 || codepoint > 0x10FFFF) codepoint = 0xFFFD;
    encode_utf8(&ptr, codepoint);
  }

  // Explicit length: a codepoint of 0 must not terminate the result early
  ret = JS_NewStringLen(js, buffer, (size_t)(ptr - buffer));
  free(buffer);
)
// Count UTF-8 characters (runes) in a string
JSC_SCALL(utf8_length,
int count = utf8_count(str);
ret = JS_NewInt32(js, count);
)
// utf8.validate(str): true when every rune decodes to a legal Unicode scalar
// value (0..0x10FFFF, excluding the surrogate range) and was stored in the
// shortest possible number of bytes.
JSC_SCALL(utf8_validate,
  char *ptr = (char*)str;
  int valid = 1;

  while (*ptr) {
    char *rune_start = ptr;
    int codepoint = decode_utf8(&ptr);

    // Reject values that are not Unicode scalar values
    if (codepoint < 0 || codepoint > 0x10FFFF ||
        (codepoint >= 0xD800 && codepoint <= 0xDFFF)) {
      valid = 0;
      break;
    }

    // Each codepoint has exactly one canonical length. Pick it with an
    // exclusive chain so a short codepoint is not also tested against the
    // longer ranges. Anything else is an overlong or malformed sequence.
    // This also stops the loop if the decoder failed to advance.
    int bytes_used = (int)(ptr - rune_start);
    int bytes_expected = codepoint <= 0x7F   ? 1
                       : codepoint <= 0x7FF  ? 2
                       : codepoint <= 0xFFFF ? 3
                       : 4;
    if (bytes_used != bytes_expected) {
      valid = 0;
      break;
    }
  }

  ret = JS_NewBool(js, valid);
)
// Get byte length of UTF-8 string
JSC_SCALL(utf8_byte_length,
ret = JS_NewInt32(js, strlen(str));
)
// Encode string to UTF-8 bytes
JSC_SCALL(utf8_encode,
size_t len = strlen(str);
ret = js_new_blob_stoned_copy(js, str, len);
)
// utf8.decode(blob): interpret the blob's bytes as UTF-8 and return a string.
// Throws a TypeError when the argument yields no blob data.
JSC_CCALL(utf8_decode,
  size_t len;
  void *data = js_get_blob_data(js, &len, argv[0]);
  if (!data) return JS_ThrowTypeError(js, "Expected blob");

  // Build the string straight from the blob with an explicit length:
  // no temporary copy is needed and embedded zero bytes no longer
  // truncate the result.
  ret = JS_NewStringLen(js, (const char *)data, len);
)
// utf8.slice(str, start, end): substring by rune indices (not byte indices).
// start defaults to 0 and end to the rune count. Negative indices count back
// from the end of the string. Out-of-range indices are clamped. An empty
// string is returned when the resolved range is empty.
JSC_CCALL(utf8_slice,
  const char *str = JS_ToCString(js, argv[0]);
  if (!str) return JS_EXCEPTION;

  // Capture the rune count BEFORE reading the arguments: negative indices and
  // clamping must be relative to the string length, not to the caller's end.
  int total = utf8_count(str);
  int start = 0;
  int end = total;

  if ((argc > 1 && JS_ToInt32(js, &start, argv[1])) ||
      (argc > 2 && JS_ToInt32(js, &end, argv[2]))) {
    JS_FreeCString(js, str);
    return JS_EXCEPTION;
  }

  // Handle negative indices
  if (start < 0) start += total;
  if (end < 0) end += total;

  // Clamp values
  if (start < 0) start = 0;
  if (end > total) end = total;

  if (start >= end) {
    JS_FreeCString(js, str);
    return JS_NewString(js, "");
  }

  // Walk to the first rune of the slice
  char *ptr = (char*)str;
  for (int i = 0; i < start && *ptr; i++) {
    decode_utf8(&ptr);
  }
  char *start_ptr = ptr;

  // Walk to one past the last rune of the slice
  for (int i = start; i < end && *ptr; i++) {
    decode_utf8(&ptr);
  }

  // The slice is a contiguous byte range of the source string
  ret = JS_NewStringLen(js, start_ptr, (size_t)(ptr - start_ptr));

  JS_FreeCString(js, str);
)
// utf8.char_at(str, index): the rune at the given rune index as a string,
// or undefined when the index is negative or past the end of the string.
JSC_CCALL(utf8_char_at,
  const char *str = JS_ToCString(js, argv[0]);
  if (!str) return JS_EXCEPTION;

  int index = 0;
  if (JS_ToInt32(js, &index, argv[1])) {
    JS_FreeCString(js, str);
    return JS_EXCEPTION;
  }

  char *ptr = (char*)str;
  int count = 0;

  // Skip to index
  while (*ptr && count < index) {
    decode_utf8(&ptr);
    count++;
  }

  // Ran off the end, or the index was negative and never matched
  if (!*ptr || count != index) {
    JS_FreeCString(js, str);
    return JS_UNDEFINED;
  }

  // Decode one rune to learn how many bytes it occupies
  char *char_start = ptr;
  decode_utf8(&ptr);

  ret = JS_NewStringLen(js, char_start, (size_t)(ptr - char_start));

  JS_FreeCString(js, str);
)
// Functions exported by the utf8 module.
// NOTE(review): the last MIST_FUNC_DEF argument is presumably the declared
// argument count - confirm against the macro definition.
static const JSCFunctionListEntry js_utf8_funcs[] = {
MIST_FUNC_DEF(utf8, codepoints, 1),
MIST_FUNC_DEF(utf8, from_codepoints, 1),
MIST_FUNC_DEF(utf8, length, 1),
MIST_FUNC_DEF(utf8, validate, 1),
MIST_FUNC_DEF(utf8, byte_length, 1),
MIST_FUNC_DEF(utf8, encode, 1),
MIST_FUNC_DEF(utf8, decode, 1),
MIST_FUNC_DEF(utf8, slice, 3),
MIST_FUNC_DEF(utf8, char_at, 2),
};
// Build the object handed out for the utf8 module: a fresh object
// populated with every entry of js_utf8_funcs.
JSValue js_utf8_use(JSContext *js)
{
  JSValue exports = JS_NewObject(js);
  JS_SetPropertyFunctionList(js, exports, js_utf8_funcs, countof(js_utf8_funcs));
  return exports;
}

8
source/qjs_utf8.h Normal file
View File

@@ -0,0 +1,8 @@
#ifndef QJS_UTF8_H
#define QJS_UTF8_H
#include "cell.h"
// Returns a new object carrying the utf8 module functions.
JSValue js_utf8_use(JSContext*);
#endif

51
tests/kim.ce Normal file
View File

@@ -0,0 +1,51 @@
// Round-trip tests for the kim module: every string is encoded to a blob,
// decoded again and compared with the original.
var kim = use("kim");
var blob = use('blob')
// Needed for the UTF-8 byte count in the efficiency report below
var utf8 = use("utf8");
// Test basic ASCII
var test1 = "Hello, World!";
var encoded1 = kim.encode(test1);
var decoded1 = kim.decode(encoded1);
log.console("ASCII test:", test1 === decoded1 ? "PASS" : "FAIL");
if (test1 !== decoded1) {
  log.console(" Expected:", test1);
  log.console(" Got:", decoded1);
}
// Test Unicode characters
var test2 = "Hello, 世界! 🌍 Привет мир";
var encoded2 = kim.encode(test2);
var decoded2 = kim.decode(encoded2);
log.console("Unicode test:", test2 === decoded2 ? "PASS" : "FAIL");
if (test2 !== decoded2) {
  log.console(" Expected:", test2);
  log.console(" Got:", decoded2);
}
// Test empty string
var test3 = "";
var encoded3 = kim.encode(test3);
log.console(typeof encoded3)
log.console(encoded3 instanceof blob)
var decoded3 = kim.decode(encoded3);
log.console("Empty string test:", test3 === decoded3 ? "PASS" : "FAIL");
// Test various Unicode ranges
var test4 = "αβγδε АБВГД 你好 😀😎🎉 ∑∏∫";
var encoded4 = kim.encode(test4);
var decoded4 = kim.decode(encoded4);
log.console("Mixed Unicode test:", test4 === decoded4 ? "PASS" : "FAIL");
if (test4 !== decoded4) {
  log.console(" Expected:", test4);
  log.console(" Got:", decoded4);
}
// Test efficiency - KIM should be smaller for high codepoints
var highCodepoints = "🌍🌎🌏🗺️🧭";
var encodedHigh = kim.encode(highCodepoints);
// Measure the UTF-8 size with the utf8 module rather than the web Blob
// constructor, which is a browser API and not one of the modules used here.
var utf8Bytes = utf8.byte_length(highCodepoints);
log.console("High codepoint efficiency:");
log.console(" UTF-8 bytes:", utf8Bytes);
// NOTE(review): assumes blobs expose byteLength - confirm against the blob module
log.console(" KIM bytes:", encodedHigh.byteLength);
log.console(" Savings:", utf8Bytes - encodedHigh.byteLength, "bytes");
log.console("\nAll tests completed!");
// Shut down explicitly, as the sibling test scripts do
$_.stop();

47
tests/text_utf8.ce Normal file
View File

@@ -0,0 +1,47 @@
// Exercises the text() conversions that are backed by the utf8 module.
var text = use('text');
var blob = use('blob');
var utf8 = use('utf8');

// Label printed next to each comparison in the report
function verdict(ok) {
  if (ok) return "PASS";
  return "FAIL";
}

// Reference string shared by the blob and codepoint checks
var expected_text = "Hello, 世界! 🌍";

// Blob -> text: encode with utf8, decode through text()
var utf8_blob = utf8.encode(expected_text);
var roundtrip = text(utf8_blob);
log.console("Blob to text test:");
log.console(" Original:", expected_text);
log.console(" Decoded:", roundtrip);
log.console(" Match:", verdict(expected_text === roundtrip));

// Array of codepoints -> text
var rune_values = [72, 101, 108, 108, 111, 44, 32, 19990, 30028, 33, 32, 127757];
var rune_text = text(rune_values);
log.console("\nCodepoints to text test:");
log.console(" From codepoints:", rune_text);
log.console(" Match:", verdict(rune_text === expected_text));

// Array of strings joined with a separator
var word_list = ["Hello", "world", "from", "text"];
var sentence = text(word_list, " ");
log.console("\nArray with separator test:");
log.console(" Joined:", sentence);
log.console(" Expected: Hello world from text");
log.console(" Match:", verdict(sentence === "Hello world from text"));

// Array mixing codepoints and strings, no separator
var mixed_items = [72, "ello", 32, "world"];
var mixed_text = text(mixed_items, "");
log.console("\nMixed array test:");
log.console(" Result:", mixed_text);
log.console(" Expected: Hello world");
log.console(" Match:", verdict(mixed_text === "Hello world"));

// The blob formatting modes of text() must keep working
var abc_blob = utf8.encode("ABC");
log.console("\nBlob format tests:");
log.console(" Hex:", text(abc_blob, "h"));
log.console(" Binary:", text(abc_blob, "b"));
log.console(" Octal:", text(abc_blob, "o"));

log.console("\nAll tests completed!");
$_.stop();

70
tests/utf8.ce Normal file
View File

@@ -0,0 +1,70 @@
// Walks through every function exported by the utf8 module and prints the results.
var utf8 = use("utf8");

// Label printed next to each comparison in the report
function verdict(ok) {
  if (ok) return "PASS";
  return "FAIL";
}

// Rune count and byte count agree for pure ASCII
var ascii_text = "Hello";
log.console("ASCII length test:");
log.console(" Characters:", utf8.length(ascii_text));
log.console(" Bytes:", utf8.byte_length(ascii_text));
log.console(" Match:", verdict(utf8.length(ascii_text) === utf8.byte_length(ascii_text)));

// Multi-byte runes make the byte count exceed the rune count
var mixed_text = "Hello 世界";
log.console("\nMixed ASCII/Unicode length test:");
log.console(" Characters:", utf8.length(mixed_text));
log.console(" Bytes:", utf8.byte_length(mixed_text));
log.console(" Bytes > Characters:", verdict(utf8.byte_length(mixed_text) > utf8.length(mixed_text)));

// String -> codepoints
var emoji_text = "A😀B";
var runes = utf8.codepoints(emoji_text);
log.console("\nCodepoints test:");
log.console(" String:", emoji_text);
log.console(" Codepoints:", runes);
log.console(" A=65:", verdict(runes[0] === 65));
log.console(" 😀=128512:", verdict(runes[1] === 128512));
log.console(" B=66:", verdict(runes[2] === 66));

// Codepoints -> string
var rebuilt = utf8.from_codepoints(runes);
log.console(" Reconstructed:", rebuilt);
log.console(" Match:", verdict(emoji_text === rebuilt));

// String -> blob -> string
var sample = "UTF-8 encoding: 你好世界 🌍";
var sample_blob = utf8.encode(sample);
var sample_back = utf8.decode(sample_blob);
log.console("\nEncode/decode test:");
log.console(" Original:", sample);
log.console(" Decoded:", sample_back);
log.console(" Match:", verdict(sample === sample_back));

// Validation of a well-formed string
log.console("\nValidation tests:");
log.console(" Valid UTF-8:", verdict(utf8.validate("Hello 世界")));

// Slicing by rune index, including negative indices
var slice_text = "Hello 世界!";
log.console("\nSlice tests:");
log.console(" Original:", slice_text);
log.console(" slice(0, 5):", utf8.slice(slice_text, 0, 5));
log.console(" slice(6, 8):", utf8.slice(slice_text, 6, 8));
log.console(" slice(-3):", utf8.slice(slice_text, -3));
log.console(" slice(0, -1):", utf8.slice(slice_text, 0, -1));

// Single-rune lookup, including an index past the end
log.console("\nchar_at tests:");
log.console(" char_at(0):", utf8.char_at(slice_text, 0));
log.console(" char_at(6):", utf8.char_at(slice_text, 6));
log.console(" char_at(7):", utf8.char_at(slice_text, 7));
log.console(" char_at(100):", utf8.char_at(slice_text, 100));

// An emoji sequence built from several codepoints
var family = "👨‍👩‍👧‍👦";
log.console("\nComplex emoji test:");
log.console(" String:", family);
log.console(" Length:", utf8.length(family));
log.console(" Byte length:", utf8.byte_length(family));
log.console(" Codepoints:", utf8.codepoints(family).length);

log.console("\nAll tests completed!");
$_.stop()