add utf8 and kim text encoder/decoders

2025-06-07 23:35:19 -05:00
parent d039e2cfe6
commit cbda7dfbc9
12 changed files with 518 additions and 105 deletions
--- a/meson.build
+++ b/meson.build
@@ -295,7 +295,7 @@ src += [
  'anim.c', 'config.c', 'datastream.c','font.c','HandmadeMath.c','jsffi.c','model.c',
  'render.c','simplex.c','spline.c', 'transform.c','cell.c',  'wildmatch.c',
  'sprite.c', 'rtree.c', 'qjs_nota.c', 'qjs_soloud.c', 'qjs_sdl.c', 'qjs_sdl_input.c', 'qjs_sdl_video.c', 'qjs_sdl_surface.c', 'qjs_math.c', 'qjs_geometry.c', 'qjs_transform.c', 'qjs_sprite.c', 'qjs_io.c', 'qjs_fd.c', 'qjs_os.c', 'qjs_actor.c',
-  'qjs_qr.c', 'qjs_wota.c', 'monocypher.c', 'qjs_blob.c', 'qjs_crypto.c', 'qjs_time.c', 'qjs_http.c', 'qjs_rtree.c', 'qjs_spline.c', 'qjs_js.c', 'qjs_debug.c', 'picohttpparser.c', 'qjs_miniz.c', 'timer.c', 'qjs_socket.c'
+  'qjs_qr.c', 'qjs_wota.c', 'monocypher.c', 'qjs_blob.c', 'qjs_crypto.c', 'qjs_time.c', 'qjs_http.c', 'qjs_rtree.c', 'qjs_spline.c', 'qjs_js.c', 'qjs_debug.c', 'picohttpparser.c', 'qjs_miniz.c', 'timer.c', 'qjs_socket.c', 'qjs_kim.c', 'qjs_utf8.c'
 ]
 # quirc src
 src += [
--- a/scripts/text.cm
+++ b/scripts/text.cm
@@ -3,6 +3,7 @@
 /* -------- helper functions ----------------------------------------- */
 var blob = use('blob')
 var utf8 = use('utf8')
 // Convert number to string with given radix
 function to_radix(num, radix) {
@@ -179,113 +180,43 @@ function text() {
    }
    // Default: interpret as UTF-8 text
-    var byte_count = Math.floor(bit_length / 8);
+    // Use the utf8 module to decode the blob
-    var bytes = [];
+    return utf8.decode(arg);
    // Read bytes from the blob
    for (var i = 0; i < byte_count; i++) {
      var byte_val = 0;
      for (var j = 0; j < 8; j++) {
        var bit_pos = i * 8 + j;
        var bit = arg.read_logical(bit_pos);
        if (bit) byte_val |= (1 << j);
      }
      bytes.push(byte_val);
    }
    // Convert bytes to UTF-8 string
    var result = "";
    var i = 0;
    while (i < bytes.length) {
      var b1 = bytes[i];
      var codepoint;
      var nextI;
      if (b1 < 0x80) {
        // 1-byte ASCII
        codepoint = b1;
        nextI = i + 1;
      } else if (b1 < 0xC0) {
        // Invalid start byte, treat as replacement character
        codepoint = 0xFFFD;
        nextI = i + 1;
      } else if (b1 < 0xE0) {
        // 2-byte sequence
        if (i + 1 < bytes.length && (bytes[i + 1] & 0xC0) === 0x80) {
          codepoint = ((b1 & 0x1F) << 6) | (bytes[i + 1] & 0x3F);
          nextI = i + 2;
        } else {
          codepoint = 0xFFFD;
          nextI = i + 1;
        }
      } else if (b1 < 0xF0) {
        // 3-byte sequence
        if (i + 2 < bytes.length && 
            (bytes[i + 1] & 0xC0) === 0x80 && 
            (bytes[i + 2] & 0xC0) === 0x80) {
          codepoint = ((b1 & 0x0F) << 12) | 
                      ((bytes[i + 1] & 0x3F) << 6) | 
                      (bytes[i + 2] & 0x3F);
          nextI = i + 3;
        } else {
          codepoint = 0xFFFD;
          nextI = i + 1;
        }
      } else if (b1 < 0xF8) {
        // 4-byte sequence
        if (i + 3 < bytes.length && 
            (bytes[i + 1] & 0xC0) === 0x80 && 
            (bytes[i + 2] & 0xC0) === 0x80 && 
            (bytes[i + 3] & 0xC0) === 0x80) {
          codepoint = ((b1 & 0x07) << 18) | 
                      ((bytes[i + 1] & 0x3F) << 12) | 
                      ((bytes[i + 2] & 0x3F) << 6) | 
                      (bytes[i + 3] & 0x3F);
          nextI = i + 4;
        } else {
          codepoint = 0xFFFD;
          nextI = i + 1;
        }
      } else {
        // Invalid start byte
        codepoint = 0xFFFD;
        nextI = i + 1;
      }
      // Convert codepoint to string
      if (codepoint <= 0xFFFF) {
        result += String.fromCharCode(codepoint);
      } else if (codepoint <= 0x10FFFF) {
        // Convert to surrogate pair for JavaScript
        codepoint -= 0x10000;
        result += String.fromCharCode(0xD800 + (codepoint >> 10));
        result += String.fromCharCode(0xDC00 + (codepoint & 0x3FF));
      } else {
        result += String.fromCharCode(0xFFFD); // Replacement character
      }
      i = nextI;
    }
    return result;
  }
  // Handle array conversion
  if (Array.isArray(arg)) {
    var separator = arguments[1] || "";
-    var result = "";
+    
    // Check if all items are valid codepoints
    var all_codepoints = true;
    for (var i = 0; i < arg.length; i++) {
      if (i > 0) result += separator;
      var item = arg[i];
-      if (typeof item === 'number' && item >= 0 && item <= 0x10FFFF && item === Math.floor(item)) {
+      if (!(typeof item === 'number' && item >= 0 && item <= 0x10FFFF && item === Math.floor(item))) {
-        // Unicode codepoint
+        all_codepoints = false;
-        result += String.fromCharCode(item);
+        break;
      } else {
        result += String(item);
      }
    }
-    return result;
+    
    if (all_codepoints && separator === "") {
      // Use utf8 module to convert codepoints to string
      return utf8.from_codepoints(arg);
    } else {
      // General array to string conversion
      var result = "";
      for (var i = 0; i < arg.length; i++) {
        if (i > 0) result += separator;
        var item = arg[i];
        if (typeof item === 'number' && item >= 0 && item <= 0x10FFFF && item === Math.floor(item)) {
          // Single codepoint - use utf8 module
          result += utf8.from_codepoints([item]);
        } else {
          result += String(item);
        }
      }
      return result;
    }
  }
  // Handle number conversion
--- a/source/jsffi.c
+++ b/source/jsffi.c
@@ -53,6 +53,8 @@
 #include "qjs_debug.h"
 #include "qjs_sdl_surface.h"
 #include "qjs_sdl.h"
 #include "qjs_kim.h"
 #include "qjs_utf8.h"
 #ifndef NSTEAM
 #include "qjs_steam.h"
 #endif
@@ -1554,6 +1556,8 @@ void ffi_load(JSContext *js)
  arrput(rt->module_registry, MISTLINE(http));
  arrput(rt->module_registry, MISTLINE(crypto));
  arrput(rt->module_registry, MISTLINE(miniz));
  arrput(rt->module_registry, MISTLINE(kim));
  arrput(rt->module_registry, MISTLINE(utf8));
  // power user
  arrput(rt->module_registry, MISTLINE(js));
--- a/source/kim.h
+++ b/source/kim.h
@@ -10,17 +10,17 @@ void kim_to_utf8(char **kim, char **utf, int runes);
 // Return the number of runes in a utf8 string
 int utf8_count(const char *utf8);
 int decode_utf8(char **s);
 void encode_utf8(char **s, int code);
 void encode_kim(char **s, int code);
 int decode_kim(char **s);
 #ifdef KIM_IMPLEMENTATION
 #define KIM_CONT 0x80
 #define KIM_DATA 0x7f
 #define CONTINUE(CHAR) (CHAR>>7)
 int decode_utf8(char **s);
 void encode_utf8(char **s, int code);
 static void encode_kim(char **s, int code);
 int decode_kim(char **s);
 static inline int utf8_bytes(char c)
 {
  int bytes = __builtin_clz(~(c));
@@ -70,7 +70,7 @@ void encode_utf8(char **s, int rune) {
 }
 // write and advance s with rune in kim
-static inline void encode_kim(char **s, int rune)
+void encode_kim(char **s, int rune)
 {
  if (rune < KIM_CONT) {
    **s = 0 | (KIM_DATA & rune);
--- a/source/nota.h
+++ b/source/nota.h
@@ -3,6 +3,7 @@
 #include <stddef.h>
 #include <stdint.h>
 #include "kim.h"
 /* Nota type nibble values */
 #define NOTA_BLOB  0x00
--- a/source/qjs_kim.c
+++ b/source/qjs_kim.c
@@ -0,0 +1,82 @@
 #include "qjs_kim.h"
 #include "qjs_blob.h"
 #include "jsffi.h"
 #include <string.h>
 #include <stdlib.h>
 #define KIM_IMPLEMENTATION
 #include "kim.h"
 // kim.encode(text): convert a JS string into a stoned blob of Kim-encoded bytes.
 // argv[0] is converted with JS_ToCString; JS_EXCEPTION is returned if that
 // conversion fails, and an out-of-memory error is thrown if the scratch
 // buffer cannot be allocated.
 JSC_CCALL(kim_encode,
  const char *utf8_str = JS_ToCString(js, argv[0]);
  if (!utf8_str) return JS_EXCEPTION;
  // Count runes to size the kim buffer (5 bytes per rune is the upper bound
  // this code has always assumed). The extra byte keeps the request non-zero
  // for the empty string so that NULL can only mean allocation failure.
  int rune_count = utf8_count(utf8_str);
  size_t kim_size = (size_t)rune_count * 5 + 1;
  char *kim_buffer = malloc(kim_size);
  if (!kim_buffer) {
    JS_FreeCString(js, utf8_str);
    return JS_ThrowOutOfMemory(js);
  }
  char *kim_ptr = kim_buffer;
  // Encode through a scratch cursor. utf8_to_kim receives the ADDRESS of the
  // source pointer and may move it, while JS_FreeCString must be handed the
  // exact pointer that JS_ToCString returned.
  const char *utf8_cursor = utf8_str;
  long long runes_encoded;
  utf8_to_kim(&utf8_cursor, &kim_ptr, &runes_encoded);
  // kim_ptr now points one past the last byte written
  size_t actual_size = kim_ptr - kim_buffer;
  // The blob takes its own copy so the scratch buffer can be released
  ret = js_new_blob_stoned_copy(js, kim_buffer, actual_size);
  free(kim_buffer);
  JS_FreeCString(js, utf8_str);
 )
 // kim.decode(blob): convert a blob of Kim-encoded bytes back into a JS string.
 // Throws TypeError when argv[0] yields no blob data or when the data ends in
 // the middle of a rune; throws an out-of-memory error if buffers cannot be
 // allocated.
 JSC_CCALL(kim_decode,
  size_t kim_len;
  void *kim_data = js_get_blob_data(js, &kim_len, argv[0]);
  if (!kim_data) return JS_ThrowTypeError(js, "Expected blob");
  // A byte with the high bit set (0x80, KIM_CONT in kim.h) announces that more
  // bytes of the same rune follow. Data ending on such a byte is truncated.
  // Reject it up front: the loops below walk rune by rune and would otherwise
  // step past the end of the buffer (assuming decode_kim keeps consuming bytes
  // until it meets one with the high bit clear - TODO confirm in kim.h).
  if (kim_len > 0 && (((const unsigned char *)kim_data)[kim_len - 1] & 0x80))
    return JS_ThrowTypeError(js, "Truncated kim data");
  // UTF-8 output is sized at 4 bytes per kim byte. The extra byte on both
  // allocations keeps the requests non-zero for an empty blob.
  char *utf8_buffer = malloc(kim_len * 4 + 1);
  // Work on a private copy: the kim helpers take non-const pointers
  char *kim_copy = malloc(kim_len + 1);
  if (!utf8_buffer || !kim_copy) {
    free(utf8_buffer);
    free(kim_copy);
    return JS_ThrowOutOfMemory(js);
  }
  memcpy(kim_copy, kim_data, kim_len);
  // Count runes first because kim_to_utf8 wants the rune count, not a byte count
  int rune_count = 0;
  char *temp_ptr = kim_copy;
  while (temp_ptr < kim_copy + kim_len) {
    decode_kim(&temp_ptr);
    rune_count++;
  }
  // Decode from the start of the copy; both cursors are advanced by the call
  char *kim_ptr = kim_copy;
  char *utf8_ptr = utf8_buffer;
  kim_to_utf8(&kim_ptr, &utf8_ptr, rune_count);
  // Pass the explicit length so a decoded U+0000 does not cut the string short
  ret = JS_NewStringLen(js, utf8_buffer, utf8_ptr - utf8_buffer);
  free(utf8_buffer);
  free(kim_copy);
 )
 // Function table installed on the object returned by js_kim_use().
 // Each entry names the module and function; the trailing number is
 // presumably the declared argument count - confirm against MIST_FUNC_DEF.
 static const JSCFunctionListEntry js_kim_funcs[] = {
  MIST_FUNC_DEF(kim, encode, 1),
  MIST_FUNC_DEF(kim, decode, 1),
 };
 // Build the kim module: a fresh object carrying every entry of js_kim_funcs.
 JSValue js_kim_use(JSContext *js)
 {
  JSValue exports = JS_NewObject(js);
  JS_SetPropertyFunctionList(js, exports, js_kim_funcs, countof(js_kim_funcs));
  return exports;
 }
--- a/source/qjs_kim.h
+++ b/source/qjs_kim.h
@@ -0,0 +1,8 @@
 #ifndef QJS_KIM_H
 #define QJS_KIM_H
 #include "cell.h"
 JSValue js_kim_use(JSContext*);
 #endif
--- a/source/qjs_utf8.c
+++ b/source/qjs_utf8.c
@@ -0,0 +1,211 @@
 #include "qjs_utf8.h"
 #include "qjs_blob.h"
 #include "jsffi.h"
 #include <string.h>
 #include <stdlib.h>
 #include "kim.h"
 // utf8.codepoints(text): return an array with one integer per rune of the
 // string, in order, as produced by decode_utf8.
 JSC_CCALL(utf8_codepoints,
  const char *src = JS_ToCString(js, argv[0]);
  if (!src) return JS_EXCEPTION;
  JSValue runes = JS_NewArray(js);
  // decode_utf8 advances the cursor past the rune it returns
  char *cursor = (char*)src;
  for (int slot = 0; *cursor; slot++)
    JS_SetPropertyUint32(js, runes, slot, JS_NewInt32(js, decode_utf8(&cursor)));
  JS_FreeCString(js, src);
  ret = runes;
 )
 // utf8.from_codepoints(array): build a string from an array of code points.
 // Elements are converted with JS_ToInt32; a conversion failure propagates as
 // an exception. Values outside 0..0x10FFFF are replaced by U+FFFD so that
 // encode_utf8 never sees a value that does not fit the 4-bytes-per-rune
 // buffer. Throws an out-of-memory error if the buffer cannot be allocated.
 JSC_CCALL(utf8_from_codepoints,
  int len = JS_ArrayLength(js, argv[0]);
  // Guard against a negative length (for example for a non-array argument)
  if (len < 0) len = 0;
  // Worst case: 4 bytes per codepoint; +1 keeps the request non-zero
  char *buffer = malloc((size_t)len * 4 + 1);
  if (!buffer) return JS_ThrowOutOfMemory(js);
  char *ptr = buffer;
  for (int i = 0; i < len; i++) {
    JSValue val = JS_GetPropertyUint32(js, argv[0], i);
    if (JS_IsException(val)) {
      free(buffer);
      return JS_EXCEPTION;
    }
    int codepoint = 0;
    int failed = JS_ToInt32(js, &codepoint, val);
    JS_FreeValue(js, val);
    if (failed) {
      // The conversion already left an exception pending; just unwind
      free(buffer);
      return JS_EXCEPTION;
    }
    if (codepoint < 0 || codepoint > 0x10FFFF) codepoint = 0xFFFD;
    encode_utf8(&ptr, codepoint);
  }
  // Explicit length: a code point of 0 must not terminate the string early
  ret = JS_NewStringLen(js, buffer, ptr - buffer);
  free(buffer);
 )
 // Count UTF-8 characters (runes) in a string
 JSC_SCALL(utf8_length,
  int count = utf8_count(str);
  ret = JS_NewInt32(js, count);
 )
 // utf8.validate(text): true when every rune decoded from the string is a
 // Unicode scalar value (0..0x10FFFF, excluding surrogates D800..DFFF) and
 // was stored in the shortest possible number of bytes; false otherwise.
 JSC_SCALL(utf8_validate,
  char *ptr = (char*)str;
  int valid = 1;
  while (*ptr) {
    const char *rune_start = ptr;
    int codepoint = decode_utf8(&ptr);
    // Reject values outside the Unicode range and UTF-16 surrogates
    if (codepoint < 0 || codepoint > 0x10FFFF ||
        (codepoint >= 0xD800 && codepoint <= 0xDFFF)) {
      valid = 0;
      break;
    }
    // Overlong check: each range has exactly one legal encoded length.
    // The ranges must be tested exclusively, smallest first; otherwise an
    // ASCII rune also matches the wider ranges and is wrongly rejected.
    int bytes_used = ptr - rune_start;
    int bytes_expected;
    if (codepoint <= 0x7F) bytes_expected = 1;
    else if (codepoint <= 0x7FF) bytes_expected = 2;
    else if (codepoint <= 0xFFFF) bytes_expected = 3;
    else bytes_expected = 4;
    if (bytes_used != bytes_expected) {
      valid = 0;
      break;
    }
  }
  ret = JS_NewBool(js, valid);
 )
 // Get byte length of UTF-8 string
 JSC_SCALL(utf8_byte_length,
  ret = JS_NewInt32(js, strlen(str));
 )
 // Encode string to UTF-8 bytes
 JSC_SCALL(utf8_encode,
  size_t len = strlen(str);
  ret = js_new_blob_stoned_copy(js, str, len);
 )
 // utf8.decode(blob): interpret the blob's bytes as UTF-8 and return them as
 // a JS string. Throws TypeError when argv[0] yields no blob data.
 JSC_CCALL(utf8_decode,
  size_t len;
  void *data = js_get_blob_data(js, &len, argv[0]);
  if (!data) return JS_ThrowTypeError(js, "Expected blob");
  // Hand the bytes over with an explicit length. This needs no temporary
  // NUL-terminated copy (and so no allocation that could fail), and a zero
  // byte inside the blob no longer cuts the resulting string short.
  ret = JS_NewStringLen(js, (const char *)data, len);
 )
 // utf8.slice(text, start, end): substring selected by rune indices, not byte
 // indices. start defaults to 0 and end to the rune count; an omitted or
 // undefined end means "to the end of the string". Negative indices count
 // back from the end. Out-of-range indices are clamped, and an empty string
 // is returned when start is not before end. Index conversion failures
 // propagate as exceptions.
 JSC_CCALL(utf8_slice,
  const char *str = JS_ToCString(js, argv[0]);
  if (!str) return JS_EXCEPTION;
  // Capture the rune count BEFORE reading the arguments: negative indices
  // and clamping are relative to the string length, not to the caller's end.
  int total = utf8_count(str);
  int start = 0;
  int end = total;
  if (argc > 1 && JS_ToInt32(js, &start, argv[1])) {
    JS_FreeCString(js, str);
    return JS_EXCEPTION;
  }
  if (argc > 2 && !JS_IsUndefined(argv[2]) && JS_ToInt32(js, &end, argv[2])) {
    JS_FreeCString(js, str);
    return JS_EXCEPTION;
  }
  // Handle negative indices
  if (start < 0) start += total;
  if (end < 0) end += total;
  // Clamp values
  if (start < 0) start = 0;
  if (end > total) end = total;
  if (start >= end) {
    JS_FreeCString(js, str);
    return JS_NewString(js, "");
  }
  // Find start position; decode_utf8 advances ptr by one rune per call
  char *ptr = (char*)str;
  for (int i = 0; i < start && *ptr; i++) {
    decode_utf8(&ptr);
  }
  char *start_ptr = ptr;
  // Find end position
  for (int i = start; i < end && *ptr; i++) {
    decode_utf8(&ptr);
  }
  // Build the result straight from the source bytes; no scratch copy needed
  ret = JS_NewStringLen(js, start_ptr, ptr - start_ptr);
  JS_FreeCString(js, str);
 )
 // utf8.char_at(text, index): the single rune at the given rune index as a
 // string, or undefined when the index is negative or past the last rune.
 // A failed index conversion propagates as an exception.
 JSC_CCALL(utf8_char_at,
  const char *str = JS_ToCString(js, argv[0]);
  if (!str) return JS_EXCEPTION;
  int index;
  if (JS_ToInt32(js, &index, argv[1])) {
    // Do not continue with a pending exception
    JS_FreeCString(js, str);
    return JS_EXCEPTION;
  }
  char *ptr = (char*)str;
  int count = 0;
  // Skip to index; decode_utf8 advances ptr by one rune per call
  while (*ptr && count < index) {
    decode_utf8(&ptr);
    count++;
  }
  // Ran off the end, or index was negative so no step was ever taken
  if (!*ptr || count != index) {
    JS_FreeCString(js, str);
    return JS_UNDEFINED;
  }
  // Measure the rune by decoding it once, then copy exactly those bytes
  char *char_start = ptr;
  decode_utf8(&ptr);
  ret = JS_NewStringLen(js, char_start, ptr - char_start);
  JS_FreeCString(js, str);
 )
 // Function table installed on the object returned by js_utf8_use().
 // Each entry names the module and function; the trailing number is
 // presumably the declared argument count - confirm against MIST_FUNC_DEF.
 static const JSCFunctionListEntry js_utf8_funcs[] = {
  MIST_FUNC_DEF(utf8, codepoints, 1),
  MIST_FUNC_DEF(utf8, from_codepoints, 1),
  MIST_FUNC_DEF(utf8, length, 1),
  MIST_FUNC_DEF(utf8, validate, 1),
  MIST_FUNC_DEF(utf8, byte_length, 1),
  MIST_FUNC_DEF(utf8, encode, 1),
  MIST_FUNC_DEF(utf8, decode, 1),
  MIST_FUNC_DEF(utf8, slice, 3),
  MIST_FUNC_DEF(utf8, char_at, 2),
 };
 // Build the utf8 module: a fresh object carrying every entry of js_utf8_funcs.
 JSValue js_utf8_use(JSContext *js)
 {
  JSValue exports = JS_NewObject(js);
  JS_SetPropertyFunctionList(js, exports, js_utf8_funcs, countof(js_utf8_funcs));
  return exports;
 }
--- a/source/qjs_utf8.h
+++ b/source/qjs_utf8.h
@@ -0,0 +1,8 @@
 #ifndef QJS_UTF8_H
 #define QJS_UTF8_H
 #include "cell.h"
 JSValue js_utf8_use(JSContext*);
 #endif
--- a/tests/kim.ce
+++ b/tests/kim.ce
@@ -0,0 +1,51 @@
 // Round-trip tests for the kim module: each case encodes a string with
 // kim.encode, decodes the result with kim.decode, and logs PASS when the
 // decoded string is identical to the input. Results are only logged; a
 // failing case does not stop the script.
 var kim = use("kim");
 var blob = use('blob')
 // Test basic ASCII
 var test1 = "Hello, World!";
 var encoded1 = kim.encode(test1);
 var decoded1 = kim.decode(encoded1);
 log.console("ASCII test:", test1 === decoded1 ? "PASS" : "FAIL");
 // On mismatch, print both sides to make the difference visible
 if (test1 !== decoded1) {
  log.console("  Expected:", test1);
  log.console("  Got:", decoded1);
 }
 // Test Unicode characters
 var test2 = "Hello, 世界! 🌍 Привет мир";
 var encoded2 = kim.encode(test2);
 var decoded2 = kim.decode(encoded2);
 log.console("Unicode test:", test2 === decoded2 ? "PASS" : "FAIL");
 if (test2 !== decoded2) {
  log.console("  Expected:", test2);
  log.console("  Got:", decoded2);
 }
 // Test empty string
 var test3 = "";
 var encoded3 = kim.encode(test3);
 // Diagnostic output only: shows what kind of value encode returned
 log.console(typeof encoded3)
 log.console(encoded3 instanceof blob)
 var decoded3 = kim.decode(encoded3);
 log.console("Empty string test:", test3 === decoded3 ? "PASS" : "FAIL");
 // Test various Unicode ranges
 var test4 = "αβγδε АБВГД 你好 😀😎🎉 ∑∏∫";
 var encoded4 = kim.encode(test4);
 var decoded4 = kim.decode(encoded4);
 log.console("Mixed Unicode test:", test4 === decoded4 ? "PASS" : "FAIL");
 if (test4 !== decoded4) {
  log.console("  Expected:", test4);
  log.console("  Got:", decoded4);
 }
 // Test efficiency - KIM should be smaller for high codepoints
 // NOTE(review): this section relies on a global Blob constructor with a
 // size property and on a byteLength property of the value returned by
 // kim.encode; neither is defined in the sources shown with this change -
 // confirm the runtime provides them.
 var highCodepoints = "🌍🌎🌏🗺️🧭";
 var encodedHigh = kim.encode(highCodepoints);
 var utf8Bytes = new Blob([highCodepoints]).size;
 log.console("High codepoint efficiency:");
 log.console("  UTF-8 bytes:", utf8Bytes);
 log.console("  KIM bytes:", encodedHigh.byteLength);
 log.console("  Savings:", utf8Bytes - encodedHigh.byteLength, "bytes");
 log.console("\nAll tests completed!");
--- a/tests/text_utf8.ce
+++ b/tests/text_utf8.ce
@@ -0,0 +1,47 @@
 // Tests for the text() conversions that are backed by the utf8 module:
 // blob to string, code point arrays to string, and array joining.
 // Results are only logged; a failing case does not stop the script.
 var text = use('text');
 var blob = use('blob');
 var utf8 = use('utf8');
 // Test blob to text conversion
 var test_string = "Hello, 世界! 🌍";
 var encoded_blob = utf8.encode(test_string);
 var decoded_text = text(encoded_blob);
 log.console("Blob to text test:");
 log.console("  Original:", test_string);
 log.console("  Decoded:", decoded_text);
 log.console("  Match:", test_string === decoded_text ? "PASS" : "FAIL");
 // Test array of codepoints conversion
 // The values below are the code points of test_string, in order
 var codepoints = [72, 101, 108, 108, 111, 44, 32, 19990, 30028, 33, 32, 127757];
 var from_codepoints = text(codepoints);
 log.console("\nCodepoints to text test:");
 log.console("  From codepoints:", from_codepoints);
 log.console("  Match:", from_codepoints === test_string ? "PASS" : "FAIL");
 // Test array with separator
 var words = ["Hello", "world", "from", "text"];
 var joined = text(words, " ");
 log.console("\nArray with separator test:");
 log.console("  Joined:", joined);
 log.console("  Expected: Hello world from text");
 log.console("  Match:", joined === "Hello world from text" ? "PASS" : "FAIL");
 // Test mixed array with codepoints
 // Numbers are expected to become characters (72 is "H", 32 is a space)
 // while strings are appended unchanged
 var mixed = [72, "ello", 32, "world"];
 var mixed_result = text(mixed, "");
 log.console("\nMixed array test:");
 log.console("  Result:", mixed_result);
 log.console("  Expected: Hello world");
 log.console("  Match:", mixed_result === "Hello world" ? "PASS" : "FAIL");
 // Test blob encoding formats still work
 // Output is printed for inspection only; no expected value is checked
 var test_data = utf8.encode("ABC");
 log.console("\nBlob format tests:");
 log.console("  Hex:", text(test_data, "h"));
 log.console("  Binary:", text(test_data, "b"));
 log.console("  Octal:", text(test_data, "o"));
 log.console("\nAll tests completed!");
 // presumably ends the running test actor - confirm against the runtime
 $_.stop();
--- a/tests/utf8.ce
+++ b/tests/utf8.ce
@@ -0,0 +1,70 @@
 // Tests for the utf8 module: rune and byte counting, code point conversion,
 // blob encode/decode, validation, slicing and char_at. Some cases log
 // PASS/FAIL; the slice, char_at and emoji cases only print values for
 // manual inspection. A failing case does not stop the script.
 var utf8 = use("utf8");
 // Test character counting vs byte counting
 var test1 = "Hello";
 log.console("ASCII length test:");
 log.console("  Characters:", utf8.length(test1));
 log.console("  Bytes:", utf8.byte_length(test1));
 // For pure ASCII the rune count and the byte count must agree
 log.console("  Match:", utf8.length(test1) === utf8.byte_length(test1) ? "PASS" : "FAIL");
 var test2 = "Hello 世界";
 log.console("\nMixed ASCII/Unicode length test:");
 log.console("  Characters:", utf8.length(test2));
 log.console("  Bytes:", utf8.byte_length(test2));
 // Multi-byte runes make the byte count exceed the rune count
 log.console("  Bytes > Characters:", utf8.byte_length(test2) > utf8.length(test2) ? "PASS" : "FAIL");
 // Test codepoints
 var test3 = "A😀B";
 var codepoints = utf8.codepoints(test3);
 log.console("\nCodepoints test:");
 log.console("  String:", test3);
 log.console("  Codepoints:", codepoints);
 log.console("  A=65:", codepoints[0] === 65 ? "PASS" : "FAIL");
 log.console("  😀=128512:", codepoints[1] === 128512 ? "PASS" : "FAIL");
 log.console("  B=66:", codepoints[2] === 66 ? "PASS" : "FAIL");
 // Test from_codepoints
 // Round trip: the array produced above must rebuild the original string
 var reconstructed = utf8.from_codepoints(codepoints);
 log.console("  Reconstructed:", reconstructed);
 log.console("  Match:", test3 === reconstructed ? "PASS" : "FAIL");
 // Test encode/decode
 var test4 = "UTF-8 encoding: 你好世界 🌍";
 var encoded = utf8.encode(test4);
 var decoded = utf8.decode(encoded);
 log.console("\nEncode/decode test:");
 log.console("  Original:", test4);
 log.console("  Decoded:", decoded);
 log.console("  Match:", test4 === decoded ? "PASS" : "FAIL");
 // Test validation
 log.console("\nValidation tests:");
 log.console("  Valid UTF-8:", utf8.validate("Hello 世界") ? "PASS" : "FAIL");
 // Test slicing
 // Indices are rune positions; negative values are passed to exercise
 // counting from the end
 var test5 = "Hello 世界!";
 log.console("\nSlice tests:");
 log.console("  Original:", test5);
 log.console("  slice(0, 5):", utf8.slice(test5, 0, 5));
 log.console("  slice(6, 8):", utf8.slice(test5, 6, 8));
 log.console("  slice(-3):", utf8.slice(test5, -3));
 log.console("  slice(0, -1):", utf8.slice(test5, 0, -1));
 // Test char_at
 // Index 100 is past the end of test5 and exercises the out-of-range path
 log.console("\nchar_at tests:");
 log.console("  char_at(0):", utf8.char_at(test5, 0));
 log.console("  char_at(6):", utf8.char_at(test5, 6));
 log.console("  char_at(7):", utf8.char_at(test5, 7));
 log.console("  char_at(100):", utf8.char_at(test5, 100));
 // Test with emoji sequences
 // A single visible glyph built from several code points joined together
 var test6 = "👨‍👩‍👧‍👦";
 log.console("\nComplex emoji test:");
 log.console("  String:", test6);
 log.console("  Length:", utf8.length(test6));
 log.console("  Byte length:", utf8.byte_length(test6));
 log.console("  Codepoints:", utf8.codepoints(test6).length);
 log.console("\nAll tests completed!");
 // presumably ends the running test actor - confirm against the runtime
 $_.stop()