#include "cell.h"
#include "jsffi.h"
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "kim.h"

// UTF-8 utility functions exposed to script through js_utf8_use().
//
// Every JSC_CCALL / JSC_SCALL body below relies on names supplied by the
// macros in jsffi.h: `js`, `argc`, `argv` and `ret`, plus `str` for
// JSC_SCALL. NOTE(review): JSC_SCALL appears to convert argv[0] to a C string
// named `str` and release it after the body runs -- confirm against jsffi.h.

enum {
  UTF8MOD_MAX_BYTES = 4,             // longest UTF-8 encoding of one codepoint
  UTF8MOD_MAX_CODEPOINT = 0x10FFFF   // highest valid Unicode codepoint
};

// Number of bytes in the shortest UTF-8 encoding of `codepoint`.
// The ranges are mutually exclusive so exactly one length matches.
static int utf8mod_shortest_len(int codepoint)
{
  if (codepoint <= 0x7F) return 1;
  if (codepoint <= 0x7FF) return 2;
  if (codepoint <= 0xFFFF) return 3;
  return 4;
}

// Get codepoints from a UTF-8 string.
// argv[0]: string. Returns an array holding one integer per decoded codepoint.
JSC_CCALL(utf8_codepoints,
  const char *str = JS_ToCString(js, argv[0]);
  if (!str) return JS_EXCEPTION;

  JSValue arr = JS_NewArray(js);
  int idx = 0;

  // decode_utf8 advances the cursor itself, so it needs a mutable pointer.
  char *ptr = (char *)str;
  while (*ptr) {
    int codepoint = decode_utf8(&ptr);
    JS_SetPropertyUint32(js, arr, idx++, JS_NewInt32(js, codepoint));
  }

  JS_FreeCString(js, str);
  ret = arr;
)

// Create a UTF-8 string from an array of codepoints.
// argv[0]: array of integers. Returns the encoded string.
// Throws a RangeError for values outside 0..0x10FFFF and propagates any
// exception raised while converting an element to an integer.
JSC_CCALL(utf8_from_codepoints,
  int len = JS_ArrayLength(js, argv[0]);
  // Nothing to encode; also keeps a negative length away from malloc.
  if (len <= 0) return JS_NewString(js, "");

  // Size the buffer in size_t so the multiplication cannot overflow int.
  size_t count = (size_t)len;
  if (count > (SIZE_MAX - 1) / UTF8MOD_MAX_BYTES) return JS_ThrowOutOfMemory(js);

  // Worst case: 4 bytes per codepoint + terminator.
  char *buffer = malloc(count * UTF8MOD_MAX_BYTES + 1);
  if (!buffer) return JS_ThrowOutOfMemory(js);
  char *ptr = buffer;

  for (int i = 0; i < len; i++) {
    JSValue val = JS_GetPropertyUint32(js, argv[0], i);
    int codepoint = 0;
    int failed = JS_ToInt32(js, &codepoint, val) < 0;
    JS_FreeValue(js, val);
    if (failed) {
      free(buffer);
      return JS_EXCEPTION;
    }
    // The buffer is sized for at most 4 bytes per entry, which only holds
    // for real Unicode codepoints; reject anything else before encoding.
    if (codepoint < 0 || codepoint > UTF8MOD_MAX_CODEPOINT) {
      free(buffer);
      return JS_ThrowRangeError(js, "Invalid codepoint");
    }
    encode_utf8(&ptr, codepoint);
  }

  *ptr = '\0';
  // Pass the length explicitly so an encoded U+0000 does not truncate the result.
  ret = JS_NewStringLen(js, buffer, (size_t)(ptr - buffer));
  free(buffer);
)

// Count UTF-8 characters (runes) in a string.
JSC_SCALL(utf8_length,
  // Guard in case the string conversion done by the macro failed.
  if (!str) return JS_EXCEPTION;
  ret = JS_NewInt32(js, utf8_count(str));
)

// Validate a UTF-8 string.
// Returns false if any decoded codepoint is negative, above 0x10FFFF, a
// surrogate (0xD800..0xDFFF), or was not stored in its shortest encoding.
JSC_SCALL(utf8_validate,
  // Guard in case the string conversion done by the macro failed.
  if (!str) return JS_EXCEPTION;

  char *ptr = (char *)str;
  int valid = 1;

  while (*ptr) {
    const char *seq_start = ptr;
    int codepoint = decode_utf8(&ptr);
    int bytes_used = (int)(ptr - seq_start);

    // Check for invalid sequences
    if (codepoint < 0 || codepoint > UTF8MOD_MAX_CODEPOINT ||
        (codepoint >= 0xD800 && codepoint <= 0xDFFF)) {
      valid = 0;
      break;
    }

    // Check for overlong encodings: the byte count must equal the shortest
    // encoding for this codepoint. This also stops the loop if the decoder
    // consumed no bytes.
    if (bytes_used != utf8mod_shortest_len(codepoint)) {
      valid = 0;
      break;
    }
  }

  ret = JS_NewBool(js, valid);
)

// Get byte length of a UTF-8 string.
JSC_SCALL(utf8_byte_length,
  // Guard in case the string conversion done by the macro failed.
  if (!str) return JS_EXCEPTION;
  ret = JS_NewInt32(js, (int)strlen(str));
)

// Encode a string to UTF-8 bytes.
// Returns a blob made by js_new_blob_stoned_copy from the string bytes,
// without the terminator.
JSC_SCALL(utf8_encode,
  // Guard in case the string conversion done by the macro failed.
  if (!str) return JS_EXCEPTION;
  size_t len = strlen(str);
  ret = js_new_blob_stoned_copy(js, str, len);
)

// Decode UTF-8 bytes to a string.
// argv[0]: blob. Throws a TypeError when no blob data can be obtained.
JSC_CCALL(utf8_decode,
  size_t len;
  void *data = js_get_blob_data(js, &len, argv[0]);
  if (!data) return JS_ThrowTypeError(js, "Expected blob");

  // Build the string straight from the blob bytes with an explicit length:
  // no temporary copy, and embedded NUL bytes do not cut the result short.
  ret = JS_NewStringLen(js, data, len);
)

// Slice a UTF-8 string by character indices (not byte indices).
// argv[0]: string, argv[1]: optional start (default 0), argv[2]: optional
// end (default: character count). Negative indices count back from the end
// of the string. Returns "" when the resulting range is empty.
JSC_CCALL(utf8_slice,
  const char *str = JS_ToCString(js, argv[0]);
  if (!str) return JS_EXCEPTION;

  // Take the character count before reading the arguments so negative
  // indices and clamping are relative to the real string length.
  int total = utf8_count(str);
  int start = 0;
  int end = total;

  if (argc > 1 && JS_ToInt32(js, &start, argv[1]) < 0) {
    JS_FreeCString(js, str);
    return JS_EXCEPTION;
  }
  if (argc > 2 && JS_ToInt32(js, &end, argv[2]) < 0) {
    JS_FreeCString(js, str);
    return JS_EXCEPTION;
  }

  // Handle negative indices
  if (start < 0) start = total + start;
  if (end < 0) end = total + end;

  // Clamp values
  if (start < 0) start = 0;
  if (end > total) end = total;

  if (start >= end) {
    JS_FreeCString(js, str);
    return JS_NewString(js, "");
  }

  // Find start position
  char *ptr = (char *)str;
  for (int i = 0; i < start && *ptr; i++) {
    decode_utf8(&ptr);
  }
  const char *slice_start = ptr;

  // Find end position
  for (int i = start; i < end && *ptr; i++) {
    decode_utf8(&ptr);
  }

  // Create the substring directly from the byte range.
  ret = JS_NewStringLen(js, slice_start, (size_t)(ptr - slice_start));
  JS_FreeCString(js, str);
)

// Get the character at a character index.
// argv[0]: string, argv[1]: index. Returns a one-character string, or null
// when the index is negative or past the end of the string.
JSC_CCALL(utf8_char_at,
  const char *str = JS_ToCString(js, argv[0]);
  if (!str) return JS_EXCEPTION;

  int index = 0;
  if (JS_ToInt32(js, &index, argv[1]) < 0) {
    JS_FreeCString(js, str);
    return JS_EXCEPTION;
  }

  char *ptr = (char *)str;
  int count = 0;

  // Skip to index
  while (*ptr && count < index) {
    decode_utf8(&ptr);
    count++;
  }

  // Ran off the end, or the index was negative so nothing was skipped.
  if (!*ptr || count != index) {
    JS_FreeCString(js, str);
    return JS_NULL;
  }

  // Get the character
  const char *char_start = ptr;
  decode_utf8(&ptr);
  ret = JS_NewStringLen(js, char_start, (size_t)(ptr - char_start));
  JS_FreeCString(js, str);
)

// Functions installed on the module object.
// NOTE(review): the last MIST_FUNC_DEF argument looks like the declared
// argument count -- confirm against the macro definition.
static const JSCFunctionListEntry js_utf8_funcs[] = {
  MIST_FUNC_DEF(utf8, codepoints, 1),
  MIST_FUNC_DEF(utf8, from_codepoints, 1),
  MIST_FUNC_DEF(utf8, length, 1),
  MIST_FUNC_DEF(utf8, validate, 1),
  MIST_FUNC_DEF(utf8, byte_length, 1),
  MIST_FUNC_DEF(utf8, encode, 1),
  MIST_FUNC_DEF(utf8, decode, 1),
  MIST_FUNC_DEF(utf8, slice, 3),
  MIST_FUNC_DEF(utf8, char_at, 2),
};

// Create a new object and install the UTF-8 functions on it.
JSValue js_utf8_use(JSContext *js)
{
  JSValue mod = JS_NewObject(js);
  JS_SetPropertyFunctionList(js, mod, js_utf8_funcs, countof(js_utf8_funcs));
  return mod;
}