add text function
This commit is contained in:
456
scripts/text.cm
Normal file
456
scripts/text.cm
Normal file
@@ -0,0 +1,456 @@
|
||||
/* text.cm - text conversion and formatting utilities */
|
||||
|
||||
/* -------- helper functions ----------------------------------------- */
|
||||
|
||||
var blob = use('blob')
|
||||
|
||||
// Convert number to string with given radix
|
||||
/*
 * to_radix(num, radix) -> text or null
 *
 * Renders the integer part of `num` (fraction truncated toward zero) in the
 * given radix using the digits 0-9 then a-z (lowercase). Negative values get
 * a leading "-".
 *
 * Returns null when:
 *   - radix is not a whole number in 2..36 (a fractional radix would index
 *     the digit table with a non-integer remainder), or
 *   - num is NaN or +/-Infinity (NaN used to yield "" and an infinite value
 *     never left the digit loop).
 */
function to_radix(num, radix) {
    if (!Number.isInteger(radix) || radix < 2 || radix > 36) return null;

    var n = Math.trunc(num);
    if (!Number.isFinite(n)) return null;

    var negative = n < 0;
    n = Math.abs(n);

    if (n === 0) return "0";

    var digits = "0123456789abcdefghijklmnopqrstuvwxyz";
    var result = "";

    // Peel off the least significant digit each round and prepend it.
    while (n > 0) {
        result = digits[n % radix] + result;
        n = Math.floor(n / radix);
    }

    return negative ? "-" + result : result;
}
|
||||
|
||||
// Insert separator every n digits from right
|
||||
/*
 * add_separator(str, sep, n) -> text
 *
 * Groups the integer part of a numeric string into runs of n characters,
 * counted from the right, joined by sep. A leading "-" is set aside and put
 * back at the end; whatever follows the first "." is appended unchanged
 * (text after a second "." is dropped, as before). A falsy n returns str
 * untouched.
 */
function add_separator(str, sep, n) {
    if (!n) return str;

    var is_negative = str[0] === '-';
    var unsigned = is_negative ? str.substring(1) : str;

    var pieces = unsigned.split('.');
    var whole = pieces[0];
    var fraction = pieces[1] || '';

    // Slice n-wide chunks off the right end; only a positive whole n can
    // ever place a separator, anything else leaves the digits ungrouped.
    var grouped = '';
    var end = whole.length;
    if (typeof n === 'number' && n > 0 && Math.floor(n) === n) {
        while (end > n) {
            grouped = sep + whole.substring(end - n, end) + grouped;
            end -= n;
        }
    }
    grouped = whole.substring(0, end) + grouped;

    if (fraction) {
        grouped += '.' + fraction;
    }

    if (is_negative) {
        return '-' + grouped;
    }
    return grouped;
}
|
||||
|
||||
// Insert separator every n characters counting from the left
|
||||
/*
 * add_separator_left(str, sep, n) -> text
 *
 * Splits str into runs of n characters counted from the left and joins them
 * with sep. A leading "-" is excluded from the count and restored afterwards.
 * Every other character (including any ".") is counted. A falsy n returns
 * str untouched.
 */
function add_separator_left(str, sep, n) {
    if (!n) return str;

    var is_negative = str[0] === '-';
    var body = is_negative ? str.substring(1) : str;

    // Emit full n-wide chunks while more than n characters remain, so the
    // final chunk never carries a trailing separator.
    var out = '';
    var start = 0;
    if (typeof n === 'number' && n > 0 && Math.floor(n) === n) {
        while (body.length - start > n) {
            out += body.substring(start, start + n) + sep;
            start += n;
        }
    }
    out += body.substring(start);

    if (is_negative) {
        return '-' + out;
    }
    return out;
}
|
||||
|
||||
/* -------- main text function --------------------------------------- */
|
||||
|
||||
/* Return the first ASCII letter found in a format string, or '' if none. */
function first_letter(format) {
    for (var i = 0; i < format.length; i++) {
        var ch = format[i];
        if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) return ch;
    }
    return '';
}

/*
 * Render a blob as two hex digits per group of 8 bits, using the 16-character
 * digit table supplied. Within each group the bit at offset j contributes
 * 1 << j, i.e. the first bit read is the least significant.
 * NOTE(review): the other blob styles accumulate first-bit-as-most-significant;
 * confirm against the blob module which order is intended.
 */
function blob_to_hex(b, hex_digits) {
    var bit_length = b.length;
    var result = "";
    for (var i = 0; i < bit_length; i += 8) {
        var byte_val = 0;
        for (var j = 0; j < 8 && i + j < bit_length; j++) {
            if (b.read_logical(i + j)) byte_val |= (1 << j);
        }
        result += hex_digits[(byte_val >> 4) & 0xF];
        result += hex_digits[byte_val & 0xF];
    }
    return result;
}

/*
 * Render a blob by reading its bits in index order, packing every `width`
 * bits (first bit most significant) into one value and emitting
 * digits[value]. A short final group is shifted left, i.e. zero-filled on
 * the right.
 */
function blob_to_digits(b, width, digits) {
    var bit_length = b.length;
    var result = "";
    var value = 0;
    var bits = 0;
    for (var i = 0; i < bit_length; i++) {
        value = (value << 1) | (b.read_logical(i) ? 1 : 0);
        bits++;
        if (bits === width) {
            result += digits[value];
            bits = 0;
            value = 0;
        }
    }
    if (bits > 0) result += digits[value << (width - bits)];
    return result;
}

/*
 * Convert a blob to text. The first letter of a string format picks the
 * style: 'h' uppercase hex, 't' base32 (A-Z2-7, '=' padded to a multiple of
 * 8 characters), 'b' binary, 'o' octal. Any other format, or none, gives
 * lowercase hex.
 */
function blob_to_text(b, format) {
    if (typeof format === 'string') {
        switch (first_letter(format)) {
            case 'h':
                return blob_to_hex(b, "0123456789ABCDEF");
            case 't':
                var encoded = blob_to_digits(b, 5, "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567");
                while (encoded.length % 8 !== 0) encoded += '=';
                return encoded;
            case 'b':
                return blob_to_digits(b, 1, "01");
            case 'o':
                return blob_to_digits(b, 3, "01234567");
        }
    }
    return blob_to_hex(b, "0123456789abcdef");
}

/*
 * Join array items with `separator`. A number that is a whole value in
 * 0..0x10FFFF is treated as a Unicode codepoint; every other item goes
 * through String().
 */
function array_to_text(arr, separator) {
    var result = "";
    for (var i = 0; i < arr.length; i++) {
        if (i > 0) result += separator;

        var item = arr[i];
        if (typeof item === 'number' && item >= 0 && item <= 0x10FFFF && item === Math.floor(item)) {
            // fromCodePoint, not fromCharCode: the latter keeps only the low
            // 16 bits, so anything above U+FFFF came out as the wrong character.
            result += String.fromCodePoint(item);
        } else {
            result += String(item);
        }
    }
    return result;
}

/*
 * Extract str[from, to). Either bound may be omitted (undefined): from
 * defaults to 0 and to defaults to the length. Negative bounds count back
 * from the end. Returns null when the resulting range is out of bounds or
 * reversed. A bound that is present but not a number returns str unchanged.
 */
function slice_text(str, from, to) {
    var len = str.length;

    // Apply defaults BEFORE the type check; doing it afterwards made
    // text(str, from) return the whole string.
    var start = from === undefined ? 0 : from;
    var end = to === undefined ? len : to;

    if (typeof start !== 'number' || typeof end !== 'number') return str;

    if (start < 0) start += len;
    if (end < 0) end += len;

    if (start < 0 || start > end || end > len) return null;

    return str.substring(start, end);
}

/*
 * text(value, ...) -> text or null
 *
 * Dispatches on the type of the first argument:
 *   blob   - text(blob, format?)     see blob_to_text; throws an Error if
 *                                    stone.p(blob) is false.
 *   array  - text(array, separator?) see array_to_text; separator defaults "".
 *   number - text(number, radix)     -> to_radix(number, radix)
 *            text(number, format)    -> format_number(number, format)
 *            text(number)            -> String(number)
 *   string - text(string)            -> the string itself
 *            text(string, from, to?) -> see slice_text
 * Anything else returns null.
 */
function text() {
    var arg = arguments[0];

    if (arg instanceof blob) {
        if (!stone.p(arg))
            throw new Error("text: blob must be stone for reading");
        return blob_to_text(arg, arguments[1]);
    }

    if (Array.isArray(arg)) {
        return array_to_text(arg, arguments[1] || "");
    }

    if (typeof arg === 'number') {
        var format = arguments[1];
        if (typeof format === 'number') return to_radix(arg, format);
        if (typeof format === 'string') return format_number(arg, format);
        return String(arg);
    }

    if (typeof arg === 'string') {
        if (arguments.length === 1) return arg;
        return slice_text(arg, arguments[1], arguments[2]);
    }

    return null;
}
|
||||
|
||||
/* -------- number formatting ---------------------------------------- */
|
||||
|
||||
/*
 * Real-number styles: decimal point, group separator, and the defaults used
 * when the format omits the separation digit / the places digits.
 */
var real_styles = {
    e: { point: '.', separator: '',  separation: 0, places: 0 },  // exponential
    n: { point: '.', separator: '',  separation: 0, places: 0 },  // number
    s: { point: '.', separator: ' ', separation: 3, places: 0 },  // space
    u: { point: '.', separator: '_', separation: 0, places: 0 },  // underbar
    d: { point: '.', separator: ',', separation: 3, places: 2 },  // decimal
    v: { point: ',', separator: '.', separation: 0, places: 0 },  // comma (European style)
    l: { point: '.', separator: ',', separation: 3, places: 2 }   // locale ('d' style for now)
};

/* Integer styles and their radix. */
var integer_radixes = { i: 10, b: 2, o: 8, h: 16, t: 32 };

/* True when ch is a single ASCII decimal digit. */
function is_digit(ch) {
    return ch >= '0' && ch <= '9';
}

/* Format num in one of the real_styles; see format_number for the contract. */
function format_real(num, style, separation, places, places_given) {
    var spec = real_styles[style];

    // NaN / Infinity have no digits to group or round.
    if (!Number.isFinite(num)) return String(num);

    var group = separation === 0 ? spec.separation : separation;
    // Only fall back to the style default when the places digits were
    // omitted; an explicit 0 means "no fixed number of places".
    var fixed = places_given ? places : spec.places;

    if (style === 'e') {
        return fixed > 0 ? num.toExponential(fixed) : num.toExponential();
    }
    if (style === 'n' && (Math.abs(num) >= 1e21 || (Math.abs(num) < 1e-6 && num !== 0))) {
        // Use scientific notation for extreme values
        return num.toExponential();
    }

    var str = fixed > 0 ? num.toFixed(fixed) : num.toString();

    // Split on the '.' produced by toFixed/toString, group the integer part,
    // then rejoin with the style's decimal point. Replacing the point first
    // hid the fraction from the grouping code when the point was not '.'.
    var dot = str.indexOf('.');
    var whole = dot === -1 ? str : str.substring(0, dot);
    var fraction = dot === -1 ? '' : str.substring(dot + 1);

    // Exponent-form output (very large magnitudes) is left ungrouped.
    if (group > 0 && spec.separator && str.indexOf('e') === -1) {
        whole = add_separator(whole, spec.separator, group);
    }

    return fraction ? whole + spec.point + fraction : whole;
}

/* Format num in one of the integer styles; see format_number for the contract. */
function format_integer(num, style, separation, places) {
    // to_radix cannot represent these (an infinite value would never finish).
    if (!Number.isFinite(num)) return null;

    var digits = to_radix(Math.trunc(num), integer_radixes[style]).toUpperCase();

    var negative = digits[0] === '-';
    if (negative) digits = digits.substring(1);

    if (style === 't') {
        // Base32 text uses Crockford's alphabet (no I, L, O, U); translate
        // from the plain 0-9A-V digits that to_radix produces.
        var plain = "0123456789ABCDEFGHIJKLMNOPQRSTUV";
        var crockford = "0123456789ABCDEFGHJKMNPQRSTVWXYZ";
        var mapped = "";
        for (var k = 0; k < digits.length; k++) {
            mapped += crockford[plain.indexOf(digits[k])];
        }
        digits = mapped;
    }

    // Pad with zeros up to the minimum width (at least one digit).
    var width = places === 0 ? 1 : places;
    while (digits.length < width) {
        digits = '0' + digits;
    }

    // Digit groups are counted from the least significant end.
    if (separation > 0) {
        digits = add_separator(digits, '_', separation);
    }

    return negative ? '-' + digits : digits;
}

/*
 * format_number(num, format) -> text or null
 *
 * format is: [separation digit] style letter [one or two places digits]
 *
 *   separation  group size for separators; 0 or omitted uses the style default
 *   style       real:    e n s u d v l   (see real_styles)
 *               integer: i b o h t       (see integer_radixes)
 *   places      real styles: digits after the decimal point; omitted uses the
 *               style default, an explicit 0 leaves the fraction as-is.
 *               integer styles: minimum number of digits (zero padded).
 *
 * Real styles group the integer part from the right with the style's
 * separator. Integer styles truncate num toward zero, render uppercase
 * digits and group from the right with '_'.
 *
 * Returns null for a malformed format, an unknown style, or a non-finite
 * num in an integer style. A non-finite num in a real style returns
 * String(num).
 */
function format_number(num, format) {
    var i = 0;
    var separation = 0;
    var places = 0;
    var places_given = false;

    // Optional separation digit
    if (i < format.length && is_digit(format[i])) {
        separation = format.charCodeAt(i) - 48;
        i++;
    }

    // Mandatory style letter
    if (i >= format.length) return null;
    var style = format[i];
    i++;

    // Optional places: one or two digits
    if (i < format.length && is_digit(format[i])) {
        places_given = true;
        places = format.charCodeAt(i) - 48;
        i++;
        if (i < format.length && is_digit(format[i])) {
            places = places * 10 + (format.charCodeAt(i) - 48);
            i++;
        }
    }

    // Invalid format if there's more
    if (i < format.length) return null;

    if (Object.prototype.hasOwnProperty.call(real_styles, style)) {
        return format_real(num, style, separation, places, places_given);
    }
    if (Object.prototype.hasOwnProperty.call(integer_radixes, style)) {
        return format_integer(num, style, separation, places);
    }
    return null;
}
|
||||
|
||||
/* -------- documentation -------------------------------------------- */
|
||||
|
||||
// Attach a documentation record to the text function under the key cell.DOC:
// `doc` is a one-line summary, `text` a one-line signature description.
// NOTE(review): `cell` is not defined in this file; presumably a global
// provided by the runtime - confirm.
text[cell.DOC] = {
|
||||
doc: "Text conversion and formatting utilities",
|
||||
text: "text(value, ...) → formatted text string"
|
||||
};
|
||||
|
||||
/* -------- exports -------------------------------------------------- */
|
||||
|
||||
// The module's result is the text function itself.
// NOTE(review): a top-level return presumably hands this value to use('text')
// callers - confirm against the module loader.
return text;
|
||||
185
tests/text.ce
Normal file
185
tests/text.ce
Normal file
@@ -0,0 +1,185 @@
|
||||
var text = use('text')

// Running tally so the summary reports real results instead of only
// printing "Passed: false" lines that are easy to miss.
var checks = 0
var failures = 0

// Render a value for the log: null is shown bare, everything else quoted.
function show(value) {
    return value === null ? "null" : "'" + value + "'"
}

// Log one exact-match comparison and record its outcome.
function check(label, result, expected) {
    var ok = result === expected
    checks++
    if (!ok) failures++
    log.console(label + " = " + show(result))
    log.console("Expected: " + show(expected))
    log.console("Passed: " + ok)
}

// Log one boolean expectation (message is printed followed by true/false).
function check_true(message, ok) {
    checks++
    if (!ok) failures++
    log.console(message + ok)
}

log.console("Testing text module...")
log.console("")

// Test array to text conversion
log.console("=== Testing array to text conversion ===")

// Basic array concatenation
check("text(['Hello', ' ', 'World'])", text(["Hello", " ", "World"]), "Hello World")
log.console("")

// Array with separator
check("text(['one', 'two', 'three'], ', ')", text(["one", "two", "three"], ", "), "one, two, three")
log.console("")

// Unicode codepoints
check("text([72, 101, 108, 108, 111])", text([72, 101, 108, 108, 111]), "Hello")
log.console("")

// Mixed array with text and codepoints
check("text(['Hi', 32, 'there', 33])", text(["Hi", 32, "there", 33]), "Hi there!")
log.console("")

// Test number to text conversion with radix
log.console("=== Testing number to text with radix ===")

var tests_radix = [
    {num: 12, radix: 10, expected: "12"},
    {num: 12, radix: 8, expected: "14"},
    {num: 12, radix: 16, expected: "c"},
    {num: 12, radix: 2, expected: "1100"},
    {num: 12, radix: 32, expected: "c"},
    {num: 255, radix: 16, expected: "ff"},
    {num: -42, radix: 10, expected: "-42"},
    {num: 100, radix: 36, expected: "2s"}
]

for (var i = 0; i < tests_radix.length; i++) {
    var test = tests_radix[i]
    check("text(" + test.num + ", " + test.radix + ")", text(test.num, test.radix), test.expected)
}
log.console("")

// Test formatted number conversion
log.console("=== Testing formatted number conversion ===")

var num = 123456789.1
var format_tests = [
    {fmt: "n", expected: "123456789.1"},
    {fmt: "3s4", expected: "123 456 789.1000"},
    {fmt: "s", expected: "123 456 789.1"},
    {fmt: "d2", expected: "123,456,789.10"},
    {fmt: "4d0", expected: "1,2345,6789.1"},
    {fmt: "e", expected: "1.234567891e+8"},
    {fmt: "e4", expected: "1.2346e+8"},
    {fmt: "i", expected: "123456789"},
    {fmt: "8b", expected: "111_01011011_11001101_00010101"},
    {fmt: "o", expected: "726746425"},
    {fmt: "h", expected: "75BCD15"},
    {fmt: "t", expected: "3NQK8N"}
]

for (var i = 0; i < format_tests.length; i++) {
    var test = format_tests[i]
    check("text(" + num + ", '" + test.fmt + "')", text(num, test.fmt), test.expected)
}
log.console("")

// Test integer formatting
log.console("=== Testing integer formatting ===")

var int_tests = [
    {num: 12, fmt: "4b8", expected: "0000_1100"},
    {num: 12, fmt: "o3", expected: "014"},
    {num: 12, fmt: "h4", expected: "000C"},
    {num: 12, fmt: "t2", expected: "0C"},
    {num: -15, fmt: "h", expected: "-F"},
    {num: 0, fmt: "b", expected: "0"}
]

for (var i = 0; i < int_tests.length; i++) {
    var test = int_tests[i]
    check("text(" + test.num + ", '" + test.fmt + "')", text(test.num, test.fmt), test.expected)
}
log.console("")

// Test text substring operations
log.console("=== Testing text substring operations ===")

var str = "miskatonic"
var substr_tests = [
    {from: 0, to: 3, expected: "mis"},
    {from: 3, to: 6, expected: "kat"},
    {from: 5, to: undefined, expected: "tonic"},
    {from: 0, to: -4, expected: "miskat"},
    {from: -3, to: undefined, expected: "nic"},
    {from: 0, to: 0, expected: ""},
    {from: 10, to: undefined, expected: ""},
    {from: 11, to: undefined, expected: null},
    {from: 2, to: 1, expected: null}
]

for (var i = 0; i < substr_tests.length; i++) {
    var test = substr_tests[i]
    var result = test.to === undefined ? text(str, test.from) : text(str, test.from, test.to)
    var args = test.to === undefined ? test.from : test.from + ", " + test.to
    check("text('" + str + "', " + args + ")", result, test.expected)
}
log.console("")

// Test edge cases
log.console("=== Testing edge cases ===")

// Empty array
check("text([])", text([]), "")

// Single element array: 42 is a whole number in codepoint range, so it is
// converted the same way as the codepoint arrays checked above ('*'), not
// rendered as the digits "42".
check("text([42])", text([42]), "*")

// Text identity
check("text('hello')", text("hello"), "hello")

// Invalid format
check("text(123, 'xyz')", text(123, "xyz"), null)

// Very small numbers with 'n' format
var tiny = 0.0000001
var tiny_result = text(tiny, "n")
log.console("text(0.0000001, 'n') = " + show(tiny_result))
// Guard the type first: a null result must count as a failure, not throw.
check_true("Should use scientific notation: ", typeof tiny_result === 'string' && tiny_result.indexOf('e') > -1)

// Very large numbers with 'n' format
var huge = 1e22
var huge_result = text(huge, "n")
log.console("text(1e22, 'n') = " + show(huge_result))
check_true("Should use scientific notation: ", typeof huge_result === 'string' && huge_result.indexOf('e') > -1)

log.console("")

// Summary
log.console("=== Test Summary ===")
log.console("All major test categories completed.")
log.console("Checks run: " + checks + ", failed: " + failures)
log.console("The text module provides:")
log.console("- Array to text conversion with Unicode support")
log.console("- Number formatting with multiple radix options")
log.console("- Advanced number formatting with real and integer styles")
log.console("- Text substring operations with negative indexing")

$_.stop()
|
||||
Reference in New Issue
Block a user