/* text.cm - text conversion and formatting utilities */

// Runtime modules this file builds on.
var blob = use('blob')
var utf8 = use('utf8')

// Host string methods, captured once at load time so the functions below can
// invoke them through .call(...). Not every capture is referenced in the
// visible part of this file; all are kept in case other code relies on them.
var _toLowerCase = String.prototype.toLowerCase
var _toUpperCase = String.prototype.toUpperCase
var _trim = String.prototype.trim
var _indexOf = String.prototype.indexOf
var _lastIndexOf = String.prototype.lastIndexOf
var _replace = String.prototype.replace
var _normalize = String.prototype.normalize
var _substring = String.prototype.substring
var _charCodeAt = String.prototype.charCodeAt
var _codePointAt = String.prototype.codePointAt

// The host String function, used for default number-to-text conversion.
var _String = String

// The `this` value at load time; text() looks up blob_to_hex and
// blob_to_base32 on it.
var that = this

// Convert a number to text in the given radix.
//
//   num   - the number to convert; only number.whole(num) is rendered.
//   radix - the base, a whole number from 2 to 36 inclusive.
//
// Returns lowercase digit text, prefixed with "-" for negative values, or
// null when radix is not a whole number in the range 2..36.
function to_radix(num, radix) {
  // The range test is written positively so that a NaN radix fails it, and
  // the whole-number test guarantees n % radix is a valid index into the
  // digit table (a fractional radix would index digits[] with a fraction).
  if (!(radix >= 2 && radix <= 36) || radix != number.floor(radix)) return null;

  var digits = "0123456789abcdefghijklmnopqrstuvwxyz";
  var result = "";
  var n = number.whole(num);
  var negative = n < 0;
  n = number.abs(n);

  if (n == 0) return "0";

  // NOTE(review): this loop only terminates if n is finite. Confirm that
  // number.whole never hands back an infinite value for the inputs used here.
  while (n > 0) {
    // Peel off the least significant digit and prepend it.
    result = digits[n % radix] + result;
    n = number.floor(n / radix);
  }

  return negative ? "-" + result : result;
}

// Group the integer part of a numeric text into runs of n characters, counted
// from the right, placing sep between runs.
//
//   str - numeric text, optionally starting with "-" and optionally holding a
//         "." followed by a fraction; the sign and fraction are not grouped.
//   sep - the text inserted between runs.
//   n   - run length; a falsy or zero n returns str untouched.
function add_separator(str, sep, n) {
  if (!n || n == 0) return str;

  var is_negative = str[0] == '-';
  var unsigned = is_negative ? str.substring(1) : str;

  var pieces = unsigned.split('.');
  var whole = pieces[0];
  var fraction = pieces[1] || '';

  // Walk the integer part right to left, emitting sep each time a run fills.
  var grouped = "";
  var run = 0;
  var pos = whole.length;
  while (pos > 0) {
    pos--;
    if (run == n && pos != whole.length - 1) {
      grouped = sep + grouped;
      run = 0;
    }
    grouped = whole[pos] + grouped;
    run++;
  }

  if (fraction) grouped = grouped + '.' + fraction;
  if (is_negative) grouped = '-' + grouped;
  return grouped;
}

// Group a digit text into runs of n characters, counted from the left,
// placing sep between runs. A leading "-" is set aside and restored.
//
//   str - the text to group.
//   sep - the text inserted between runs.
//   n   - run length; a falsy or zero n returns str untouched.
function add_separator_left(str, sep, n) {
  if (!n || n == 0) return str;

  var is_negative = str[0] == '-';
  var body = is_negative ? str.substring(1) : str;

  var out = "";
  var run = 0;
  var pos = 0;
  while (pos < body.length) {
    if (run == n && pos != 0) {
      out += sep;
      run = 0;
    }
    out += body[pos];
    run++;
    pos++;
  }

  return is_negative ? '-' + out : out;
}

/* -------- main text function --------------------------------------- */

// True when item is a whole number in the codepoint range 0..0x10FFFF.
function is_codepoint(item) {
  return typeof item == 'number' && item >= 0 && item <= 0x10FFFF && item == number.floor(item);
}

// Convert a value to text. What happens depends on the first argument:
//
//   blob   - text(blob, format): the first ASCII letter in format picks an
//            encoding: 'h' hexadecimal, 't' base32, 'b' one '0'/'1' per bit,
//            'o' one octal digit per 3 bits (a short final group is padded
//            with zero bits on the right). Without a recognized style the
//            blob is decoded as UTF-8. Throws an Error if the blob is not
//            stone.
//   array  - text(array, separator): whole numbers in 0..0x10FFFF become the
//            character with that codepoint, other items go through String();
//            the pieces are joined with separator (default "").
//   number - text(number, radix) or text(number, format), otherwise the
//            default String conversion.
//   string - text(string, from, to): the substring [from, to). Negative
//            indices count from the end; from defaults to 0 and to defaults
//            to the length. Returns null for an invalid range, and the
//            string unchanged if an index is given but is not a number.
//
// Any other first argument yields null.
function text(...args) {
  var arg = args[0];

  // Handle blob conversion
  if (arg instanceof blob) {
    if (!stone.p(arg))
      throw new Error("text: blob must be stone for reading");

    var format = args[1];
    var bit_length = arg.length;   // used below as the number of bits to read
    var result = "";
    var i;

    if (typeof format == 'string') {
      // The style is the first ASCII letter found in the format text.
      var style = '';
      for (i = 0; i < format.length; i++) {
        if ((format[i] >= 'a' && format[i] <= 'z') || (format[i] >= 'A' && format[i] <= 'Z')) {
          style = format[i];
          break;
        }
      }

      switch (style) {
        case 'h': // hexadecimal
          return that.blob_to_hex(arg);

        case 't': // base32
          return that.blob_to_base32(arg);

        case 'b': // binary
          for (i = 0; i < bit_length; i++) {
            result += arg.read_logical(i) ? '1' : '0';
          }
          return result;

        case 'o': // octal
          var bits = 0;
          var value = 0;

          for (i = 0; i < bit_length; i++) {
            value = (value << 1) | (arg.read_logical(i) ? 1 : 0);
            bits++;

            if (bits == 3) {
              result += value.toString();
              bits = 0;
              value = 0;
            }
          }

          // A trailing group of 1 or 2 bits is left-aligned within its digit.
          if (bits > 0) {
            value = value << (3 - bits);
            result += value.toString();
          }

          return result;
      }
    }

    // Default (no format, or an unrecognized style): decode as UTF-8.
    if (bit_length == 0) return "";
    return utf8.decode(arg);
  }

  // Handle array conversion
  if (isa(arg, array)) {
    var separator = args[1] || "";

    var all_codepoints = true;
    for (var k = 0; k < arg.length; k++) {
      if (!is_codepoint(arg[k])) {
        all_codepoints = false;
        break;
      }
    }

    // Pure codepoint arrays with no separator convert in a single call.
    if (all_codepoints && separator == "") {
      return utf8.from_codepoints(arg);
    }

    var joined = "";
    for (var j = 0; j < arg.length; j++) {
      if (j > 0) joined += separator;

      var item = arg[j];
      joined += is_codepoint(item) ? utf8.from_codepoints([item]) : String(item);
    }
    return joined;
  }

  // Handle number conversion
  if (typeof arg == 'number') {
    var number_format = args[1];

    if (typeof number_format == 'number') return to_radix(arg, number_format);
    if (typeof number_format == 'string') return format_number(arg, number_format);

    return _String(arg);
  }

  // Handle text operations
  if (typeof arg == 'string') {
    if (args.length == 1) return arg;

    var len = arg.length;
    var from = args[1];
    var to = args[2];

    // Defaults have to be filled in before the type check; otherwise an
    // omitted index is rejected as "not a number" and never defaulted.
    if (from == null) from = 0;
    if (to == null) to = len;

    if (typeof from != 'number' || typeof to != 'number') return arg;

    // Negative indices count back from the end.
    if (from < 0) from += len;
    if (to < 0) to += len;

    if (from < 0 || from > to || to > len) return null;

    return arg.substring(from, to);
  }

  return null;
}

/* -------- number formatting ---------------------------------------- */

// True when s is an optional "-" followed by one or more decimal digits.
// Grouping separators are only meaningful for text of this shape.
function is_plain_integer_text(s) {
  var start = s[0] == '-' ? 1 : 0;
  if (start >= s.length) return false;
  for (var k = start; k < s.length; k++) {
    if (s[k] < '0' || s[k] > '9') return false;
  }
  return true;
}

// Format a number according to a compact format text.
//
// The format is: an optional separation digit, one style character, then up
// to two places digits. Anything left over makes the format invalid.
//
//   Real styles    separator  decimal point  default separation / places
//     e exponential   none        .              0 / 0
//     n number        none        .              0 / 0
//     s space         ' '         .              3 / 0
//     u underbar      '_'         .              0 / 0
//     d decimal       ','         .              3 / 2
//     v comma         '.'         ,              0 / 0
//     l locale        same as d for now
//
//   Integer styles (digits uppercased, '_' separators grouped from the left,
//   zero padded to `places` digits, default places 1):
//     i radix 10, b radix 2, o radix 8, h radix 16, t radix 32
//
// A separation or places value of 0 means "use the style's default".
// Returns the formatted text, or null for an invalid format or unknown style.
function format_number(num, format) {
  var separation = 0;
  var style = '';
  var places = 0;
  var i = 0;

  // Optional single separation digit.
  if (i < format.length && format[i] >= '0' && format[i] <= '9') {
    separation = number(format[i]);
    i++;
  }

  // Mandatory style character.
  if (i >= format.length) return null;
  style = format[i];
  i++;

  // Up to two places digits.
  var digits_seen = 0;
  while (digits_seen < 2 && i < format.length && format[i] >= '0' && format[i] <= '9') {
    places = places * 10 + number(format[i]);
    i++;
    digits_seen++;
  }

  // Invalid format if there's more
  if (i < format.length) return null;

  // Real number styles
  if (style == 'e' || style == 'n' || style == 's' ||
      style == 'u' || style == 'd' || style == 'v' || style == 'l') {

    // Start from the 'e'/'n' configuration and override per style.
    var decimal_point = '.';
    var separator = '';
    var default_separation = 0;
    var default_places = 0;

    switch (style) {
      case 's': // space
        separator = ' ';
        default_separation = 3;
        break;
      case 'u': // underbar
        separator = '_';
        break;
      case 'd': // decimal
      case 'l': // locale (default to 'd' style for now)
        separator = ',';
        default_separation = 3;
        default_places = 2;
        break;
      case 'v': // comma (European style)
        decimal_point = ',';
        separator = '.';
        break;
    }

    if (separation == 0) separation = default_separation;
    if (places == 0 && style != 'e' && style != 'n') places = default_places;

    if (style == 'e') {
      return places > 0 ? num.toExponential(places) : num.toExponential();
    }

    // 'n' switches to scientific notation for extreme magnitudes.
    if (style == 'n' && (number.abs(num) >= 1e21 || (number.abs(num) < 1e-6 && num != 0))) {
      return num.toExponential();
    }

    var str = places > 0 ? num.toFixed(places) : num.toString();

    // Split at the '.' produced by the conversion BEFORE choosing the output
    // decimal point. Grouping then only ever sees the integer digits, which
    // matters when the separator or decimal point is itself '.' or ','.
    var dot = str.indexOf('.');
    var whole = dot >= 0 ? str.substring(0, dot) : str;
    var fraction = dot >= 0 ? str.substring(dot + 1) : '';

    // Text such as "1e+21" or "Infinity" is not a digit run; leave it alone.
    if (separation > 0 && separator && is_plain_integer_text(whole)) {
      whole = add_separator(whole, separator, separation);
    }

    return dot >= 0 ? whole + decimal_point + fraction : whole;
  }

  // Integer styles
  if (style == 'i' || style == 'b' || style == 'o' ||
      style == 'h' || style == 't') {

    var radix = 10;
    switch (style) {
      case 'b': radix = 2; break;   // binary
      case 'o': radix = 8; break;   // octal
      case 'h': radix = 16; break;  // hexadecimal
      case 't': radix = 32; break;  // base32
    }

    // Every integer style defaults to separation 0 and places 1.
    if (places == 0) places = 1;

    var n = number.whole(num);
    var digit_text = to_radix(n, radix).toUpperCase();

    // Pad and group the magnitude only, then put the sign back.
    var negative = digit_text[0] == '-';
    if (negative) digit_text = digit_text.substring(1);

    while (digit_text.length < places) {
      digit_text = '0' + digit_text;
    }

    if (separation > 0) {
      digit_text = add_separator_left(digit_text, '_', separation);
    }

    return negative ? '-' + digit_text : digit_text;
  }

  return null;
}

/* -------- text sub-functions --------------------------------------- */

// Lowercase a text. Anything that is not a string yields null.
text.lower = function(str) {
  return typeof str == 'string' ? _toLowerCase.call(str) : null
}

// Uppercase a text. Anything that is not a string yields null.
text.upper = function(str) {
  return typeof str == 'string' ? _toUpperCase.call(str) : null
}

// Strip unwanted characters from both ends of a text.
//
//   str    - the text to trim; a non-string yields null.
//   reject - when null or omitted, the host trim is applied. Otherwise each
//            end is stripped while reject.indexOf(character) is >= 0.
text.trim = function(str, reject) {
  if (typeof str != 'string') return null
  if (reject == null) return _trim.call(str)

  var head = 0
  var tail = str.length

  // Advance past rejected characters at the front...
  for (; head < tail; head++) {
    if (!(reject.indexOf(str[head]) >= 0)) break
  }
  // ...then back up over rejected characters at the end.
  for (; tail > head; tail--) {
    if (!(reject.indexOf(str[tail - 1]) >= 0)) break
  }

  return _substring.call(str, head, tail)
}

// Return the NFC-normalized form of a text, or null for a non-string.
text.normalize = function(str) {
  return typeof str == 'string' ? _normalize.call(str, 'NFC') : null
}

// Return the codepoint at the start of a text, or null when the argument is
// not a string or is empty.
text.codepoint = function(str) {
  var usable = typeof str == 'string' && str.length != 0
  return usable ? _codePointAt.call(str, 0) : null
}

// Find the first occurrence of target in str at or after from.
//
//   from - optional start index, default 0. A negative value counts from the
//          end of str and is clamped to 0 if it still lands before the start.
//
// Returns the index of the match, or null when there is none or when str or
// target is not a string.
text.search = function(str, target, from) {
  if (typeof str != 'string' || typeof target != 'string') return null

  var start = from == null ? 0 : from
  if (start < 0) {
    start += str.length
    if (start < 0) start = 0
  }

  var found = _indexOf.call(str, target, start)
  return found == -1 ? null : found
}

// Replace occurrences of target in str, scanning left to right.
//
//   str         - the text to search; a non-string yields null.
//   target      - the text to look for; a non-string yields null. An empty
//                 target returns str unchanged.
//   replacement - replacement text, or a function called as
//                 replacement(target, index), where index is the match
//                 position in the text built so far. A function result of
//                 null leaves that occurrence in place. Any other value is
//                 converted with String(). A missing non-function
//                 replacement yields null.
//   limit       - optional maximum number of occurrences to visit;
//                 occurrences a function declines still count. When null,
//                 every occurrence is visited.
//
// Replacement text is never rescanned for further matches.
text.replace = function(str, target, replacement, limit) {
  if (typeof str != 'string') return null
  if (typeof target != 'string') return null

  var dynamic = typeof replacement == 'function'
  if (!dynamic && replacement == null) return null

  // An empty target matches at every position without consuming anything,
  // so the scan position below could never move past it.
  if (target.length == 0) return str

  var result = str
  var pos = 0
  var count = 0

  while (limit == null || count < limit) {
    var idx = _indexOf.call(result, target, pos)
    if (idx == -1) break
    count++

    var rep = dynamic ? replacement(target, idx) : replacement
    if (rep == null) {
      // Declined: keep this occurrence and resume just after it.
      pos = idx + target.length
      continue
    }

    // Coerce so rep.length below is always a number.
    rep = String(rep)
    result = _substring.call(result, 0, idx) + rep + _substring.call(result, idx + target.length)
    pos = idx + rep.length
  }

  return result
}

// Fill {key} and {key:spec} placeholders in str.
//
//   str         - the template; a non-string yields null.
//   collection  - an array (key is converted with number() and used as an
//                 index) or an object (key is used as a property name).
//   transformer - optional. A function is called as
//                 transformer(value, spec). An object is searched for a
//                 function stored under spec, which is called with value.
//
// For each placeholder the substitution is the first non-null result of:
// the transformer; format_number(value, spec) when value is a number and a
// spec is present; String(value). If none produces a value the placeholder
// is copied through unchanged, and so is a '{' with no closing '}'.
text.format = function(str, collection, transformer) {
  if (typeof str != 'string') return null

  var result = ""
  var i = 0

  while (i < str.length) {
    if (str[i] != '{') {
      result += str[i]
      i++
      continue
    }

    var end = _indexOf.call(str, '}', i)
    if (end == -1) {
      // No closing brace anywhere ahead, so nothing further can be a
      // placeholder; copy the remainder in one step.
      result += _substring.call(str, i)
      break
    }

    var middle = _substring.call(str, i + 1, end)
    var colonIdx = _indexOf.call(middle, ':')
    var key = colonIdx >= 0 ? _substring.call(middle, 0, colonIdx) : middle
    var formatSpec = colonIdx >= 0 ? _substring.call(middle, colonIdx + 1) : ""

    // Look the key up in the collection.
    var value = null
    if (isa(collection, array)) {
      var idx = number(key)
      if (!isNaN(idx) && idx >= 0 && idx < collection.length) {
        value = collection[idx]
      }
    } else if (isa(collection, object)) {
      value = collection[key]
    }

    var substitution = null

    if (transformer != null) {
      if (typeof transformer == 'function') {
        substitution = transformer(value, formatSpec)
      } else if (typeof transformer == 'object') {
        var fn = transformer[formatSpec]
        if (typeof fn == 'function') {
          substitution = fn(value)
        }
      }
    }

    // Numbers with a spec go through the number formatter; it returns null
    // for a spec it does not understand, which falls through to String().
    if (substitution == null && typeof value == 'number' && formatSpec) {
      substitution = format_number(value, formatSpec)
    }

    if (substitution == null && value != null) {
      substitution = String(value)
    }

    if (substitution != null) {
      result += substitution
    } else {
      result += _substring.call(str, i, end + 1)
    }

    i = end + 1
  }

  return result
}

// Placeholder for pattern extraction. Matching is not implemented (it would
// need a regex engine or a custom pattern language), so every call returns
// null regardless of its arguments.
text.extract = function(str, pattern, from, to) {
  return null
}

return text |