text extract
This commit is contained in:
@@ -124,10 +124,10 @@ function caller_data(depth = 0)
|
||||
|
||||
var caller = array(Error().stack, "\n")[1+depth]
|
||||
if (caller) {
|
||||
var md = caller.match(/\((.*)\:/)
|
||||
var md = extract(caller, /\((.*)\:/)
|
||||
var m = md ? md[1] : "SCRIPT"
|
||||
if (m) file = m
|
||||
md = caller.match(/\:(\d*)\)/)
|
||||
md = extract(caller, /\:(\d*)\)/)
|
||||
m = md ? md[1] : 0
|
||||
if (m) line = m
|
||||
}
|
||||
|
||||
734
source/quickjs.c
734
source/quickjs.c
@@ -30399,56 +30399,6 @@ static int check_regexp_g_flag(JSContext *ctx, JSValueConst regexp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static JSValue js_string_match(JSContext *ctx, JSValueConst this_val,
|
||||
int argc, JSValueConst *argv, int atom)
|
||||
{
|
||||
// match(rx), search(rx), matchAll(rx)
|
||||
// atom is JS_ATOM_Symbol_match, JS_ATOM_Symbol_search, or JS_ATOM_Symbol_matchAll
|
||||
JSValueConst O = this_val, regexp = argv[0], args[2];
|
||||
JSValue matcher, S, rx, result, str;
|
||||
int args_len;
|
||||
|
||||
if (JS_IsNull(O) || JS_IsNull(O))
|
||||
return JS_ThrowTypeError(ctx, "cannot convert to object");
|
||||
|
||||
if (!JS_IsNull(regexp) && !JS_IsNull(regexp)) {
|
||||
matcher = JS_GetProperty(ctx, regexp, atom);
|
||||
if (JS_IsException(matcher))
|
||||
return JS_EXCEPTION;
|
||||
if (atom == JS_ATOM_Symbol_matchAll) {
|
||||
if (check_regexp_g_flag(ctx, regexp) < 0) {
|
||||
JS_FreeValue(ctx, matcher);
|
||||
return JS_EXCEPTION;
|
||||
}
|
||||
}
|
||||
if (!JS_IsNull(matcher) && !JS_IsNull(matcher)) {
|
||||
return JS_CallFree(ctx, matcher, regexp, 1, &O);
|
||||
}
|
||||
}
|
||||
S = JS_ToString(ctx, O);
|
||||
if (JS_IsException(S))
|
||||
return JS_EXCEPTION;
|
||||
args_len = 1;
|
||||
args[0] = regexp;
|
||||
str = JS_NULL;
|
||||
if (atom == JS_ATOM_Symbol_matchAll) {
|
||||
str = js_new_string8(ctx, "g");
|
||||
if (JS_IsException(str))
|
||||
goto fail;
|
||||
args[args_len++] = (JSValueConst)str;
|
||||
}
|
||||
rx = JS_CallConstructor(ctx, ctx->regexp_ctor, args_len, args);
|
||||
JS_FreeValue(ctx, str);
|
||||
if (JS_IsException(rx)) {
|
||||
fail:
|
||||
JS_FreeValue(ctx, S);
|
||||
return JS_EXCEPTION;
|
||||
}
|
||||
result = JS_InvokeFree(ctx, rx, atom, 1, (JSValueConst *)&S);
|
||||
JS_FreeValue(ctx, S);
|
||||
return result;
|
||||
}
|
||||
|
||||
static JSValue js_string___GetSubstitution(JSContext *ctx, JSValueConst this_val,
|
||||
int argc, JSValueConst *argv)
|
||||
{
|
||||
@@ -30738,7 +30688,6 @@ static const JSCFunctionListEntry js_string_proto_funcs[] = {
|
||||
JS_CFUNC_DEF("concat", 1, js_string_concat),
|
||||
JS_CFUNC_MAGIC_DEF("indexOf", 1, js_string_indexOf, 0 ),
|
||||
JS_CFUNC_MAGIC_DEF("lastIndexOf", 1, js_string_indexOf, 1 ),
|
||||
JS_CFUNC_MAGIC_DEF("match", 1, js_string_match, JS_ATOM_Symbol_match ),
|
||||
JS_CFUNC_MAGIC_DEF("replace", 2, js_string_replace, 0 ),
|
||||
JS_CFUNC_MAGIC_DEF("replaceAll", 2, js_string_replace, 1 ),
|
||||
JS_CFUNC_DEF("toString", 0, js_string_toString ),
|
||||
@@ -34345,60 +34294,6 @@ static JSValue js_cell_text_codepoint(JSContext *ctx, JSValueConst this_val,
|
||||
return JS_NewInt32(ctx, c);
|
||||
}
|
||||
|
||||
/* text.search(str, target, from) - find substring */
|
||||
static JSValue js_cell_text_search(JSContext *ctx, JSValueConst this_val,
|
||||
int argc, JSValueConst *argv)
|
||||
{
|
||||
if (argc < 2) return JS_NULL;
|
||||
|
||||
int tag1 = JS_VALUE_GET_TAG(argv[0]);
|
||||
int tag2 = JS_VALUE_GET_TAG(argv[1]);
|
||||
if ((tag1 != JS_TAG_STRING && tag1 != JS_TAG_STRING_ROPE) ||
|
||||
(tag2 != JS_TAG_STRING && tag2 != JS_TAG_STRING_ROPE))
|
||||
return JS_NULL;
|
||||
|
||||
JSValue str = JS_ToString(ctx, argv[0]);
|
||||
if (JS_IsException(str)) return str;
|
||||
|
||||
JSValue target = JS_ToString(ctx, argv[1]);
|
||||
if (JS_IsException(target)) {
|
||||
JS_FreeValue(ctx, str);
|
||||
return target;
|
||||
}
|
||||
|
||||
JSString *p = JS_VALUE_GET_STRING(str);
|
||||
JSString *t = JS_VALUE_GET_STRING(target);
|
||||
|
||||
int from = 0;
|
||||
if (argc > 2 && !JS_IsNull(argv[2])) {
|
||||
if (JS_ToInt32(ctx, &from, argv[2])) {
|
||||
JS_FreeValue(ctx, str);
|
||||
JS_FreeValue(ctx, target);
|
||||
return JS_NULL;
|
||||
}
|
||||
if (from < 0) from += p->len;
|
||||
if (from < 0) from = 0;
|
||||
}
|
||||
|
||||
int result = -1;
|
||||
int len = p->len;
|
||||
int t_len = t->len;
|
||||
|
||||
if (len >= t_len) {
|
||||
for (int i = from; i <= len - t_len; i++) {
|
||||
if (!string_cmp(p, t, i, 0, t_len)) {
|
||||
result = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
JS_FreeValue(ctx, str);
|
||||
JS_FreeValue(ctx, target);
|
||||
|
||||
if (result == -1) return JS_NULL;
|
||||
return JS_NewInt32(ctx, result);
|
||||
}
|
||||
/* Helpers (C, not C++). Put these above js_cell_text_replace in the same C file. */
|
||||
|
||||
static int sb_concat_value_to_string_free(JSContext *ctx, StringBuffer *b, JSValue v)
|
||||
@@ -34438,6 +34333,18 @@ static JSValue make_replacement(JSContext *ctx, int argc, JSValueConst *argv, in
|
||||
return JS_AtomToString(ctx, JS_ATOM_empty_string);
|
||||
}
|
||||
|
||||
/* Duck-typed regexp test used by the text helpers.
 *
 * Returns 1 when `v` is an object whose "exec" property is a function,
 * 0 when it is not an object or "exec" is not callable, and -1 when reading
 * "exec" raised an exception (the exception is left pending on ctx).
 */
static int JS_IsRegExp(JSContext *ctx, JSValueConst v)
{
    JSValue exec_prop;
    int callable;

    if (!JS_IsObject(v))
        return 0;

    exec_prop = JS_GetPropertyStr(ctx, v, "exec");
    if (JS_IsException(exec_prop))
        return -1;

    callable = JS_IsFunction(ctx, exec_prop);
    JS_FreeValue(ctx, exec_prop);
    return callable;
}
|
||||
|
||||
/* text.replace(text, target, replacement, limit)
|
||||
*
|
||||
* Return a new text in which the target is replaced by the replacement.
|
||||
@@ -34451,145 +34358,502 @@ static JSValue make_replacement(JSContext *ctx, int argc, JSValueConst *argv, in
|
||||
* Example: replace("abc", "", "-") => "-a-b-c-"
|
||||
* Boundaries count toward limit even if replacement returns null.
|
||||
*/
|
||||
static JSValue js_cell_text_replace(JSContext *ctx, JSValueConst this_val,
|
||||
int argc, JSValueConst *argv)
|
||||
|
||||
static JSValue js_cell_text_replace(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv)
|
||||
{
|
||||
if (argc < 2) return JS_NULL;
|
||||
|
||||
/* Require text + target be strings (or ropes) */
|
||||
int tag_text = JS_VALUE_GET_TAG(argv[0]);
|
||||
if (tag_text != JS_TAG_STRING && tag_text != JS_TAG_STRING_ROPE) return JS_NULL;
|
||||
|
||||
int target_is_regex = 0;
|
||||
{
|
||||
int tag_text = JS_VALUE_GET_TAG(argv[0]);
|
||||
int tag_tgt = JS_VALUE_GET_TAG(argv[1]);
|
||||
if ((tag_text != JS_TAG_STRING && tag_text != JS_TAG_STRING_ROPE) ||
|
||||
(tag_tgt != JS_TAG_STRING && tag_tgt != JS_TAG_STRING_ROPE))
|
||||
if (tag_tgt == JS_TAG_STRING || tag_tgt == JS_TAG_STRING_ROPE) {
|
||||
target_is_regex = 0;
|
||||
} else if (JS_IsObject(argv[1]) && JS_IsRegExp(ctx, argv[1])) {
|
||||
target_is_regex = 1;
|
||||
} else {
|
||||
return JS_NULL;
|
||||
}
|
||||
}
|
||||
|
||||
JSValue str = JS_ToString(ctx, argv[0]);
|
||||
if (JS_IsException(str)) return str;
|
||||
|
||||
JSValue target = JS_ToString(ctx, argv[1]);
|
||||
if (JS_IsException(target)) {
|
||||
JS_FreeValue(ctx, str);
|
||||
return target;
|
||||
}
|
||||
|
||||
JSString *sp = JS_VALUE_GET_STRING(str);
|
||||
JSString *tp = JS_VALUE_GET_STRING(target);
|
||||
int len = (int)sp->len;
|
||||
|
||||
int32_t limit = -1; /* -1 means unlimited */
|
||||
int32_t limit = -1;
|
||||
if (argc > 3 && !JS_IsNull(argv[3])) {
|
||||
if (JS_ToInt32(ctx, &limit, argv[3])) {
|
||||
JS_FreeValue(ctx, str);
|
||||
JS_FreeValue(ctx, target);
|
||||
return JS_NULL;
|
||||
}
|
||||
if (limit < 0) limit = -1;
|
||||
}
|
||||
|
||||
int len = (int)sp->len;
|
||||
int t_len = (int)tp->len;
|
||||
|
||||
StringBuffer b_s, *b = &b_s;
|
||||
string_buffer_init(ctx, b, len);
|
||||
|
||||
/* Empty target: boundary replacements */
|
||||
if (t_len == 0) {
|
||||
if (!target_is_regex) {
|
||||
JSValue target = JS_ToString(ctx, argv[1]);
|
||||
if (JS_IsException(target)) {
|
||||
JS_FreeValue(ctx, str);
|
||||
return target;
|
||||
}
|
||||
|
||||
JSString *tp = JS_VALUE_GET_STRING(target);
|
||||
int t_len = (int)tp->len;
|
||||
|
||||
if (t_len == 0) {
|
||||
int32_t count = 0;
|
||||
|
||||
for (int boundary = 0; boundary <= len; boundary++) {
|
||||
if (limit >= 0 && count >= limit) break;
|
||||
|
||||
JSValue match = JS_AtomToString(ctx, JS_ATOM_empty_string);
|
||||
if (JS_IsException(match)) goto fail_str_target;
|
||||
|
||||
JSValue rep = make_replacement(ctx, argc, argv, boundary, match);
|
||||
if (JS_IsException(rep)) goto fail_str_target;
|
||||
|
||||
count++;
|
||||
|
||||
if (!JS_IsNull(rep)) {
|
||||
if (sb_concat_value_to_string_free(ctx, b, rep) < 0) goto fail_str_target;
|
||||
} else {
|
||||
JS_FreeValue(ctx, rep);
|
||||
}
|
||||
|
||||
if (boundary < len) {
|
||||
JSValue ch = js_sub_string(ctx, sp, boundary, boundary + 1);
|
||||
if (JS_IsException(ch)) goto fail_str_target;
|
||||
if (string_buffer_concat_value_free(b, ch)) goto fail_str_target;
|
||||
}
|
||||
}
|
||||
|
||||
JS_FreeValue(ctx, str);
|
||||
JS_FreeValue(ctx, target);
|
||||
return string_buffer_end(b);
|
||||
}
|
||||
|
||||
int pos = 0;
|
||||
int32_t count = 0;
|
||||
|
||||
for (int boundary = 0; boundary <= len; boundary++) {
|
||||
if (limit >= 0 && count >= limit) break;
|
||||
while (pos <= len - t_len && (limit < 0 || count < limit)) {
|
||||
int found = -1;
|
||||
|
||||
/* match text is "" */
|
||||
JSValue match = JS_AtomToString(ctx, JS_ATOM_empty_string);
|
||||
if (JS_IsException(match)) goto fail;
|
||||
for (int i = pos; i <= len - t_len; i++) {
|
||||
if (!string_cmp(sp, tp, i, 0, t_len)) {
|
||||
found = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (found < 0) break;
|
||||
|
||||
JSValue rep = make_replacement(ctx, argc, argv, boundary, match);
|
||||
if (JS_IsException(rep)) goto fail;
|
||||
if (found > pos) {
|
||||
JSValue sub = js_sub_string(ctx, sp, pos, found);
|
||||
if (JS_IsException(sub)) goto fail_str_target;
|
||||
if (string_buffer_concat_value_free(b, sub)) goto fail_str_target;
|
||||
}
|
||||
|
||||
JSValue match = js_sub_string(ctx, sp, found, found + t_len);
|
||||
if (JS_IsException(match)) goto fail_str_target;
|
||||
|
||||
JSValue rep = make_replacement(ctx, argc, argv, found, match);
|
||||
if (JS_IsException(rep)) goto fail_str_target;
|
||||
|
||||
/* Count includes null matches */
|
||||
count++;
|
||||
|
||||
if (!JS_IsNull(rep)) {
|
||||
if (sb_concat_value_to_string_free(ctx, b, rep) < 0) goto fail;
|
||||
if (sb_concat_value_to_string_free(ctx, b, rep) < 0) goto fail_str_target;
|
||||
} else {
|
||||
JS_FreeValue(ctx, rep);
|
||||
}
|
||||
|
||||
/* Copy next character between boundaries (does not affect count) */
|
||||
if (boundary < len) {
|
||||
JSValue ch = js_sub_string(ctx, sp, boundary, boundary + 1);
|
||||
if (JS_IsException(ch)) goto fail;
|
||||
if (string_buffer_concat_value_free(b, ch)) goto fail;
|
||||
}
|
||||
pos = found + t_len;
|
||||
}
|
||||
|
||||
if (pos < len) {
|
||||
JSValue sub = js_sub_string(ctx, sp, pos, len);
|
||||
if (JS_IsException(sub)) goto fail_str_target;
|
||||
if (string_buffer_concat_value_free(b, sub)) goto fail_str_target;
|
||||
}
|
||||
|
||||
JS_FreeValue(ctx, str);
|
||||
JS_FreeValue(ctx, target);
|
||||
return string_buffer_end(b);
|
||||
|
||||
fail_str_target:
|
||||
string_buffer_free(b);
|
||||
JS_FreeValue(ctx, str);
|
||||
JS_FreeValue(ctx, target);
|
||||
return JS_EXCEPTION;
|
||||
}
|
||||
|
||||
/* Non-empty target: left-to-right, non-overlapping */
|
||||
/* Regex target */
|
||||
JSValue rx = argv[1];
|
||||
JSValue orig_last_index = JS_GetPropertyStr(ctx, rx, "lastIndex");
|
||||
if (JS_IsException(orig_last_index)) goto fail_rx;
|
||||
int have_orig_last_index = 1;
|
||||
|
||||
int pos = 0;
|
||||
int32_t count = 0;
|
||||
|
||||
while (pos <= len - t_len && (limit < 0 || count < limit)) {
|
||||
int found = -1;
|
||||
while (pos <= len && (limit < 0 || count < limit)) {
|
||||
if (JS_SetPropertyStr(ctx, rx, "lastIndex", JS_NewInt32(ctx, 0)) < 0) goto fail_rx;
|
||||
|
||||
/* Find next occurrence (naive search) */
|
||||
for (int i = pos; i <= len - t_len; i++) {
|
||||
if (!string_cmp(sp, tp, i, 0, t_len)) {
|
||||
found = i;
|
||||
break;
|
||||
}
|
||||
JSValue sub_str = js_sub_string(ctx, sp, pos, len);
|
||||
if (JS_IsException(sub_str)) goto fail_rx;
|
||||
|
||||
JSValue exec_res = JS_Invoke(ctx, rx, JS_ATOM_exec, 1, (JSValueConst *)&sub_str);
|
||||
JS_FreeValue(ctx, sub_str);
|
||||
if (JS_IsException(exec_res)) goto fail_rx;
|
||||
|
||||
if (JS_IsNull(exec_res)) {
|
||||
JS_FreeValue(ctx, exec_res);
|
||||
break;
|
||||
}
|
||||
|
||||
JSValue idx_val = JS_GetPropertyStr(ctx, exec_res, "index");
|
||||
if (JS_IsException(idx_val)) {
|
||||
JS_FreeValue(ctx, exec_res);
|
||||
goto fail_rx;
|
||||
}
|
||||
|
||||
int32_t local_index = 0;
|
||||
if (JS_ToInt32(ctx, &local_index, idx_val)) {
|
||||
JS_FreeValue(ctx, idx_val);
|
||||
JS_FreeValue(ctx, exec_res);
|
||||
goto fail_rx;
|
||||
}
|
||||
JS_FreeValue(ctx, idx_val);
|
||||
|
||||
if (local_index < 0) local_index = 0;
|
||||
int found = pos + local_index;
|
||||
if (found < pos) found = pos;
|
||||
if (found > len) {
|
||||
JS_FreeValue(ctx, exec_res);
|
||||
break;
|
||||
}
|
||||
|
||||
JSValue match = JS_GetPropertyUint32(ctx, exec_res, 0);
|
||||
JS_FreeValue(ctx, exec_res);
|
||||
if (JS_IsException(match)) goto fail_rx;
|
||||
|
||||
int match_len = 0;
|
||||
{
|
||||
JSValue mstr = JS_ToString(ctx, match);
|
||||
if (JS_IsException(mstr)) goto fail_rx;
|
||||
JSString *mp = JS_VALUE_GET_STRING(mstr);
|
||||
match_len = (int)mp->len;
|
||||
JS_FreeValue(ctx, mstr);
|
||||
}
|
||||
if (found < 0) break;
|
||||
|
||||
/* Copy prefix up to match */
|
||||
if (found > pos) {
|
||||
JSValue sub = js_sub_string(ctx, sp, pos, found);
|
||||
if (JS_IsException(sub)) goto fail;
|
||||
if (string_buffer_concat_value_free(b, sub)) goto fail;
|
||||
JSValue prefix = js_sub_string(ctx, sp, pos, found);
|
||||
if (JS_IsException(prefix)) goto fail_rx;
|
||||
if (string_buffer_concat_value_free(b, prefix)) goto fail_rx;
|
||||
}
|
||||
|
||||
/* Match text for callback */
|
||||
JSValue match = js_sub_string(ctx, sp, found, found + t_len);
|
||||
if (JS_IsException(match)) goto fail;
|
||||
|
||||
JSValue rep = make_replacement(ctx, argc, argv, found, match);
|
||||
if (JS_IsException(rep)) goto fail;
|
||||
if (JS_IsException(rep)) goto fail_rx;
|
||||
|
||||
/* Count includes null matches */
|
||||
count++;
|
||||
|
||||
if (!JS_IsNull(rep)) {
|
||||
if (sb_concat_value_to_string_free(ctx, b, rep) < 0) goto fail;
|
||||
if (sb_concat_value_to_string_free(ctx, b, rep) < 0) goto fail_rx;
|
||||
} else {
|
||||
JS_FreeValue(ctx, rep);
|
||||
}
|
||||
|
||||
pos = found + t_len;
|
||||
pos = found + match_len;
|
||||
if (match_len == 0) {
|
||||
if (pos < len) pos++;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Copy remainder */
|
||||
if (pos < len) {
|
||||
JSValue sub = js_sub_string(ctx, sp, pos, len);
|
||||
if (JS_IsException(sub)) goto fail;
|
||||
if (string_buffer_concat_value_free(b, sub)) goto fail;
|
||||
JSValue tail = js_sub_string(ctx, sp, pos, len);
|
||||
if (JS_IsException(tail)) goto fail_rx;
|
||||
if (string_buffer_concat_value_free(b, tail)) goto fail_rx;
|
||||
}
|
||||
|
||||
if (have_orig_last_index) JS_SetPropertyStr(ctx, rx, "lastIndex", orig_last_index);
|
||||
|
||||
JS_FreeValue(ctx, str);
|
||||
JS_FreeValue(ctx, target);
|
||||
return string_buffer_end(b);
|
||||
|
||||
fail:
|
||||
fail_rx:
|
||||
string_buffer_free(b);
|
||||
if (!JS_IsNull(orig_last_index) && !JS_IsException(orig_last_index)) {
|
||||
JS_SetPropertyStr(ctx, rx, "lastIndex", orig_last_index);
|
||||
} else {
|
||||
JS_FreeValue(ctx, orig_last_index);
|
||||
}
|
||||
JS_FreeValue(ctx, str);
|
||||
JS_FreeValue(ctx, target);
|
||||
return JS_EXCEPTION;
|
||||
}
|
||||
|
||||
|
||||
/* text.search(str, target, from) - find substring or regex match */
|
||||
static JSValue js_cell_text_search(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv)
|
||||
{
|
||||
if (argc < 2) return JS_NULL;
|
||||
|
||||
int tag1 = JS_VALUE_GET_TAG(argv[0]);
|
||||
if (tag1 != JS_TAG_STRING && tag1 != JS_TAG_STRING_ROPE) return JS_NULL;
|
||||
|
||||
int target_is_regex = 0;
|
||||
int tag2 = JS_VALUE_GET_TAG(argv[1]);
|
||||
if (tag2 == JS_TAG_STRING || tag2 == JS_TAG_STRING_ROPE) {
|
||||
target_is_regex = 0;
|
||||
} else if (JS_IsObject(argv[1]) && JS_IsRegExp(ctx, argv[1])) {
|
||||
target_is_regex = 1;
|
||||
} else {
|
||||
return JS_NULL;
|
||||
}
|
||||
|
||||
JSValue str = JS_ToString(ctx, argv[0]);
|
||||
if (JS_IsException(str)) return str;
|
||||
|
||||
JSString *p = JS_VALUE_GET_STRING(str);
|
||||
int len = (int)p->len;
|
||||
|
||||
int from = 0;
|
||||
if (argc > 2 && !JS_IsNull(argv[2])) {
|
||||
if (JS_ToInt32(ctx, &from, argv[2])) {
|
||||
JS_FreeValue(ctx, str);
|
||||
return JS_NULL;
|
||||
}
|
||||
if (from < 0) from += len;
|
||||
if (from < 0) from = 0;
|
||||
}
|
||||
if (from > len) {
|
||||
JS_FreeValue(ctx, str);
|
||||
return JS_NULL;
|
||||
}
|
||||
|
||||
if (!target_is_regex) {
|
||||
JSValue target = JS_ToString(ctx, argv[1]);
|
||||
if (JS_IsException(target)) {
|
||||
JS_FreeValue(ctx, str);
|
||||
return target;
|
||||
}
|
||||
|
||||
JSString *t = JS_VALUE_GET_STRING(target);
|
||||
int t_len = (int)t->len;
|
||||
|
||||
int result = -1;
|
||||
if (len >= t_len) {
|
||||
for (int i = from; i <= len - t_len; i++) {
|
||||
if (!string_cmp(p, t, i, 0, t_len)) {
|
||||
result = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
JS_FreeValue(ctx, str);
|
||||
JS_FreeValue(ctx, target);
|
||||
|
||||
if (result == -1) return JS_NULL;
|
||||
return JS_NewInt32(ctx, result);
|
||||
}
|
||||
|
||||
/* Regex target */
|
||||
JSValue rx = argv[1];
|
||||
JSValue orig_last_index = JS_GetPropertyStr(ctx, rx, "lastIndex");
|
||||
if (JS_IsException(orig_last_index)) {
|
||||
JS_FreeValue(ctx, str);
|
||||
return JS_EXCEPTION;
|
||||
}
|
||||
int have_orig_last_index = 1;
|
||||
|
||||
if (JS_SetPropertyStr(ctx, rx, "lastIndex", JS_NewInt32(ctx, 0)) < 0) goto fail_rx_search;
|
||||
|
||||
JSValue sub_str = js_sub_string(ctx, p, from, len);
|
||||
if (JS_IsException(sub_str)) goto fail_rx_search;
|
||||
|
||||
JSValue exec_res = JS_Invoke(ctx, rx, JS_ATOM_exec, 1, (JSValueConst *)&sub_str);
|
||||
JS_FreeValue(ctx, sub_str);
|
||||
if (JS_IsException(exec_res)) goto fail_rx_search;
|
||||
|
||||
if (JS_IsNull(exec_res)) {
|
||||
JS_FreeValue(ctx, exec_res);
|
||||
if (have_orig_last_index) JS_SetPropertyStr(ctx, rx, "lastIndex", orig_last_index);
|
||||
JS_FreeValue(ctx, str);
|
||||
return JS_NULL;
|
||||
}
|
||||
|
||||
JSValue idx_val = JS_GetPropertyStr(ctx, exec_res, "index");
|
||||
if (JS_IsException(idx_val)) {
|
||||
JS_FreeValue(ctx, exec_res);
|
||||
goto fail_rx_search;
|
||||
}
|
||||
|
||||
int32_t local_index = 0;
|
||||
if (JS_ToInt32(ctx, &local_index, idx_val)) {
|
||||
JS_FreeValue(ctx, idx_val);
|
||||
JS_FreeValue(ctx, exec_res);
|
||||
goto fail_rx_search;
|
||||
}
|
||||
JS_FreeValue(ctx, idx_val);
|
||||
JS_FreeValue(ctx, exec_res);
|
||||
|
||||
if (local_index < 0) local_index = 0;
|
||||
|
||||
if (have_orig_last_index) JS_SetPropertyStr(ctx, rx, "lastIndex", orig_last_index);
|
||||
|
||||
JS_FreeValue(ctx, str);
|
||||
return JS_NewInt32(ctx, from + local_index);
|
||||
|
||||
fail_rx_search:
|
||||
if (!JS_IsNull(orig_last_index) && !JS_IsException(orig_last_index)) {
|
||||
JS_SetPropertyStr(ctx, rx, "lastIndex", orig_last_index);
|
||||
} else {
|
||||
JS_FreeValue(ctx, orig_last_index);
|
||||
}
|
||||
JS_FreeValue(ctx, str);
|
||||
return JS_EXCEPTION;
|
||||
}
|
||||
/* Read the code unit at position idx of s, picking the 16-bit or 8-bit
 * storage according to s->is_wide_char. No bounds check is performed. */
static inline uint16_t js_str_get(JSString *s, int idx) {
    if (s->is_wide_char)
        return s->u.str16[idx];
    return s->u.str8[idx];
}
|
||||
|
||||
/* Locate the first occurrence of `needle` that lies entirely inside
 * hay[from, to).
 *
 * `from` is clamped to >= 0 and `to` to [0, hay->len]. Returns the start index
 * of the match, `from` for an empty needle, or -1 when the range is empty in
 * the wrong direction (from > to), too short, or holds no match.
 */
static int js_str_find_range(JSString *hay, int from, int to, JSString *needle) {
    const int needle_len = (int)needle->len;
    const int hay_len = (int)hay->len;

    /* Normalise the window before any arithmetic on it. */
    if (from < 0) from = 0;
    if (to < 0) to = 0;
    if (to > hay_len) to = hay_len;

    if (from > to) return -1;
    if (needle_len == 0) return from;
    if (to - from < needle_len) return -1;

    /* Last index at which a full-length match still fits in the window. */
    const int last_start = to - needle_len;
    int start = from;
    while (start <= last_start) {
        int matched = 0;
        while (matched < needle_len &&
               js_str_get(hay, start + matched) == js_str_get(needle, matched)) {
            matched++;
        }
        if (matched == needle_len) return start;
        start++;
    }
    return -1;
}
|
||||
|
||||
/* text_extract(text, pattern, from?, to?) - extract match using regexp or literal text */
|
||||
static JSValue js_cell_text_extract(JSContext *ctx, JSValueConst this_val,
|
||||
int argc, JSValueConst *argv)
|
||||
{
|
||||
if (argc < 2) return JS_NULL;
|
||||
|
||||
JSValue str = JS_ToString(ctx, argv[0]);
|
||||
if (JS_IsException(str)) return JS_EXCEPTION;
|
||||
|
||||
JSString *p = JS_VALUE_GET_STRING(str);
|
||||
int len = (int)p->len;
|
||||
|
||||
int from = 0;
|
||||
if (argc >= 3 && !JS_IsNull(argv[2])) {
|
||||
if (JS_ToInt32(ctx, &from, argv[2])) {
|
||||
JS_FreeValue(ctx, str);
|
||||
return JS_EXCEPTION;
|
||||
}
|
||||
if (from < 0) from += len;
|
||||
if (from < 0) from = 0;
|
||||
if (from > len) from = len;
|
||||
}
|
||||
|
||||
int to = len;
|
||||
if (argc >= 4 && !JS_IsNull(argv[3])) {
|
||||
if (JS_ToInt32(ctx, &to, argv[3])) {
|
||||
JS_FreeValue(ctx, str);
|
||||
return JS_EXCEPTION;
|
||||
}
|
||||
if (to < 0) to += len;
|
||||
if (to < 0) to = 0;
|
||||
if (to > len) to = len;
|
||||
}
|
||||
|
||||
if (from > to) {
|
||||
JS_FreeValue(ctx, str);
|
||||
return JS_NULL;
|
||||
}
|
||||
|
||||
/* RegExp path */
|
||||
if (js_is_regexp(ctx, argv[1])) {
|
||||
JSValue substr;
|
||||
|
||||
if (from == 0 && to == len) {
|
||||
substr = JS_DupValue(ctx, str);
|
||||
} else {
|
||||
substr = js_sub_string(ctx, p, from, to);
|
||||
if (JS_IsException(substr)) {
|
||||
JS_FreeValue(ctx, str);
|
||||
return JS_EXCEPTION;
|
||||
}
|
||||
}
|
||||
|
||||
JSValue exec_func = JS_GetPropertyStr(ctx, argv[1], "exec");
|
||||
if (JS_IsException(exec_func)) {
|
||||
JS_FreeValue(ctx, substr);
|
||||
JS_FreeValue(ctx, str);
|
||||
return JS_EXCEPTION;
|
||||
}
|
||||
|
||||
JSValue result = JS_Call(ctx, exec_func, argv[1], 1, &substr);
|
||||
|
||||
JS_FreeValue(ctx, exec_func);
|
||||
JS_FreeValue(ctx, substr);
|
||||
JS_FreeValue(ctx, str);
|
||||
|
||||
if (JS_IsException(result)) return JS_EXCEPTION;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Literal text path */
|
||||
JSValue needle_val = JS_ToString(ctx, argv[1]);
|
||||
if (JS_IsException(needle_val)) {
|
||||
JS_FreeValue(ctx, str);
|
||||
return JS_EXCEPTION;
|
||||
}
|
||||
|
||||
JSString *needle = JS_VALUE_GET_STRING(needle_val);
|
||||
int pos = js_str_find_range(p, from, to, needle);
|
||||
|
||||
JS_FreeValue(ctx, needle_val);
|
||||
|
||||
if (pos < 0) {
|
||||
JS_FreeValue(ctx, str);
|
||||
return JS_NULL;
|
||||
}
|
||||
|
||||
JSValue arr = JS_NewArray(ctx);
|
||||
if (JS_IsException(arr)) {
|
||||
JS_FreeValue(ctx, str);
|
||||
return JS_EXCEPTION;
|
||||
}
|
||||
|
||||
JSValue match = js_sub_string(ctx, p, pos, pos + (int)needle->len);
|
||||
if (JS_IsException(match)) {
|
||||
JS_FreeValue(ctx, arr);
|
||||
JS_FreeValue(ctx, str);
|
||||
return JS_EXCEPTION;
|
||||
}
|
||||
|
||||
JS_DefinePropertyValueUint32(ctx, arr, 0, match, JS_PROP_C_W_E);
|
||||
JS_DefinePropertyValueStr(ctx, arr, "index", JS_NewInt32(ctx, pos), JS_PROP_C_W_E);
|
||||
JS_DefinePropertyValueStr(ctx, arr, "input", JS_DupValue(ctx, str), JS_PROP_C_W_E);
|
||||
|
||||
JS_FreeValue(ctx, str);
|
||||
return arr;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
* array function and sub-functions
|
||||
* ---------------------------------------------------------------------------- */
|
||||
@@ -34833,7 +35097,8 @@ static JSValue js_cell_array(JSContext *ctx, JSValueConst this_val,
|
||||
return result;
|
||||
}
|
||||
|
||||
int tag2 = JS_VALUE_GET_TAG(argv[1]);
|
||||
int tag2 = JS_VALUE_GET_TAG(argv[1]);
|
||||
|
||||
if (tag2 == JS_TAG_STRING || tag2 == JS_TAG_STRING_ROPE) {
|
||||
/* Split by separator */
|
||||
const char *cstr = JS_ToCString(ctx, str);
|
||||
@@ -34859,7 +35124,6 @@ static JSValue js_cell_array(JSContext *ctx, JSValueConst this_val,
|
||||
const char *found;
|
||||
|
||||
if (sep_len == 0) {
|
||||
/* Split into characters */
|
||||
for (int i = 0; i < len; i++) {
|
||||
JSValue ch = js_sub_string(ctx, p, i, i + 1);
|
||||
JS_SetPropertyInt64(ctx, result, idx++, ch);
|
||||
@@ -34880,6 +35144,131 @@ static JSValue js_cell_array(JSContext *ctx, JSValueConst this_val,
|
||||
return result;
|
||||
}
|
||||
|
||||
if (JS_IsObject(argv[1]) && JS_IsRegExp(ctx, argv[1])) {
|
||||
/* Split by regex (manual "global" iteration; ignore g flag semantics) */
|
||||
JSValue rx = argv[1];
|
||||
|
||||
JSValue result = JS_NewArray(ctx);
|
||||
if (JS_IsException(result)) {
|
||||
JS_FreeValue(ctx, str);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Save & restore lastIndex to avoid mutating caller-visible state */
|
||||
JSValue orig_last_index = JS_GetPropertyStr(ctx, rx, "lastIndex");
|
||||
if (JS_IsException(orig_last_index)) {
|
||||
JS_FreeValue(ctx, result);
|
||||
JS_FreeValue(ctx, str);
|
||||
return JS_EXCEPTION;
|
||||
}
|
||||
|
||||
int pos = 0;
|
||||
int64_t out_idx = 0;
|
||||
|
||||
while (pos <= len) {
|
||||
/* force lastIndex = 0 so flags don't matter and we fully control iteration */
|
||||
if (JS_SetPropertyStr(ctx, rx, "lastIndex", JS_NewInt32(ctx, 0)) < 0) goto fail_rx_split;
|
||||
|
||||
JSValue sub_str = js_sub_string(ctx, p, pos, len);
|
||||
if (JS_IsException(sub_str)) goto fail_rx_split;
|
||||
|
||||
JSValue exec_res = JS_Invoke(ctx, rx, JS_ATOM_exec, 1, (JSValueConst *)&sub_str);
|
||||
JS_FreeValue(ctx, sub_str);
|
||||
if (JS_IsException(exec_res)) goto fail_rx_split;
|
||||
|
||||
if (JS_IsNull(exec_res)) {
|
||||
JS_FreeValue(ctx, exec_res);
|
||||
/* remainder */
|
||||
JSValue tail = js_sub_string(ctx, p, pos, len);
|
||||
if (JS_IsException(tail)) goto fail_rx_split;
|
||||
JS_SetPropertyInt64(ctx, result, out_idx++, tail);
|
||||
break;
|
||||
}
|
||||
|
||||
/* local match index within sub_str */
|
||||
JSValue idx_val = JS_GetPropertyStr(ctx, exec_res, "index");
|
||||
if (JS_IsException(idx_val)) {
|
||||
JS_FreeValue(ctx, exec_res);
|
||||
goto fail_rx_split;
|
||||
}
|
||||
|
||||
int32_t local_index = 0;
|
||||
if (JS_ToInt32(ctx, &local_index, idx_val)) {
|
||||
JS_FreeValue(ctx, idx_val);
|
||||
JS_FreeValue(ctx, exec_res);
|
||||
goto fail_rx_split;
|
||||
}
|
||||
JS_FreeValue(ctx, idx_val);
|
||||
|
||||
if (local_index < 0) local_index = 0;
|
||||
|
||||
int found = pos + local_index;
|
||||
if (found < pos) found = pos;
|
||||
if (found > len) {
|
||||
/* treat as no more matches */
|
||||
JS_FreeValue(ctx, exec_res);
|
||||
JSValue tail = js_sub_string(ctx, p, pos, len);
|
||||
if (JS_IsException(tail)) goto fail_rx_split;
|
||||
JS_SetPropertyInt64(ctx, result, out_idx++, tail);
|
||||
break;
|
||||
}
|
||||
|
||||
/* match text is exec_res[0] */
|
||||
JSValue match = JS_GetPropertyUint32(ctx, exec_res, 0);
|
||||
JS_FreeValue(ctx, exec_res);
|
||||
if (JS_IsException(match)) goto fail_rx_split;
|
||||
|
||||
/* compute match length in code units */
|
||||
int match_len = 0;
|
||||
{
|
||||
JSValue mstr = JS_ToString(ctx, match);
|
||||
if (JS_IsException(mstr)) {
|
||||
JS_FreeValue(ctx, match);
|
||||
goto fail_rx_split;
|
||||
}
|
||||
JSString *mp = JS_VALUE_GET_STRING(mstr);
|
||||
match_len = (int)mp->len;
|
||||
JS_FreeValue(ctx, mstr);
|
||||
}
|
||||
JS_FreeValue(ctx, match);
|
||||
|
||||
/* emit piece before match */
|
||||
JSValue part = js_sub_string(ctx, p, pos, found);
|
||||
if (JS_IsException(part)) goto fail_rx_split;
|
||||
JS_SetPropertyInt64(ctx, result, out_idx++, part);
|
||||
|
||||
/* advance past match; ensure progress on empty matches */
|
||||
pos = found + match_len;
|
||||
if (match_len == 0) {
|
||||
if (found >= len) {
|
||||
/* match at end: add trailing empty field and stop */
|
||||
JSValue empty = JS_NewStringLen(ctx, "", 0);
|
||||
if (JS_IsException(empty)) goto fail_rx_split;
|
||||
JS_SetPropertyInt64(ctx, result, out_idx++, empty);
|
||||
break;
|
||||
}
|
||||
pos = found + 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* restore lastIndex */
|
||||
JS_SetPropertyStr(ctx, rx, "lastIndex", orig_last_index);
|
||||
|
||||
JS_FreeValue(ctx, str);
|
||||
return result;
|
||||
|
||||
fail_rx_split:
|
||||
/* best-effort restore lastIndex */
|
||||
if (!JS_IsException(orig_last_index)) {
|
||||
JS_SetPropertyStr(ctx, rx, "lastIndex", orig_last_index);
|
||||
} else {
|
||||
JS_FreeValue(ctx, orig_last_index);
|
||||
}
|
||||
JS_FreeValue(ctx, result);
|
||||
JS_FreeValue(ctx, str);
|
||||
return JS_EXCEPTION;
|
||||
}
|
||||
|
||||
if (tag2 == JS_TAG_INT || tag2 == JS_TAG_FLOAT64) {
|
||||
/* Dice into chunks */
|
||||
int chunk_len;
|
||||
@@ -37126,6 +37515,9 @@ void JS_AddIntrinsicBaseObjects(JSContext *ctx)
|
||||
JS_DefinePropertyValueStr(ctx, ctx->global_obj, "search",
|
||||
JS_NewCFunction(ctx, js_cell_text_search, "search", 3),
|
||||
JS_PROP_WRITABLE | JS_PROP_CONFIGURABLE);
|
||||
JS_DefinePropertyValueStr(ctx, ctx->global_obj, "extract",
|
||||
JS_NewCFunction(ctx, js_cell_text_extract, "extract", 3),
|
||||
JS_PROP_WRITABLE | JS_PROP_CONFIGURABLE);
|
||||
JS_DefinePropertyValueStr(ctx, ctx->global_obj, "reduce",
|
||||
JS_NewCFunction(ctx, js_cell_array_reduce, "reduce", 4),
|
||||
JS_PROP_WRITABLE | JS_PROP_CONFIGURABLE);
|
||||
|
||||
@@ -2858,6 +2858,26 @@ return {
|
||||
if (result != "he[2][3]o") throw "replace with function failed: " + result
|
||||
},
|
||||
|
||||
test_replace_with_function_limit: function() {
|
||||
var result = replace("banana", "a", (match, pos) => `[${pos}]`, 2)
|
||||
if (result != "b[1]n[3]na") throw "replace with function limit failed: " + result
|
||||
},
|
||||
|
||||
test_replace_with_regex: function() {
|
||||
var result = replace("banana", /a/, "o")
|
||||
if (result != "bonono") throw "replace with regex failed"
|
||||
},
|
||||
|
||||
test_replace_with_regex_limit: function() {
|
||||
var result = replace("banana", /a/, "o", 2)
|
||||
if (result != "bonona") throw "replace with regex limit failed: " + result
|
||||
},
|
||||
|
||||
test_replace_with_regex_function: function() {
|
||||
var result = replace("hello", /l/, (match, pos) => `[${pos}]`)
|
||||
if (result != "he[2][3]o") throw "replace with regex function failed: " + result
|
||||
},
|
||||
|
||||
// ============================================================================
|
||||
// TEXT FUNCTION (Conversion and Slicing)
|
||||
// ============================================================================
|
||||
@@ -3520,4 +3540,82 @@ return {
|
||||
if (result != 42) throw "immediately invoked function failed"
|
||||
},
|
||||
|
||||
test_text_split_text: function() {
|
||||
var text = "hello world"
|
||||
var result = array(text, " ")
|
||||
if (result.length != 2) throw "text split failed"
|
||||
if (result[0] != "hello") throw "text split failed"
|
||||
if (result[1] != "world") throw "text split failed"
|
||||
},
|
||||
|
||||
test_text_split_regex: function() {
|
||||
var text = "hello world"
|
||||
var result = array(text, /\s+/)
|
||||
if (result.length != 2) throw "text split failed"
|
||||
if (result[0] != "hello") throw "text split failed"
|
||||
if (result[1] != "world") throw "text split failed"
|
||||
},
|
||||
|
||||
test_text_search_text: function() {
|
||||
var text = "hello world"
|
||||
var result = search(text, "world")
|
||||
if (result != 6) throw "text search failed"
|
||||
},
|
||||
|
||||
test_text_search_regex: function() {
|
||||
var text = "hello world"
|
||||
var result = search(text, /world/)
|
||||
if (result != 6) throw "text search failed"
|
||||
},
|
||||
|
||||
test_extract_basic_text: function() {
|
||||
var text = "hello world"
|
||||
var result = extract(text, "world")
|
||||
if (result[0] != "world") throw "extract basic text failed"
|
||||
},
|
||||
|
||||
test_extract_text_not_found: function() {
|
||||
var text = "hello world"
|
||||
var result = extract(text, "xyz")
|
||||
if (result != null) throw "extract not found should return null"
|
||||
},
|
||||
|
||||
test_extract_regex_basic: function() {
|
||||
var text = "hello world"
|
||||
var result = extract(text, /world/)
|
||||
if (result[0] != "world") throw "extract regex basic failed"
|
||||
},
|
||||
|
||||
test_extract_regex_with_capture_group: function() {
|
||||
var text = "hello world"
|
||||
var result = extract(text, /(\w+) (\w+)/)
|
||||
if (result[0] != "hello world") throw "extract regex full match failed"
|
||||
if (result[1] != "hello") throw "extract regex capture group 1 failed"
|
||||
if (result[2] != "world") throw "extract regex capture group 2 failed"
|
||||
},
|
||||
|
||||
test_extract_regex_digits: function() {
|
||||
var text = "abc123def456"
|
||||
var result = extract(text, /(\d+)/)
|
||||
if (result[0] != "123") throw "extract regex digits failed"
|
||||
if (result[1] != "123") throw "extract regex digits capture failed"
|
||||
},
|
||||
|
||||
test_extract_with_from: function() {
|
||||
var text = "hello hello world"
|
||||
var result = extract(text, "hello", 1)
|
||||
if (result[0] != "hello") throw "extract with from failed"
|
||||
},
|
||||
|
||||
test_extract_with_from_to: function() {
|
||||
var text = "hello world hello"
|
||||
var result = extract(text, "hello", 0, 10)
|
||||
if (result[0] != "hello") throw "extract with from to failed"
|
||||
},
|
||||
|
||||
test_extract_regex_case_insensitive: function() {
|
||||
var text = "Hello World"
|
||||
var result = extract(text, /hello/i)
|
||||
if (result[0] != "Hello") throw "extract regex case insensitive failed"
|
||||
},
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user