optimize tokenize

This commit is contained in:
2026-02-10 05:52:19 -06:00
parent b8b110b616
commit ff11c49c39
2 changed files with 44 additions and 46 deletions

View File

@@ -1,11 +1,6 @@
var tokenize = function(src, filename) {
var len = length(src)
var cp = []
var _i = 0
while (_i < len) {
push(cp, codepoint(src[_i]))
_i = _i + 1
}
var cp = array(array(src), codepoint)
var pos = 0
var row = 0
@@ -148,46 +143,45 @@ var tokenize = function(src, filename) {
}
// substr(start, end): returns the source text covering indices start
// (inclusive) up to end (exclusive, per the `i < end` loop bound below).
// NOTE(review): this span looks like a rendered diff hunk whose +/- markers
// were stripped, so two versions of the body appear back to back — confirm
// against the real file which lines are actually present.
var substr = function(start, end) {
// Presumably the removed version: rebuilds the text one code point at a
// time from the cp array, appending each character to s.
var s = ""
var i = start
while (i < end) {
s = s + character(cp[i])
i = i + 1
}
return s
// Presumably the added version: a single text() call on src with the same
// start/end arguments (assumes text() uses the same index convention — TODO confirm).
return text(src, start, end)
}
// read_string(quote_cp): scans a quoted string literal beginning at the
// current pos and pushes one "text" token onto tokens.
//   quote_cp — code point of the delimiter; scanning stops when pk() equals it.
// The token records the start offset and the start/end row and column.
// Backslash escapes are decoded into the token value; an unterminated string
// (input ends before the closing quote) still produces a token.
// NOTE(review): this span looks like a rendered diff hunk whose +/- markers
// were stripped. Lines that build `value` by concatenation are presumably the
// removed version; lines that use `parts` / `run_start` are presumably the
// added version — confirm against the real file.
var read_string = function(quote_cp) {
var start = pos
var start_row = row
var start_col = col
// Accumulator used by the concatenating version.
var value = ""
// Accumulators used by the slicing version: parts collects text pieces and
// run_start marks where the current run of unescaped characters begins.
var parts = []
var run_start = 0
var esc = 0
adv() // skip opening quote
run_start = pos
while (pos < len && pk() != quote_cp) {
if (pk() == CP_BSLASH) {
// Flush the literal run that precedes the backslash, if it is non-empty.
if (pos > run_start) push(parts, text(src, run_start, pos))
// First adv() consumes the backslash; the second returns the escape code point.
adv()
esc = adv()
// Escape decoding, concatenating form. Unrecognised escapes fall through
// to the final else and yield the escaped character itself.
if (esc == CP_n) { value = value + "\n" }
else if (esc == CP_t) { value = value + "\t" }
else if (esc == CP_r) { value = value + "\r" }
else if (esc == CP_BSLASH) { value = value + "\\" }
else if (esc == CP_SQUOTE) { value = value + "'" }
else if (esc == CP_DQUOTE) { value = value + "\"" }
else if (esc == CP_0) { value = value + character(0) }
else if (esc == CP_BACKTICK) { value = value + "`" }
else if (esc == CP_u) { value = value + read_unicode_escape() }
else { value = value + character(esc) }
// Escape decoding, parts form: same mapping, each result pushed as its own piece.
if (esc == CP_n) { push(parts, "\n") }
else if (esc == CP_t) { push(parts, "\t") }
else if (esc == CP_r) { push(parts, "\r") }
else if (esc == CP_BSLASH) { push(parts, "\\") }
else if (esc == CP_SQUOTE) { push(parts, "'") }
else if (esc == CP_DQUOTE) { push(parts, "\"") }
else if (esc == CP_0) { push(parts, character(0)) }
else if (esc == CP_BACKTICK) { push(parts, "`") }
else if (esc == CP_u) { push(parts, read_unicode_escape()) }
else { push(parts, character(esc)) }
// The next literal run starts right after the escape sequence.
run_start = pos
} else {
// Ordinary character: the concatenating form appends it immediately, while
// the slicing form only advances and leaves it inside the current run.
value = value + character(adv())
adv()
}
}
// Flush the trailing literal run, if any, before the closing quote.
if (pos > run_start) push(parts, text(src, run_start, pos))
// Guarded so that hitting end of input does not advance past len.
if (pos < len) adv() // skip closing quote
push(tokens, {
kind: "text", at: start,
from_row: start_row, from_column: start_col,
to_row: row, to_column: col,
// Two `value` entries appear here: the concatenated string and the pieces
// combined via text(parts) (presumably a join — TODO confirm).
value: value
value: text(parts)
})
}
@@ -195,49 +189,54 @@ var tokenize = function(src, filename) {
var start = pos
var start_row = row
var start_col = col
var value = ""
var parts = []
var run_start = 0
var depth = 0
var tc = 0
var q = 0
var interp_start = 0
adv() // skip opening backtick
run_start = pos
while (pos < len && pk() != CP_BACKTICK) {
if (pk() == CP_BSLASH && pos + 1 < len) {
value = value + character(adv())
value = value + character(adv())
if (pos > run_start) push(parts, text(src, run_start, pos))
push(parts, text(src, pos, pos + 2))
adv(); adv()
run_start = pos
} else if (pk() == CP_DOLLAR && pos + 1 < len && pk_at(1) == CP_LBRACE) {
value = value + character(adv()) // $
value = value + character(adv()) // {
if (pos > run_start) push(parts, text(src, run_start, pos))
interp_start = pos
adv(); adv() // $ {
depth = 1
while (pos < len && depth > 0) {
tc = pk()
if (tc == CP_LBRACE) { depth = depth + 1; value = value + character(adv()) }
if (tc == CP_LBRACE) { depth = depth + 1; adv() }
else if (tc == CP_RBRACE) {
depth = depth - 1
if (depth > 0) { value = value + character(adv()) }
else { value = value + character(adv()) }
adv()
}
else if (tc == CP_SQUOTE || tc == CP_DQUOTE || tc == CP_BACKTICK) {
q = adv()
value = value + character(q)
while (pos < len && pk() != q) {
if (pk() == CP_BSLASH && pos + 1 < len) {
value = value + character(adv())
}
value = value + character(adv())
if (pk() == CP_BSLASH && pos + 1 < len) adv()
adv()
}
if (pos < len) { value = value + character(adv()) }
} else { value = value + character(adv()) }
if (pos < len) adv()
} else { adv() }
}
push(parts, text(src, interp_start, pos))
run_start = pos
} else {
value = value + character(adv())
adv()
}
}
if (pos > run_start) push(parts, text(src, run_start, pos))
if (pos < len) adv() // skip closing backtick
push(tokens, {
kind: "text", at: start,
from_row: start_row, from_column: start_col,
to_row: row, to_column: col,
value: value
value: text(parts)
})
}
@@ -344,14 +343,13 @@ var tokenize = function(src, filename) {
var start = pos
var start_row = row
var start_col = col
var val = ""
var i = 0
while (i < count) { val = val + character(adv()); i = i + 1 }
while (i < count) { adv(); i = i + 1 }
push(tokens, {
kind: "name", at: start,
from_row: start_row, from_column: start_col,
to_row: row, to_column: col,
value: val
value: text(src, start, pos)
})
}

Binary file not shown.