optimize tokenize

2026-02-10 05:52:19 -06:00
parent b8b110b616
commit ff11c49c39
2 changed files with 44 additions and 46 deletions
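
The change replaces per-codepoint string concatenation, which copies the accumulated string on every append, with run slicing: remember where the current verbatim run began, and only when an escape or interpolation interrupts it, slice the whole run out of the source and collect the pieces in a list that is joined once at the end. Below is a minimal sketch of the pattern, assuming (as the diff suggests) that text(src, a, b) slices src by codepoint index and text(list) concatenates a list of strings; collect_until and stop_cp are hypothetical names, and pos, len, pk, adv, and CP_BSLASH are the tokenizer's own cursor state and helpers:

	// Hypothetical, reduced form of the run-slicing pattern in this commit.
	var collect_until = function(stop_cp) {
		var parts = []                // finished runs and decoded escapes
		var run_start = pos           // start of the current verbatim run
		while (pos < len && pk() != stop_cp) {
			if (pk() == CP_BSLASH) {
				// flush the verbatim run before decoding the escape
				if (pos > run_start) push(parts, text(src, run_start, pos))
				adv()
				push(parts, character(adv()))
				run_start = pos       // next run begins after the escape
			} else {
				adv()                 // ordinary codepoint: the run just grows
			}
		}
		if (pos > run_start) push(parts, text(src, run_start, pos))
		return text(parts)            // one join instead of O(n) appends
	}

The same idea carries through every hunk: read_string and the template reader flush runs around escapes and ${...} interpolations, and the name reader drops its accumulator entirely in favor of a single text(src, start, pos) slice taken after advancing.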


@@ -1,11 +1,6 @@
 var tokenize = function(src, filename) {
 	var len = length(src)
-	var cp = []
-	var _i = 0
-	while (_i < len) {
-		push(cp, codepoint(src[_i]))
-		_i = _i + 1
-	}
+	var cp = array(array(src), codepoint)
 	var pos = 0
 	var row = 0
@@ -148,46 +143,45 @@ var tokenize = function(src, filename) {
 	}
 	var substr = function(start, end) {
-		var s = ""
-		var i = start
-		while (i < end) {
-			s = s + character(cp[i])
-			i = i + 1
-		}
-		return s
+		return text(src, start, end)
 	}
 	var read_string = function(quote_cp) {
 		var start = pos
 		var start_row = row
 		var start_col = col
-		var value = ""
+		var parts = []
+		var run_start = 0
 		var esc = 0
 		adv() // skip opening quote
+		run_start = pos
 		while (pos < len && pk() != quote_cp) {
 			if (pk() == CP_BSLASH) {
+				if (pos > run_start) push(parts, text(src, run_start, pos))
 				adv()
 				esc = adv()
-				if (esc == CP_n) { value = value + "\n" }
-				else if (esc == CP_t) { value = value + "\t" }
-				else if (esc == CP_r) { value = value + "\r" }
-				else if (esc == CP_BSLASH) { value = value + "\\" }
-				else if (esc == CP_SQUOTE) { value = value + "'" }
-				else if (esc == CP_DQUOTE) { value = value + "\"" }
-				else if (esc == CP_0) { value = value + character(0) }
-				else if (esc == CP_BACKTICK) { value = value + "`" }
-				else if (esc == CP_u) { value = value + read_unicode_escape() }
-				else { value = value + character(esc) }
+				if (esc == CP_n) { push(parts, "\n") }
+				else if (esc == CP_t) { push(parts, "\t") }
+				else if (esc == CP_r) { push(parts, "\r") }
+				else if (esc == CP_BSLASH) { push(parts, "\\") }
+				else if (esc == CP_SQUOTE) { push(parts, "'") }
+				else if (esc == CP_DQUOTE) { push(parts, "\"") }
+				else if (esc == CP_0) { push(parts, character(0)) }
+				else if (esc == CP_BACKTICK) { push(parts, "`") }
+				else if (esc == CP_u) { push(parts, read_unicode_escape()) }
+				else { push(parts, character(esc)) }
+				run_start = pos
 			} else {
-				value = value + character(adv())
+				adv()
 			}
 		}
+		if (pos > run_start) push(parts, text(src, run_start, pos))
 		if (pos < len) adv() // skip closing quote
 		push(tokens, {
 			kind: "text", at: start,
 			from_row: start_row, from_column: start_col,
 			to_row: row, to_column: col,
-			value: value
+			value: text(parts)
 		})
 	}
@@ -195,49 +189,54 @@ var tokenize = function(src, filename) {
 		var start = pos
 		var start_row = row
 		var start_col = col
-		var value = ""
+		var parts = []
+		var run_start = 0
 		var depth = 0
 		var tc = 0
 		var q = 0
+		var interp_start = 0
 		adv() // skip opening backtick
+		run_start = pos
 		while (pos < len && pk() != CP_BACKTICK) {
 			if (pk() == CP_BSLASH && pos + 1 < len) {
-				value = value + character(adv())
-				value = value + character(adv())
+				if (pos > run_start) push(parts, text(src, run_start, pos))
+				push(parts, text(src, pos, pos + 2))
+				adv(); adv()
+				run_start = pos
 			} else if (pk() == CP_DOLLAR && pos + 1 < len && pk_at(1) == CP_LBRACE) {
-				value = value + character(adv()) // $
-				value = value + character(adv()) // {
+				if (pos > run_start) push(parts, text(src, run_start, pos))
+				interp_start = pos
+				adv(); adv() // $ {
 				depth = 1
 				while (pos < len && depth > 0) {
 					tc = pk()
-					if (tc == CP_LBRACE) { depth = depth + 1; value = value + character(adv()) }
+					if (tc == CP_LBRACE) { depth = depth + 1; adv() }
 					else if (tc == CP_RBRACE) {
 						depth = depth - 1
-						if (depth > 0) { value = value + character(adv()) }
-						else { value = value + character(adv()) }
+						adv()
 					}
 					else if (tc == CP_SQUOTE || tc == CP_DQUOTE || tc == CP_BACKTICK) {
 						q = adv()
-						value = value + character(q)
 						while (pos < len && pk() != q) {
-							if (pk() == CP_BSLASH && pos + 1 < len) {
-								value = value + character(adv())
-							}
-							value = value + character(adv())
+							if (pk() == CP_BSLASH && pos + 1 < len) adv()
+							adv()
 						}
-						if (pos < len) { value = value + character(adv()) }
-					} else { value = value + character(adv()) }
+						if (pos < len) adv()
+					} else { adv() }
 				}
+				push(parts, text(src, interp_start, pos))
+				run_start = pos
 			} else {
-				value = value + character(adv())
+				adv()
 			}
 		}
+		if (pos > run_start) push(parts, text(src, run_start, pos))
 		if (pos < len) adv() // skip closing backtick
 		push(tokens, {
 			kind: "text", at: start,
 			from_row: start_row, from_column: start_col,
 			to_row: row, to_column: col,
-			value: value
+			value: text(parts)
 		})
 	}
@@ -344,14 +343,13 @@ var tokenize = function(src, filename) {
 		var start = pos
 		var start_row = row
 		var start_col = col
-		var val = ""
 		var i = 0
-		while (i < count) { val = val + character(adv()); i = i + 1 }
+		while (i < count) { adv(); i = i + 1 }
 		push(tokens, {
 			kind: "name", at: start,
 			from_row: start_row, from_column: start_col,
 			to_row: row, to_column: col,
-			value: val
+			value: text(src, start, pos)
 		})
 	}

Binary file not shown.