optimize tokenize
This commit is contained in:
90
tokenize.cm
90
tokenize.cm
@@ -1,11 +1,6 @@
|
||||
var tokenize = function(src, filename) {
|
||||
var len = length(src)
|
||||
var cp = []
|
||||
var _i = 0
|
||||
while (_i < len) {
|
||||
push(cp, codepoint(src[_i]))
|
||||
_i = _i + 1
|
||||
}
|
||||
var cp = array(array(src), codepoint)
|
||||
|
||||
var pos = 0
|
||||
var row = 0
|
||||
@@ -148,46 +143,45 @@ var tokenize = function(src, filename) {
|
||||
}
|
||||
|
||||
var substr = function(start, end) {
|
||||
var s = ""
|
||||
var i = start
|
||||
while (i < end) {
|
||||
s = s + character(cp[i])
|
||||
i = i + 1
|
||||
}
|
||||
return s
|
||||
return text(src, start, end)
|
||||
}
|
||||
|
||||
var read_string = function(quote_cp) {
|
||||
var start = pos
|
||||
var start_row = row
|
||||
var start_col = col
|
||||
var value = ""
|
||||
var parts = []
|
||||
var run_start = 0
|
||||
var esc = 0
|
||||
adv() // skip opening quote
|
||||
run_start = pos
|
||||
while (pos < len && pk() != quote_cp) {
|
||||
if (pk() == CP_BSLASH) {
|
||||
if (pos > run_start) push(parts, text(src, run_start, pos))
|
||||
adv()
|
||||
esc = adv()
|
||||
if (esc == CP_n) { value = value + "\n" }
|
||||
else if (esc == CP_t) { value = value + "\t" }
|
||||
else if (esc == CP_r) { value = value + "\r" }
|
||||
else if (esc == CP_BSLASH) { value = value + "\\" }
|
||||
else if (esc == CP_SQUOTE) { value = value + "'" }
|
||||
else if (esc == CP_DQUOTE) { value = value + "\"" }
|
||||
else if (esc == CP_0) { value = value + character(0) }
|
||||
else if (esc == CP_BACKTICK) { value = value + "`" }
|
||||
else if (esc == CP_u) { value = value + read_unicode_escape() }
|
||||
else { value = value + character(esc) }
|
||||
if (esc == CP_n) { push(parts, "\n") }
|
||||
else if (esc == CP_t) { push(parts, "\t") }
|
||||
else if (esc == CP_r) { push(parts, "\r") }
|
||||
else if (esc == CP_BSLASH) { push(parts, "\\") }
|
||||
else if (esc == CP_SQUOTE) { push(parts, "'") }
|
||||
else if (esc == CP_DQUOTE) { push(parts, "\"") }
|
||||
else if (esc == CP_0) { push(parts, character(0)) }
|
||||
else if (esc == CP_BACKTICK) { push(parts, "`") }
|
||||
else if (esc == CP_u) { push(parts, read_unicode_escape()) }
|
||||
else { push(parts, character(esc)) }
|
||||
run_start = pos
|
||||
} else {
|
||||
value = value + character(adv())
|
||||
adv()
|
||||
}
|
||||
}
|
||||
if (pos > run_start) push(parts, text(src, run_start, pos))
|
||||
if (pos < len) adv() // skip closing quote
|
||||
push(tokens, {
|
||||
kind: "text", at: start,
|
||||
from_row: start_row, from_column: start_col,
|
||||
to_row: row, to_column: col,
|
||||
value: value
|
||||
value: text(parts)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -195,49 +189,54 @@ var tokenize = function(src, filename) {
|
||||
var start = pos
|
||||
var start_row = row
|
||||
var start_col = col
|
||||
var value = ""
|
||||
var parts = []
|
||||
var run_start = 0
|
||||
var depth = 0
|
||||
var tc = 0
|
||||
var q = 0
|
||||
var interp_start = 0
|
||||
adv() // skip opening backtick
|
||||
run_start = pos
|
||||
while (pos < len && pk() != CP_BACKTICK) {
|
||||
if (pk() == CP_BSLASH && pos + 1 < len) {
|
||||
value = value + character(adv())
|
||||
value = value + character(adv())
|
||||
if (pos > run_start) push(parts, text(src, run_start, pos))
|
||||
push(parts, text(src, pos, pos + 2))
|
||||
adv(); adv()
|
||||
run_start = pos
|
||||
} else if (pk() == CP_DOLLAR && pos + 1 < len && pk_at(1) == CP_LBRACE) {
|
||||
value = value + character(adv()) // $
|
||||
value = value + character(adv()) // {
|
||||
if (pos > run_start) push(parts, text(src, run_start, pos))
|
||||
interp_start = pos
|
||||
adv(); adv() // $ {
|
||||
depth = 1
|
||||
while (pos < len && depth > 0) {
|
||||
tc = pk()
|
||||
if (tc == CP_LBRACE) { depth = depth + 1; value = value + character(adv()) }
|
||||
if (tc == CP_LBRACE) { depth = depth + 1; adv() }
|
||||
else if (tc == CP_RBRACE) {
|
||||
depth = depth - 1
|
||||
if (depth > 0) { value = value + character(adv()) }
|
||||
else { value = value + character(adv()) }
|
||||
adv()
|
||||
}
|
||||
else if (tc == CP_SQUOTE || tc == CP_DQUOTE || tc == CP_BACKTICK) {
|
||||
q = adv()
|
||||
value = value + character(q)
|
||||
while (pos < len && pk() != q) {
|
||||
if (pk() == CP_BSLASH && pos + 1 < len) {
|
||||
value = value + character(adv())
|
||||
}
|
||||
value = value + character(adv())
|
||||
if (pk() == CP_BSLASH && pos + 1 < len) adv()
|
||||
adv()
|
||||
}
|
||||
if (pos < len) { value = value + character(adv()) }
|
||||
} else { value = value + character(adv()) }
|
||||
if (pos < len) adv()
|
||||
} else { adv() }
|
||||
}
|
||||
push(parts, text(src, interp_start, pos))
|
||||
run_start = pos
|
||||
} else {
|
||||
value = value + character(adv())
|
||||
adv()
|
||||
}
|
||||
}
|
||||
if (pos > run_start) push(parts, text(src, run_start, pos))
|
||||
if (pos < len) adv() // skip closing backtick
|
||||
push(tokens, {
|
||||
kind: "text", at: start,
|
||||
from_row: start_row, from_column: start_col,
|
||||
to_row: row, to_column: col,
|
||||
value: value
|
||||
value: text(parts)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -344,14 +343,13 @@ var tokenize = function(src, filename) {
|
||||
var start = pos
|
||||
var start_row = row
|
||||
var start_col = col
|
||||
var val = ""
|
||||
var i = 0
|
||||
while (i < count) { val = val + character(adv()); i = i + 1 }
|
||||
while (i < count) { adv(); i = i + 1 }
|
||||
push(tokens, {
|
||||
kind: "name", at: start,
|
||||
from_row: start_row, from_column: start_col,
|
||||
to_row: row, to_column: col,
|
||||
value: val
|
||||
value: text(src, start, pos)
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
BIN
tokenize.mach
BIN
tokenize.mach
Binary file not shown.
Reference in New Issue
Block a user