optimize tokenize
This commit is contained in:
90
tokenize.cm
90
tokenize.cm
@@ -1,11 +1,6 @@
|
|||||||
var tokenize = function(src, filename) {
|
var tokenize = function(src, filename) {
|
||||||
var len = length(src)
|
var len = length(src)
|
||||||
var cp = []
|
var cp = array(array(src), codepoint)
|
||||||
var _i = 0
|
|
||||||
while (_i < len) {
|
|
||||||
push(cp, codepoint(src[_i]))
|
|
||||||
_i = _i + 1
|
|
||||||
}
|
|
||||||
|
|
||||||
var pos = 0
|
var pos = 0
|
||||||
var row = 0
|
var row = 0
|
||||||
@@ -148,46 +143,45 @@ var tokenize = function(src, filename) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var substr = function(start, end) {
|
var substr = function(start, end) {
|
||||||
var s = ""
|
return text(src, start, end)
|
||||||
var i = start
|
|
||||||
while (i < end) {
|
|
||||||
s = s + character(cp[i])
|
|
||||||
i = i + 1
|
|
||||||
}
|
|
||||||
return s
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var read_string = function(quote_cp) {
|
var read_string = function(quote_cp) {
|
||||||
var start = pos
|
var start = pos
|
||||||
var start_row = row
|
var start_row = row
|
||||||
var start_col = col
|
var start_col = col
|
||||||
var value = ""
|
var parts = []
|
||||||
|
var run_start = 0
|
||||||
var esc = 0
|
var esc = 0
|
||||||
adv() // skip opening quote
|
adv() // skip opening quote
|
||||||
|
run_start = pos
|
||||||
while (pos < len && pk() != quote_cp) {
|
while (pos < len && pk() != quote_cp) {
|
||||||
if (pk() == CP_BSLASH) {
|
if (pk() == CP_BSLASH) {
|
||||||
|
if (pos > run_start) push(parts, text(src, run_start, pos))
|
||||||
adv()
|
adv()
|
||||||
esc = adv()
|
esc = adv()
|
||||||
if (esc == CP_n) { value = value + "\n" }
|
if (esc == CP_n) { push(parts, "\n") }
|
||||||
else if (esc == CP_t) { value = value + "\t" }
|
else if (esc == CP_t) { push(parts, "\t") }
|
||||||
else if (esc == CP_r) { value = value + "\r" }
|
else if (esc == CP_r) { push(parts, "\r") }
|
||||||
else if (esc == CP_BSLASH) { value = value + "\\" }
|
else if (esc == CP_BSLASH) { push(parts, "\\") }
|
||||||
else if (esc == CP_SQUOTE) { value = value + "'" }
|
else if (esc == CP_SQUOTE) { push(parts, "'") }
|
||||||
else if (esc == CP_DQUOTE) { value = value + "\"" }
|
else if (esc == CP_DQUOTE) { push(parts, "\"") }
|
||||||
else if (esc == CP_0) { value = value + character(0) }
|
else if (esc == CP_0) { push(parts, character(0)) }
|
||||||
else if (esc == CP_BACKTICK) { value = value + "`" }
|
else if (esc == CP_BACKTICK) { push(parts, "`") }
|
||||||
else if (esc == CP_u) { value = value + read_unicode_escape() }
|
else if (esc == CP_u) { push(parts, read_unicode_escape()) }
|
||||||
else { value = value + character(esc) }
|
else { push(parts, character(esc)) }
|
||||||
|
run_start = pos
|
||||||
} else {
|
} else {
|
||||||
value = value + character(adv())
|
adv()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (pos > run_start) push(parts, text(src, run_start, pos))
|
||||||
if (pos < len) adv() // skip closing quote
|
if (pos < len) adv() // skip closing quote
|
||||||
push(tokens, {
|
push(tokens, {
|
||||||
kind: "text", at: start,
|
kind: "text", at: start,
|
||||||
from_row: start_row, from_column: start_col,
|
from_row: start_row, from_column: start_col,
|
||||||
to_row: row, to_column: col,
|
to_row: row, to_column: col,
|
||||||
value: value
|
value: text(parts)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -195,49 +189,54 @@ var tokenize = function(src, filename) {
|
|||||||
var start = pos
|
var start = pos
|
||||||
var start_row = row
|
var start_row = row
|
||||||
var start_col = col
|
var start_col = col
|
||||||
var value = ""
|
var parts = []
|
||||||
|
var run_start = 0
|
||||||
var depth = 0
|
var depth = 0
|
||||||
var tc = 0
|
var tc = 0
|
||||||
var q = 0
|
var q = 0
|
||||||
|
var interp_start = 0
|
||||||
adv() // skip opening backtick
|
adv() // skip opening backtick
|
||||||
|
run_start = pos
|
||||||
while (pos < len && pk() != CP_BACKTICK) {
|
while (pos < len && pk() != CP_BACKTICK) {
|
||||||
if (pk() == CP_BSLASH && pos + 1 < len) {
|
if (pk() == CP_BSLASH && pos + 1 < len) {
|
||||||
value = value + character(adv())
|
if (pos > run_start) push(parts, text(src, run_start, pos))
|
||||||
value = value + character(adv())
|
push(parts, text(src, pos, pos + 2))
|
||||||
|
adv(); adv()
|
||||||
|
run_start = pos
|
||||||
} else if (pk() == CP_DOLLAR && pos + 1 < len && pk_at(1) == CP_LBRACE) {
|
} else if (pk() == CP_DOLLAR && pos + 1 < len && pk_at(1) == CP_LBRACE) {
|
||||||
value = value + character(adv()) // $
|
if (pos > run_start) push(parts, text(src, run_start, pos))
|
||||||
value = value + character(adv()) // {
|
interp_start = pos
|
||||||
|
adv(); adv() // $ {
|
||||||
depth = 1
|
depth = 1
|
||||||
while (pos < len && depth > 0) {
|
while (pos < len && depth > 0) {
|
||||||
tc = pk()
|
tc = pk()
|
||||||
if (tc == CP_LBRACE) { depth = depth + 1; value = value + character(adv()) }
|
if (tc == CP_LBRACE) { depth = depth + 1; adv() }
|
||||||
else if (tc == CP_RBRACE) {
|
else if (tc == CP_RBRACE) {
|
||||||
depth = depth - 1
|
depth = depth - 1
|
||||||
if (depth > 0) { value = value + character(adv()) }
|
adv()
|
||||||
else { value = value + character(adv()) }
|
|
||||||
}
|
}
|
||||||
else if (tc == CP_SQUOTE || tc == CP_DQUOTE || tc == CP_BACKTICK) {
|
else if (tc == CP_SQUOTE || tc == CP_DQUOTE || tc == CP_BACKTICK) {
|
||||||
q = adv()
|
q = adv()
|
||||||
value = value + character(q)
|
|
||||||
while (pos < len && pk() != q) {
|
while (pos < len && pk() != q) {
|
||||||
if (pk() == CP_BSLASH && pos + 1 < len) {
|
if (pk() == CP_BSLASH && pos + 1 < len) adv()
|
||||||
value = value + character(adv())
|
adv()
|
||||||
}
|
|
||||||
value = value + character(adv())
|
|
||||||
}
|
}
|
||||||
if (pos < len) { value = value + character(adv()) }
|
if (pos < len) adv()
|
||||||
} else { value = value + character(adv()) }
|
} else { adv() }
|
||||||
}
|
}
|
||||||
|
push(parts, text(src, interp_start, pos))
|
||||||
|
run_start = pos
|
||||||
} else {
|
} else {
|
||||||
value = value + character(adv())
|
adv()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (pos > run_start) push(parts, text(src, run_start, pos))
|
||||||
if (pos < len) adv() // skip closing backtick
|
if (pos < len) adv() // skip closing backtick
|
||||||
push(tokens, {
|
push(tokens, {
|
||||||
kind: "text", at: start,
|
kind: "text", at: start,
|
||||||
from_row: start_row, from_column: start_col,
|
from_row: start_row, from_column: start_col,
|
||||||
to_row: row, to_column: col,
|
to_row: row, to_column: col,
|
||||||
value: value
|
value: text(parts)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -344,14 +343,13 @@ var tokenize = function(src, filename) {
|
|||||||
var start = pos
|
var start = pos
|
||||||
var start_row = row
|
var start_row = row
|
||||||
var start_col = col
|
var start_col = col
|
||||||
var val = ""
|
|
||||||
var i = 0
|
var i = 0
|
||||||
while (i < count) { val = val + character(adv()); i = i + 1 }
|
while (i < count) { adv(); i = i + 1 }
|
||||||
push(tokens, {
|
push(tokens, {
|
||||||
kind: "name", at: start,
|
kind: "name", at: start,
|
||||||
from_row: start_row, from_column: start_col,
|
from_row: start_row, from_column: start_col,
|
||||||
to_row: row, to_column: col,
|
to_row: row, to_column: col,
|
||||||
value: val
|
value: text(src, start, pos)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
BIN
tokenize.mach
BIN
tokenize.mach
Binary file not shown.
Reference in New Issue
Block a user