diff --git a/tokenize.cm b/tokenize.cm index c49d9019..cfae5df2 100644 --- a/tokenize.cm +++ b/tokenize.cm @@ -1,11 +1,6 @@ var tokenize = function(src, filename) { var len = length(src) - var cp = [] - var _i = 0 - while (_i < len) { - push(cp, codepoint(src[_i])) - _i = _i + 1 - } + var cp = array(array(src), codepoint) var pos = 0 var row = 0 @@ -148,46 +143,45 @@ var tokenize = function(src, filename) { } var substr = function(start, end) { - var s = "" - var i = start - while (i < end) { - s = s + character(cp[i]) - i = i + 1 - } - return s + return text(src, start, end) } var read_string = function(quote_cp) { var start = pos var start_row = row var start_col = col - var value = "" + var parts = [] + var run_start = 0 var esc = 0 adv() // skip opening quote + run_start = pos while (pos < len && pk() != quote_cp) { if (pk() == CP_BSLASH) { + if (pos > run_start) push(parts, text(src, run_start, pos)) adv() esc = adv() - if (esc == CP_n) { value = value + "\n" } - else if (esc == CP_t) { value = value + "\t" } - else if (esc == CP_r) { value = value + "\r" } - else if (esc == CP_BSLASH) { value = value + "\\" } - else if (esc == CP_SQUOTE) { value = value + "'" } - else if (esc == CP_DQUOTE) { value = value + "\"" } - else if (esc == CP_0) { value = value + character(0) } - else if (esc == CP_BACKTICK) { value = value + "`" } - else if (esc == CP_u) { value = value + read_unicode_escape() } - else { value = value + character(esc) } + if (esc == CP_n) { push(parts, "\n") } + else if (esc == CP_t) { push(parts, "\t") } + else if (esc == CP_r) { push(parts, "\r") } + else if (esc == CP_BSLASH) { push(parts, "\\") } + else if (esc == CP_SQUOTE) { push(parts, "'") } + else if (esc == CP_DQUOTE) { push(parts, "\"") } + else if (esc == CP_0) { push(parts, character(0)) } + else if (esc == CP_BACKTICK) { push(parts, "`") } + else if (esc == CP_u) { push(parts, read_unicode_escape()) } + else { push(parts, character(esc)) } + run_start = pos } else { - value = value + character(adv()) + adv() } } + if (pos > run_start) push(parts, text(src, run_start, pos)) if (pos < len) adv() // skip closing quote push(tokens, { kind: "text", at: start, from_row: start_row, from_column: start_col, to_row: row, to_column: col, - value: value + value: text(parts) }) } @@ -195,49 +189,54 @@ var tokenize = function(src, filename) { var start = pos var start_row = row var start_col = col - var value = "" + var parts = [] + var run_start = 0 var depth = 0 var tc = 0 var q = 0 + var interp_start = 0 adv() // skip opening backtick + run_start = pos while (pos < len && pk() != CP_BACKTICK) { if (pk() == CP_BSLASH && pos + 1 < len) { - value = value + character(adv()) - value = value + character(adv()) + if (pos > run_start) push(parts, text(src, run_start, pos)) + push(parts, text(src, pos, pos + 2)) + adv(); adv() + run_start = pos } else if (pk() == CP_DOLLAR && pos + 1 < len && pk_at(1) == CP_LBRACE) { - value = value + character(adv()) // $ - value = value + character(adv()) // { + if (pos > run_start) push(parts, text(src, run_start, pos)) + interp_start = pos + adv(); adv() // $ { depth = 1 while (pos < len && depth > 0) { tc = pk() - if (tc == CP_LBRACE) { depth = depth + 1; value = value + character(adv()) } + if (tc == CP_LBRACE) { depth = depth + 1; adv() } else if (tc == CP_RBRACE) { depth = depth - 1 - if (depth > 0) { value = value + character(adv()) } - else { value = value + character(adv()) } + adv() } else if (tc == CP_SQUOTE || tc == CP_DQUOTE || tc == CP_BACKTICK) { q = adv() - value = value + character(q) while (pos < len && pk() != q) { - if (pk() == CP_BSLASH && pos + 1 < len) { - value = value + character(adv()) - } - value = value + character(adv()) + if (pk() == CP_BSLASH && pos + 1 < len) adv() + adv() } - if (pos < len) { value = value + character(adv()) } - } else { value = value + character(adv()) } + if (pos < len) adv() + } else { adv() } } + push(parts, text(src, interp_start, pos)) + run_start = pos } else { - value = value + character(adv()) + adv() } } + if (pos > run_start) push(parts, text(src, run_start, pos)) if (pos < len) adv() // skip closing backtick push(tokens, { kind: "text", at: start, from_row: start_row, from_column: start_col, to_row: row, to_column: col, - value: value + value: text(parts) }) } @@ -344,14 +343,13 @@ var tokenize = function(src, filename) { var start = pos var start_row = row var start_col = col - var val = "" var i = 0 - while (i < count) { val = val + character(adv()); i = i + 1 } + while (i < count) { adv(); i = i + 1 } push(tokens, { kind: "name", at: start, from_row: start_row, from_column: start_col, to_row: row, to_column: col, - value: val + value: text(src, start, pos) }) } diff --git a/tokenize.mach b/tokenize.mach index 7e12ea62..a8d0e522 100644 Binary files a/tokenize.mach and b/tokenize.mach differ