From 78e64c50675c849917ec79a9166d6815d6e843cd Mon Sep 17 00:00:00 2001 From: John Alanbrook Date: Tue, 10 Feb 2026 05:53:49 -0600 Subject: [PATCH] optimize parse --- parse.cm | 147 ++++++++++++++++++++++------------------------------- parse.mach | Bin 49190 -> 48815 bytes 2 files changed, 61 insertions(+), 86 deletions(-) diff --git a/parse.cm b/parse.cm index 1f9dd688..77fcbe60 100644 --- a/parse.cm +++ b/parse.cm @@ -1,18 +1,5 @@ -def CP_SLASH = 47 -def CP_BSLASH = 92 - -var is_alpha = function(c) { - return (c >= 65 && c <= 90) || (c >= 97 && c <= 122) -} - var parse = function(tokens, src, filename, tokenizer) { var _src_len = length(src) - var cp = [] - var _i = 0 - while (_i < _src_len) { - push(cp, codepoint(src[_i])) - _i = _i + 1 - } // ============================================================ // Parser Cursor @@ -103,14 +90,18 @@ var parse = function(tokens, src, filename, tokenizer) { }) } + var _keywords = { + "if": true, in: true, "do": true, go: true, + "var": true, def: true, "for": true, + "else": true, "this": true, "null": true, "true": true, + "false": true, "while": true, "break": true, + "return": true, "delete": true, + disrupt: true, "function": true, "continue": true, + disruption: true + } + var is_keyword = function(kind) { - return kind == "if" || kind == "in" || kind == "do" || kind == "go" || - kind == "var" || kind == "def" || kind == "for" || - kind == "else" || kind == "this" || kind == "null" || kind == "true" || - kind == "false" || kind == "while" || kind == "break" || - kind == "return" || kind == "delete" || - kind == "disrupt" || kind == "function" || kind == "continue" || - kind == "disruption" + return _keywords[kind] == true } // ============================================================ @@ -165,17 +156,18 @@ var parse = function(tokens, src, filename, tokenizer) { var params = null var param = null var rpos = 0 - var pattern_str = "" - var flags = "" + var pattern_parts = null + var flags_parts = null var tv = null var has_interp = false var ti = 0 var tpl_list = null - var fmt = null + var fmt_parts = null var idx = 0 var tvi = 0 var tvlen = 0 var depth = 0 + var expr_parts = null var expr_str = null var tc = null var tq = null @@ -218,52 +210,53 @@ var parse = function(tokens, src, filename, tokenizer) { node = ast_node("text literal", start) tpl_list = [] node.list = tpl_list - fmt = "" + fmt_parts = [] idx = 0 tvi = 0 tvlen = length(tv) while (tvi < tvlen) { if (tv[tvi] == "\\" && tvi + 1 < tvlen) { esc_ch = tv[tvi + 1] - if (esc_ch == "n") { fmt = fmt + "\n" } - else if (esc_ch == "t") { fmt = fmt + "\t" } - else if (esc_ch == "r") { fmt = fmt + "\r" } - else if (esc_ch == "\\") { fmt = fmt + "\\" } - else if (esc_ch == "`") { fmt = fmt + "`" } - else if (esc_ch == "$") { fmt = fmt + "$" } - else if (esc_ch == "0") { fmt = fmt + character(0) } - else { fmt = fmt + esc_ch } + if (esc_ch == "n") { push(fmt_parts, "\n") } + else if (esc_ch == "t") { push(fmt_parts, "\t") } + else if (esc_ch == "r") { push(fmt_parts, "\r") } + else if (esc_ch == "\\") { push(fmt_parts, "\\") } + else if (esc_ch == "`") { push(fmt_parts, "`") } + else if (esc_ch == "$") { push(fmt_parts, "$") } + else if (esc_ch == "0") { push(fmt_parts, character(0)) } + else { push(fmt_parts, esc_ch) } tvi = tvi + 2 } else if (tv[tvi] == "$" && tvi + 1 < tvlen && tv[tvi + 1] == "{") { tvi = tvi + 2 depth = 1 - expr_str = "" + expr_parts = [] while (tvi < tvlen && depth > 0) { tc = tv[tvi] - if (tc == "{") { depth = depth + 1; expr_str = expr_str + tc; tvi = tvi + 1 } + if (tc == "{") { depth = depth + 1; push(expr_parts, tc); tvi = tvi + 1 } else if (tc == "}") { depth = depth - 1 - if (depth > 0) { expr_str = expr_str + tc } + if (depth > 0) { push(expr_parts, tc) } tvi = tvi + 1 } else if (tc == "'" || tc == "\"" || tc == "`") { tq = tc - expr_str = expr_str + tc + push(expr_parts, tc) tvi = tvi + 1 while (tvi < tvlen && tv[tvi] != tq) { if (tv[tvi] == "\\" && tvi + 1 < tvlen) { - expr_str = expr_str + tv[tvi] + push(expr_parts, tv[tvi]) tvi = tvi + 1 } - expr_str = expr_str + tv[tvi] + push(expr_parts, tv[tvi]) tvi = tvi + 1 } - if (tvi < tvlen) { expr_str = expr_str + tv[tvi]; tvi = tvi + 1 } + if (tvi < tvlen) { push(expr_parts, tv[tvi]); tvi = tvi + 1 } } else { - expr_str = expr_str + tc + push(expr_parts, tc) tvi = tvi + 1 } } + expr_str = text(expr_parts) expr_tokens = tokenizer(expr_str, "