optimize parse

This commit is contained in:
2026-02-10 05:53:49 -06:00
parent b8b110b616
commit 78e64c5067
2 changed files with 61 additions and 86 deletions

147
parse.cm
View File

@@ -1,18 +1,5 @@
// Codepoint of "/" (forward slash); compared against entries of the cp codepoint array.
def CP_SLASH = 47
// Codepoint of "\" (backslash); compared against entries of the cp codepoint array.
def CP_BSLASH = 92
// Reports whether a numeric codepoint is an ASCII letter.
// c: codepoint to classify.
// Returns true for 97..122 (a-z) and 65..90 (A-Z), false for anything else.
var is_alpha = function(c) {
  // Lowercase range first, then fall through to the uppercase range.
  if (c >= 97 && c <= 122) return true
  return c >= 65 && c <= 90
}
var parse = function(tokens, src, filename, tokenizer) {
var _src_len = length(src)
var cp = []
var _i = 0
while (_i < _src_len) {
push(cp, codepoint(src[_i]))
_i = _i + 1
}
// ============================================================
// Parser Cursor
@@ -103,14 +90,18 @@ var parse = function(tokens, src, filename, tokenizer) {
})
}
// Table of reserved-word token kinds. Every listed key maps to true, so
// membership can be tested as `_keywords[kind] == true`.
var _keywords = {
  "if": true,
  "in": true,
  "do": true,
  "go": true,
  "var": true,
  "def": true,
  "for": true,
  "else": true,
  "this": true,
  "null": true,
  "true": true,
  "false": true,
  "while": true,
  "break": true,
  "return": true,
  "delete": true,
  "disrupt": true,
  "function": true,
  "continue": true,
  "disruption": true
}
// Reports whether a token kind is a reserved word.
// kind: token kind string to test.
// Returns true when kind is a key of the _keywords table, false otherwise.
var is_keyword = function(kind) {
  // One table lookup replaces the hand-written comparison chain; the table
  // holds exactly the same set of words. Comparing with `== true` makes a
  // kind that is absent from the table yield false.
  return _keywords[kind] == true
}
// ============================================================
@@ -165,17 +156,18 @@ var parse = function(tokens, src, filename, tokenizer) {
var params = null
var param = null
var rpos = 0
var pattern_str = ""
var flags = ""
var pattern_parts = null
var flags_parts = null
var tv = null
var has_interp = false
var ti = 0
var tpl_list = null
var fmt = null
var fmt_parts = null
var idx = 0
var tvi = 0
var tvlen = 0
var depth = 0
var expr_parts = null
var expr_str = null
var tc = null
var tq = null
@@ -218,52 +210,53 @@ var parse = function(tokens, src, filename, tokenizer) {
node = ast_node("text literal", start)
tpl_list = []
node.list = tpl_list
fmt = ""
fmt_parts = []
idx = 0
tvi = 0
tvlen = length(tv)
while (tvi < tvlen) {
if (tv[tvi] == "\\" && tvi + 1 < tvlen) {
esc_ch = tv[tvi + 1]
if (esc_ch == "n") { fmt = fmt + "\n" }
else if (esc_ch == "t") { fmt = fmt + "\t" }
else if (esc_ch == "r") { fmt = fmt + "\r" }
else if (esc_ch == "\\") { fmt = fmt + "\\" }
else if (esc_ch == "`") { fmt = fmt + "`" }
else if (esc_ch == "$") { fmt = fmt + "$" }
else if (esc_ch == "0") { fmt = fmt + character(0) }
else { fmt = fmt + esc_ch }
if (esc_ch == "n") { push(fmt_parts, "\n") }
else if (esc_ch == "t") { push(fmt_parts, "\t") }
else if (esc_ch == "r") { push(fmt_parts, "\r") }
else if (esc_ch == "\\") { push(fmt_parts, "\\") }
else if (esc_ch == "`") { push(fmt_parts, "`") }
else if (esc_ch == "$") { push(fmt_parts, "$") }
else if (esc_ch == "0") { push(fmt_parts, character(0)) }
else { push(fmt_parts, esc_ch) }
tvi = tvi + 2
} else if (tv[tvi] == "$" && tvi + 1 < tvlen && tv[tvi + 1] == "{") {
tvi = tvi + 2
depth = 1
expr_str = ""
expr_parts = []
while (tvi < tvlen && depth > 0) {
tc = tv[tvi]
if (tc == "{") { depth = depth + 1; expr_str = expr_str + tc; tvi = tvi + 1 }
if (tc == "{") { depth = depth + 1; push(expr_parts, tc); tvi = tvi + 1 }
else if (tc == "}") {
depth = depth - 1
if (depth > 0) { expr_str = expr_str + tc }
if (depth > 0) { push(expr_parts, tc) }
tvi = tvi + 1
}
else if (tc == "'" || tc == "\"" || tc == "`") {
tq = tc
expr_str = expr_str + tc
push(expr_parts, tc)
tvi = tvi + 1
while (tvi < tvlen && tv[tvi] != tq) {
if (tv[tvi] == "\\" && tvi + 1 < tvlen) {
expr_str = expr_str + tv[tvi]
push(expr_parts, tv[tvi])
tvi = tvi + 1
}
expr_str = expr_str + tv[tvi]
push(expr_parts, tv[tvi])
tvi = tvi + 1
}
if (tvi < tvlen) { expr_str = expr_str + tv[tvi]; tvi = tvi + 1 }
if (tvi < tvlen) { push(expr_parts, tv[tvi]); tvi = tvi + 1 }
} else {
expr_str = expr_str + tc
push(expr_parts, tc)
tvi = tvi + 1
}
}
expr_str = text(expr_parts)
expr_tokens = tokenizer(expr_str, "<template>").tokens
sub_ast = parse(expr_tokens, expr_str, "<template>", tokenizer)
if (sub_ast != null && sub_ast.statements != null && length(sub_ast.statements) > 0) {
@@ -276,14 +269,16 @@ var parse = function(tokens, src, filename, tokenizer) {
}
push(tpl_list, sub_expr)
}
fmt = fmt + "{" + text(idx) + "}"
push(fmt_parts, "{")
push(fmt_parts, text(idx))
push(fmt_parts, "}")
idx = idx + 1
} else {
fmt = fmt + tv[tvi]
push(fmt_parts, tv[tvi])
tvi = tvi + 1
}
}
node.value = fmt
node.value = text(fmt_parts)
advance()
ast_node_end(node)
return node
@@ -445,24 +440,25 @@ var parse = function(tokens, src, filename, tokenizer) {
if (k == "/") {
node = ast_node("regexp", start)
rpos = tok.at + 1
pattern_str = ""
flags = ""
while (rpos < _src_len && cp[rpos] != CP_SLASH) {
if (cp[rpos] == CP_BSLASH && rpos + 1 < _src_len) {
pattern_str = pattern_str + character(cp[rpos]) + character(cp[rpos + 1])
pattern_parts = []
flags_parts = []
while (rpos < _src_len && src[rpos] != "/") {
if (src[rpos] == "\\" && rpos + 1 < _src_len) {
push(pattern_parts, src[rpos])
push(pattern_parts, src[rpos + 1])
rpos = rpos + 2
} else {
pattern_str = pattern_str + character(cp[rpos])
push(pattern_parts, src[rpos])
rpos = rpos + 1
}
}
if (rpos < _src_len) rpos = rpos + 1
while (rpos < _src_len && is_alpha(cp[rpos])) {
flags = flags + character(cp[rpos])
while (rpos < _src_len && is_letter(src[rpos])) {
push(flags_parts, src[rpos])
rpos = rpos + 1
}
node.pattern = pattern_str
if (length(flags) > 0) node.flags = flags
node.pattern = text(pattern_parts)
if (length(flags_parts) > 0) node.flags = text(flags_parts)
// Skip all tokens consumed by the regex re-scan
while (true) {
advance()
@@ -791,8 +787,6 @@ var parse = function(tokens, src, filename, tokenizer) {
var param = null
var prev_names = null
var pname = null
var dup = false
var j = 0
var old_dis = 0
if (in_disruption) {
@@ -815,13 +809,7 @@ var parse = function(tokens, src, filename, tokenizer) {
param = ast_node("name", tok)
param.name = tok.value
pname = tok.value
dup = false
j = 0
while (j < length(prev_names)) {
if (prev_names[j] == pname) { dup = true; break }
j = j + 1
}
if (dup) parse_error(tok, "duplicate parameter name '" + pname + "'")
if (find(prev_names, pname) != null) parse_error(tok, "duplicate parameter name '" + pname + "'")
push(prev_names, pname)
advance()
ast_node_end(param)
@@ -887,8 +875,6 @@ var parse = function(tokens, src, filename, tokenizer) {
var expr = null
var prev_names = null
var pname = null
var dup = false
var j = 0
node.arrow = true
if (in_disruption) {
@@ -911,13 +897,7 @@ var parse = function(tokens, src, filename, tokenizer) {
param = ast_node("name", tok)
param.name = tok.value
pname = tok.value
dup = false
j = 0
while (j < length(prev_names)) {
if (prev_names[j] == pname) { dup = true; break }
j = j + 1
}
if (dup) parse_error(tok, "duplicate parameter name '" + pname + "'")
if (find(prev_names, pname) != null) parse_error(tok, "duplicate parameter name '" + pname + "'")
push(prev_names, pname)
advance()
ast_node_end(param)
@@ -1345,12 +1325,7 @@ var parse = function(tokens, src, filename, tokenizer) {
}
// Records name in the intrinsics list unless it is already there.
// name: intrinsic name to record.
// Returns null when name was already recorded; otherwise appends it.
var sem_add_intrinsic = function(name) {
  // A single membership test guards the push. find() is treated as yielding
  // null for "not found", as the duplicate-parameter checks in this file do.
  if (find(intrinsics, name) != null) return null
  push(intrinsics, name)
}
var functino_names = {
@@ -1364,12 +1339,15 @@ var parse = function(tokens, src, filename, tokenizer) {
return functino_names[name] == true
}
// Table of assignment-operator kinds. Every listed key maps to true, so
// membership can be tested as `_assign_kinds[kind] == true`.
var _assign_kinds = {
  "assign": true,
  "+=": true,
  "-=": true,
  "*=": true,
  "/=": true,
  "%=": true,
  "<<=": true,
  ">>=": true,
  ">>>=": true,
  "&=": true,
  "^=": true,
  "|=": true,
  "**=": true,
  "&&=": true,
  "||=": true
}
// Appends every entry of child.vars to parent.vars, in order.
// parent, child: objects that each carry a `vars` array.
// parent.vars is extended in place; child.vars is only read.
var sem_propagate_vars = function(parent, child) {
  var incoming = child.vars
  // Length is captured once so the loop bound stays fixed even if both
  // arguments share the same vars array.
  var count = length(incoming)
  var i = 0
  while (i < count) {
    push(parent.vars, incoming[i])
    i = i + 1
  }
  // NOTE(review): each entry is appended exactly once here. A following
  // `parent.vars = array(parent.vars, child.vars)` would presumably append
  // them a second time — confirm array() semantics before using that form.
}
var sem_build_scope_record = function(scope) {
@@ -1487,10 +1465,7 @@ var parse = function(tokens, src, filename, tokenizer) {
var def_val = null
var sr = null
if (kind == "assign" || kind == "+=" || kind == "-=" || kind == "*=" ||
kind == "/=" || kind == "%=" || kind == "<<=" || kind == ">>=" ||
kind == ">>>=" || kind == "&=" || kind == "^=" || kind == "|=" ||
kind == "**=" || kind == "&&=" || kind == "||=") {
if (_assign_kinds[kind] == true) {
sem_check_assign_target(scope, expr.left)
sem_check_expr(scope, expr.right)
return null

Binary file not shown.