respect array and object length requests

This commit is contained in:
2026-02-14 15:42:19 -06:00
parent 356c51bde3
commit e75596ce30
18 changed files with 54250 additions and 62692 deletions

View File

@@ -1,72 +1,11 @@
var tokenize = function(src, filename) {
var len = length(src)
var cp = array(array(src), codepoint)
var pos = 0
var row = 0
var col = 0
var tokens = []
// Codepoint constants
def CP_LF = 10
def CP_CR = 13
def CP_TAB = 9
def CP_SPACE = 32
def CP_BANG = 33
def CP_DQUOTE = 34
def CP_HASH = 35
def CP_DOLLAR = 36
def CP_PERCENT = 37
def CP_AMP = 38
def CP_SQUOTE = 39
def CP_LPAREN = 40
def CP_RPAREN = 41
def CP_STAR = 42
def CP_PLUS = 43
def CP_COMMA = 44
def CP_MINUS = 45
def CP_DOT = 46
def CP_SLASH = 47
def CP_0 = 48
def CP_1 = 49
def CP_7 = 55
def CP_9 = 57
def CP_COLON = 58
def CP_SEMI = 59
def CP_LT = 60
def CP_EQ = 61
def CP_GT = 62
def CP_QMARK = 63
def CP_AT = 64
def CP_A = 65
def CP_B = 66
def CP_E = 69
def CP_F = 70
def CP_O = 79
def CP_X = 88
def CP_Z = 90
def CP_LBRACKET = 91
def CP_BSLASH = 92
def CP_RBRACKET = 93
def CP_CARET = 94
def CP_UNDERSCORE = 95
def CP_BACKTICK = 96
def CP_a = 97
def CP_b = 98
def CP_e = 101
def CP_f = 102
def CP_n = 110
def CP_o = 111
def CP_r = 114
def CP_t = 116
def CP_u = 117
def CP_x = 120
def CP_z = 122
def CP_LBRACE = 123
def CP_PIPE = 124
def CP_RBRACE = 125
def CP_TILDE = 126
// Keywords lookup
var keywords = {
if: "if", in: "in", do: "do", go: "go",
@@ -79,20 +18,20 @@ var tokenize = function(src, filename) {
}
var pk = function() {
if (pos >= len) return -1
return cp[pos]
if (pos >= len) return null
return src[pos]
}
var pk_at = function(n) {
var idx = pos + n
if (idx >= len) return -1
return cp[idx]
if (idx >= len) return null
return src[idx]
}
var adv = function() {
var c = cp[pos]
var c = src[pos]
pos = pos + 1
if (c == CP_LF) {
if (c == "\n") {
row = row + 1
col = 0
} else {
@@ -102,17 +41,17 @@ var tokenize = function(src, filename) {
}
var is_digit = function(c) {
return c >= CP_0 && c <= CP_9
return c >= "0" && c <= "9"
}
var is_hex = function(c) {
return (c >= CP_0 && c <= CP_9) || (c >= CP_a && c <= CP_f) || (c >= CP_A && c <= CP_F)
return (c >= "0" && c <= "9") || (c >= "a" && c <= "f") || (c >= "A" && c <= "F")
}
var hex_val = function(c) {
if (c >= CP_0 && c <= CP_9) return c - CP_0
if (c >= CP_a && c <= CP_f) return c - CP_a + 10
if (c >= CP_A && c <= CP_F) return c - CP_A + 10
if (c >= "0" && c <= "9") return codepoint(c) - codepoint("0")
if (c >= "a" && c <= "f") return codepoint(c) - codepoint("a") + 10
if (c >= "A" && c <= "F") return codepoint(c) - codepoint("A") + 10
return 0
}
@@ -127,7 +66,7 @@ var tokenize = function(src, filename) {
}
var is_alpha = function(c) {
return (c >= CP_a && c <= CP_z) || (c >= CP_A && c <= CP_Z)
return (c >= "a" && c <= "z") || (c >= "A" && c <= "Z")
}
var is_alnum = function(c) {
@@ -135,41 +74,41 @@ var tokenize = function(src, filename) {
}
var is_ident_start = function(c) {
return is_alpha(c) || c == CP_UNDERSCORE || c == CP_DOLLAR
return is_alpha(c) || c == "_" || c == "$"
}
var is_ident_char = function(c) {
return is_alnum(c) || c == CP_UNDERSCORE || c == CP_DOLLAR || c == CP_QMARK || c == CP_BANG
return is_alnum(c) || c == "_" || c == "$" || c == "?" || c == "!"
}
var substr = function(start, end) {
return text(src, start, end)
}
var read_string = function(quote_cp) {
var read_string = function(quote) {
var start = pos
var start_row = row
var start_col = col
var parts = []
var run_start = 0
var esc = 0
var esc = null
adv() // skip opening quote
run_start = pos
while (pos < len && pk() != quote_cp) {
if (pk() == CP_BSLASH) {
while (pos < len && pk() != quote) {
if (pk() == "\\") {
if (pos > run_start) push(parts, text(src, run_start, pos))
adv()
esc = adv()
if (esc == CP_n) { push(parts, "\n") }
else if (esc == CP_t) { push(parts, "\t") }
else if (esc == CP_r) { push(parts, "\r") }
else if (esc == CP_BSLASH) { push(parts, "\\") }
else if (esc == CP_SQUOTE) { push(parts, "'") }
else if (esc == CP_DQUOTE) { push(parts, "\"") }
else if (esc == CP_0) { push(parts, character(0)) }
else if (esc == CP_BACKTICK) { push(parts, "`") }
else if (esc == CP_u) { push(parts, read_unicode_escape()) }
else { push(parts, character(esc)) }
if (esc == "n") { push(parts, "\n") }
else if (esc == "t") { push(parts, "\t") }
else if (esc == "r") { push(parts, "\r") }
else if (esc == "\\") { push(parts, "\\") }
else if (esc == "'") { push(parts, "'") }
else if (esc == "\"") { push(parts, "\"") }
else if (esc == "0") { push(parts, character(0)) }
else if (esc == "`") { push(parts, "`") }
else if (esc == "u") { push(parts, read_unicode_escape()) }
else { push(parts, esc) }
run_start = pos
} else {
adv()
@@ -192,33 +131,33 @@ var tokenize = function(src, filename) {
var parts = []
var run_start = 0
var depth = 0
var tc = 0
var q = 0
var tc = null
var q = null
var interp_start = 0
adv() // skip opening backtick
run_start = pos
while (pos < len && pk() != CP_BACKTICK) {
if (pk() == CP_BSLASH && pos + 1 < len) {
while (pos < len && pk() != "`") {
if (pk() == "\\" && pos + 1 < len) {
if (pos > run_start) push(parts, text(src, run_start, pos))
push(parts, text(src, pos, pos + 2))
adv(); adv()
run_start = pos
} else if (pk() == CP_DOLLAR && pos + 1 < len && pk_at(1) == CP_LBRACE) {
} else if (pk() == "$" && pos + 1 < len && pk_at(1) == "{") {
if (pos > run_start) push(parts, text(src, run_start, pos))
interp_start = pos
adv(); adv() // $ {
depth = 1
while (pos < len && depth > 0) {
tc = pk()
if (tc == CP_LBRACE) { depth = depth + 1; adv() }
else if (tc == CP_RBRACE) {
if (tc == "{") { depth = depth + 1; adv() }
else if (tc == "}") {
depth = depth - 1
adv()
}
else if (tc == CP_SQUOTE || tc == CP_DQUOTE || tc == CP_BACKTICK) {
else if (tc == "'" || tc == "\"" || tc == "`") {
q = adv()
while (pos < len && pk() != q) {
if (pk() == CP_BSLASH && pos + 1 < len) adv()
if (pk() == "\\" && pos + 1 < len) adv()
adv()
}
if (pos < len) adv()
@@ -245,24 +184,24 @@ var tokenize = function(src, filename) {
var start_row = row
var start_col = col
var raw = ""
if (pk() == CP_0 && (pk_at(1) == CP_x || pk_at(1) == CP_X)) {
if (pk() == "0" && (pk_at(1) == "x" || pk_at(1) == "X")) {
adv(); adv()
while (pos < len && (is_hex(pk()) || pk() == CP_UNDERSCORE)) adv()
} else if (pk() == CP_0 && (pk_at(1) == CP_b || pk_at(1) == CP_B)) {
while (pos < len && (is_hex(pk()) || pk() == "_")) adv()
} else if (pk() == "0" && (pk_at(1) == "b" || pk_at(1) == "B")) {
adv(); adv()
while (pos < len && (pk() == CP_0 || pk() == CP_1 || pk() == CP_UNDERSCORE)) adv()
} else if (pk() == CP_0 && (pk_at(1) == CP_o || pk_at(1) == CP_O)) {
while (pos < len && (pk() == "0" || pk() == "1" || pk() == "_")) adv()
} else if (pk() == "0" && (pk_at(1) == "o" || pk_at(1) == "O")) {
adv(); adv()
while (pos < len && pk() >= CP_0 && pk() <= CP_7) adv()
while (pos < len && pk() >= "0" && pk() <= "7") adv()
} else {
while (pos < len && (is_digit(pk()) || pk() == CP_UNDERSCORE)) adv()
if (pos < len && pk() == CP_DOT) {
while (pos < len && (is_digit(pk()) || pk() == "_")) adv()
if (pos < len && pk() == ".") {
adv()
while (pos < len && (is_digit(pk()) || pk() == CP_UNDERSCORE)) adv()
while (pos < len && (is_digit(pk()) || pk() == "_")) adv()
}
if (pos < len && (pk() == CP_e || pk() == CP_E)) {
if (pos < len && (pk() == "e" || pk() == "E")) {
adv()
if (pos < len && (pk() == CP_PLUS || pk() == CP_MINUS)) adv()
if (pos < len && (pk() == "+" || pk() == "-")) adv()
while (pos < len && is_digit(pk())) adv()
}
}
@@ -305,12 +244,12 @@ var tokenize = function(src, filename) {
var start_row = row
var start_col = col
var raw = ""
if (pk_at(1) == CP_SLASH) {
while (pos < len && pk() != CP_LF && pk() != CP_CR) adv()
if (pk_at(1) == "/") {
while (pos < len && pk() != "\n" && pk() != "\r") adv()
} else {
adv(); adv() // skip /*
while (pos < len) {
if (pk() == CP_STAR && pk_at(1) == CP_SLASH) {
if (pk() == "*" && pk_at(1) == "/") {
adv(); adv()
break
}
@@ -359,144 +298,144 @@ var tokenize = function(src, filename) {
var start_row = 0
var start_col = 0
var raw = ""
if (c == -1) return false
if (c == null) return false
if (c == CP_LF) {
if (c == "\n") {
start = pos; start_row = row; start_col = col
adv()
push(tokens, { kind: "newline", at: start, from_row: start_row, from_column: start_col, to_row: row, to_column: col, value: "\n" })
return true
}
if (c == CP_CR) {
if (c == "\r") {
start = pos; start_row = row; start_col = col
adv()
if (pos < len && pk() == CP_LF) adv()
if (pos < len && pk() == "\n") adv()
push(tokens, { kind: "newline", at: start, from_row: start_row, from_column: start_col, to_row: row, to_column: col, value: "\n" })
return true
}
if (c == CP_SPACE || c == CP_TAB) {
if (c == " " || c == "\t") {
start = pos; start_row = row; start_col = col
while (pos < len && (pk() == CP_SPACE || pk() == CP_TAB)) adv()
while (pos < len && (pk() == " " || pk() == "\t")) adv()
raw = substr(start, pos)
push(tokens, { kind: "space", at: start, from_row: start_row, from_column: start_col, to_row: row, to_column: col, value: raw })
return true
}
if (c == CP_SQUOTE || c == CP_DQUOTE) { read_string(c); return true }
if (c == CP_BACKTICK) { read_template(); return true }
if (c == "'" || c == "\"") { read_string(c); return true }
if (c == "`") { read_template(); return true }
if (is_digit(c)) { read_number(); return true }
if (c == CP_DOT && is_digit(pk_at(1))) { read_number(); return true }
if (c == "." && is_digit(pk_at(1))) { read_number(); return true }
if (is_ident_start(c)) { read_name(); return true }
if (c == CP_SLASH) {
if (pk_at(1) == CP_SLASH || pk_at(1) == CP_STAR) { read_comment(); return true }
if (pk_at(1) == CP_EQ) { emit_op("/=", 2); return true }
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
if (c == "/") {
if (pk_at(1) == "/" || pk_at(1) == "*") { read_comment(); return true }
if (pk_at(1) == "=") { emit_op("/=", 2); return true }
if (pk_at(1) == "!") { emit_ident(2); return true }
emit_op("/", 1); return true
}
if (c == CP_STAR) {
if (pk_at(1) == CP_STAR) {
if (pk_at(2) == CP_BANG) { emit_ident(3); return true }
if (pk_at(2) == CP_EQ) { emit_op("**=", 3); return true }
if (c == "*") {
if (pk_at(1) == "*") {
if (pk_at(2) == "!") { emit_ident(3); return true }
if (pk_at(2) == "=") { emit_op("**=", 3); return true }
emit_op("**", 2); return true
}
if (pk_at(1) == CP_EQ) { emit_op("*=", 2); return true }
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
if (pk_at(1) == "=") { emit_op("*=", 2); return true }
if (pk_at(1) == "!") { emit_ident(2); return true }
emit_op("*", 1); return true
}
if (c == CP_PERCENT) {
if (pk_at(1) == CP_EQ) { emit_op("%=", 2); return true }
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
if (c == "%") {
if (pk_at(1) == "=") { emit_op("%=", 2); return true }
if (pk_at(1) == "!") { emit_ident(2); return true }
emit_op("%", 1); return true
}
if (c == CP_PLUS) {
if (pk_at(1) == CP_EQ) { emit_op("+=", 2); return true }
if (pk_at(1) == CP_PLUS) { emit_op("++", 2); return true }
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
if (c == "+") {
if (pk_at(1) == "=") { emit_op("+=", 2); return true }
if (pk_at(1) == "+") { emit_op("++", 2); return true }
if (pk_at(1) == "!") { emit_ident(2); return true }
emit_op("+", 1); return true
}
if (c == CP_MINUS) {
if (pk_at(1) == CP_EQ) { emit_op("-=", 2); return true }
if (pk_at(1) == CP_MINUS) { emit_op("--", 2); return true }
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
if (c == "-") {
if (pk_at(1) == "=") { emit_op("-=", 2); return true }
if (pk_at(1) == "-") { emit_op("--", 2); return true }
if (pk_at(1) == "!") { emit_ident(2); return true }
emit_op("-", 1); return true
}
if (c == CP_LT) {
if (pk_at(1) == CP_EQ && pk_at(2) == CP_BANG) { emit_ident(3); return true }
if (pk_at(1) == CP_EQ) { emit_op("<=", 2); return true }
if (pk_at(1) == CP_LT) {
if (pk_at(2) == CP_BANG) { emit_ident(3); return true }
if (pk_at(2) == CP_EQ) { emit_op("<<=", 3); return true }
if (c == "<") {
if (pk_at(1) == "=" && pk_at(2) == "!") { emit_ident(3); return true }
if (pk_at(1) == "=") { emit_op("<=", 2); return true }
if (pk_at(1) == "<") {
if (pk_at(2) == "!") { emit_ident(3); return true }
if (pk_at(2) == "=") { emit_op("<<=", 3); return true }
emit_op("<<", 2); return true
}
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
if (pk_at(1) == "!") { emit_ident(2); return true }
emit_op("<", 1); return true
}
if (c == CP_GT) {
if (pk_at(1) == CP_EQ && pk_at(2) == CP_BANG) { emit_ident(3); return true }
if (pk_at(1) == CP_EQ) { emit_op(">=", 2); return true }
if (pk_at(1) == CP_GT) {
if (pk_at(2) == CP_GT) {
if (pk_at(3) == CP_BANG) { emit_ident(4); return true }
if (pk_at(3) == CP_EQ) { emit_op(">>>=", 4); return true }
if (c == ">") {
if (pk_at(1) == "=" && pk_at(2) == "!") { emit_ident(3); return true }
if (pk_at(1) == "=") { emit_op(">=", 2); return true }
if (pk_at(1) == ">") {
if (pk_at(2) == ">") {
if (pk_at(3) == "!") { emit_ident(4); return true }
if (pk_at(3) == "=") { emit_op(">>>=", 4); return true }
emit_op(">>>", 3); return true
}
if (pk_at(2) == CP_BANG) { emit_ident(3); return true }
if (pk_at(2) == CP_EQ) { emit_op(">>=", 3); return true }
if (pk_at(2) == "!") { emit_ident(3); return true }
if (pk_at(2) == "=") { emit_op(">>=", 3); return true }
emit_op(">>", 2); return true
}
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
if (pk_at(1) == "!") { emit_ident(2); return true }
emit_op(">", 1); return true
}
if (c == CP_EQ) {
if (pk_at(1) == CP_EQ) {
if (pk_at(2) == CP_EQ) { emit_op("===", 3); return true }
if (c == "=") {
if (pk_at(1) == "=") {
if (pk_at(2) == "=") { emit_op("===", 3); return true }
emit_op("==", 2); return true
}
if (pk_at(1) == CP_GT) { emit_op("=>", 2); return true }
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
if (pk_at(1) == ">") { emit_op("=>", 2); return true }
if (pk_at(1) == "!") { emit_ident(2); return true }
emit_op("=", 1); return true
}
if (c == CP_BANG) {
if (pk_at(1) == CP_EQ) {
if (pk_at(2) == CP_BANG) { emit_ident(3); return true }
if (pk_at(2) == CP_EQ) { emit_op("!==", 3); return true }
if (c == "!") {
if (pk_at(1) == "=") {
if (pk_at(2) == "!") { emit_ident(3); return true }
if (pk_at(2) == "=") { emit_op("!==", 3); return true }
emit_op("!=", 2); return true
}
emit_op("!", 1); return true
}
if (c == CP_AMP) {
if (pk_at(1) == CP_AMP) {
if (pk_at(2) == CP_BANG) { emit_ident(3); return true }
if (pk_at(2) == CP_EQ) { emit_op("&&=", 3); return true }
if (c == "&") {
if (pk_at(1) == "&") {
if (pk_at(2) == "!") { emit_ident(3); return true }
if (pk_at(2) == "=") { emit_op("&&=", 3); return true }
emit_op("&&", 2); return true
}
if (pk_at(1) == CP_EQ) { emit_op("&=", 2); return true }
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
if (pk_at(1) == "=") { emit_op("&=", 2); return true }
if (pk_at(1) == "!") { emit_ident(2); return true }
emit_op("&", 1); return true
}
if (c == CP_PIPE) {
if (pk_at(1) == CP_PIPE) {
if (pk_at(2) == CP_BANG) { emit_ident(3); return true }
if (pk_at(2) == CP_EQ) { emit_op("||=", 3); return true }
if (c == "|") {
if (pk_at(1) == "|") {
if (pk_at(2) == "!") { emit_ident(3); return true }
if (pk_at(2) == "=") { emit_op("||=", 3); return true }
emit_op("||", 2); return true
}
if (pk_at(1) == CP_EQ) { emit_op("|=", 2); return true }
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
if (pk_at(1) == "=") { emit_op("|=", 2); return true }
if (pk_at(1) == "!") { emit_ident(2); return true }
emit_op("|", 1); return true
}
if (c == CP_CARET) {
if (pk_at(1) == CP_EQ) { emit_op("^=", 2); return true }
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
if (c == "^") {
if (pk_at(1) == "=") { emit_op("^=", 2); return true }
if (pk_at(1) == "!") { emit_ident(2); return true }
emit_op("^", 1); return true
}
if (c == CP_LBRACKET) {
if (pk_at(1) == CP_RBRACKET && pk_at(2) == CP_BANG) { emit_ident(3); return true }
if (c == "[") {
if (pk_at(1) == "]" && pk_at(2) == "!") { emit_ident(3); return true }
emit_op("[", 1); return true
}
if (c == CP_TILDE) {
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
if (c == "~") {
if (pk_at(1) == "!") { emit_ident(2); return true }
emit_op("~", 1); return true
}
emit_op(character(c), 1)
emit_op(c, 1)
return true
}
@@ -508,7 +447,7 @@ var tokenize = function(src, filename) {
// EOF token
push(tokens, { kind: "eof", at: pos, from_row: row, from_column: col, to_row: row, to_column: col })
return {filename: filename, tokens: tokens, cp: cp}
return {filename: filename, tokens: tokens}
}
return tokenize