respect array and object length requests

This commit is contained in:
2026-02-14 15:42:19 -06:00
parent 356c51bde3
commit e75596ce30
18 changed files with 54250 additions and 62692 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -13,6 +13,34 @@ Source → Tokenize → Parse → Fold → Mcode → Streamline → Machine
Mcode is produced by `mcode.cm`, optimized by `streamline.cm`, then either serialized to 32-bit bytecode for the Mach VM (`mach.c`), or lowered to QBE/LLVM IL for native compilation (`qbe_emit.cm`). See [Compilation Pipeline](pipeline.md) for the full overview.
## Module Structure
An `.mcode` file is a JSON object representing a compiled module:
| Field | Type | Description |
|-------|------|-------------|
| `name` | string | Module name (typically the source filename) |
| `filename` | string | Source filename |
| `data` | object | Constant pool — string and number literals used by instructions |
| `main` | function | The top-level function (module body) |
| `functions` | array | Nested function definitions (referenced by `function dest, id`) |
### Function Record
Each function (both `main` and entries in `functions`) has:
| Field | Type | Description |
|-------|------|-------------|
| `name` | string | Function name (`"<anonymous>"` for lambdas) |
| `filename` | string | Source filename |
| `nr_args` | integer | Number of parameters |
| `nr_slots` | integer | Total register slots needed (args + locals + temporaries) |
| `nr_close_slots` | integer | Number of closure slots captured from parent scope |
| `disruption_pc` | integer | Instruction index of the disruption handler (0 if none) |
| `instructions` | array | Instruction arrays and label strings |
Slot 0 is reserved. Slots 1 through `nr_args` hold parameters. Remaining slots up to `nr_slots - 1` are locals and temporaries.
## Instruction Format
Each instruction is a JSON array. The first element is the instruction name (string), followed by operands. The last two elements are line and column numbers for source mapping:

View File

@@ -1930,7 +1930,7 @@ var mcode = function(ast) {
_i = _i + 1
}
dest = alloc_slot()
add_instr(["array", dest, 0])
add_instr(["array", dest, count])
_i = 0
while (_i < count) {
emit_2("push", dest, elem_slots[_i])
@@ -1943,7 +1943,7 @@ var mcode = function(ast) {
if (kind == "record") {
list = expr.list
dest = alloc_slot()
push(s_instructions, ["record", dest, 0])
push(s_instructions, ["record", dest, length(list)])
_i = 0
while (_i < length(list)) {
pair = list[_i]

View File

@@ -1196,7 +1196,7 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
}
case MACH_NEWARRAY: {
JSValue arr = JS_NewArray(ctx);
JSValue arr = JS_NewArrayCap(ctx, b);
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
if (JS_IsException(arr)) { goto disrupt; }
frame->slots[a] = arr;
@@ -1550,7 +1550,7 @@ JSValue JS_CallRegisterVM(JSContext *ctx, JSCodeRegister *code,
/* New record */
case MACH_NEWRECORD: {
JSValue obj = JS_NewObject(ctx);
JSValue obj = b > 0 ? JS_NewObjectCap(ctx, b) : JS_NewObject(ctx);
frame = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_ref.val);
if (JS_IsException(obj)) goto disrupt;
frame->slots[a] = obj;
@@ -2008,11 +2008,9 @@ static int mcode_reg_items(cJSON *it, cJSON **out) {
/* record: [1]=dest, [2]=property count(const) — no line/col suffix */
if (!strcmp(op, "record")) { ADD(1); return c; }
/* array: [1]=dest, [2]=count(const), [3..]=elements (no line/col suffix) */
/* array: [1]=dest, [2]=count(const); elements are added via separate push instrs */
if (!strcmp(op, "array")) {
ADD(1);
int cnt = (int)cJSON_GetArrayItem(it, 2)->valuedouble;
for (int j = 0; j < cnt; j++) ADD(3 + j);
return c;
}
@@ -2475,15 +2473,10 @@ static MachCode *mcode_lower_func(cJSON *fobj, const char *filename) {
}
/* Array/Object creation */
else if (strcmp(op, "array") == 0) {
int dest = A1, count = A2;
EM(MACH_ABC(MACH_NEWARRAY, dest, 0, 0));
for (int j = 0; j < count; j++) {
int elem = ml_int(it, 3 + j);
EM(MACH_ABC(MACH_PUSH, dest, elem, 0));
}
EM(MACH_ABC(MACH_NEWARRAY, A1, A2, 0));
}
else if (strcmp(op, "record") == 0) {
EM(MACH_ABC(MACH_NEWRECORD, A1, 0, 0));
EM(MACH_ABC(MACH_NEWRECORD, A1, A2, 0));
}
/* Push/Pop */
else if (strcmp(op, "push") == 0) {

View File

@@ -514,9 +514,11 @@ JSValue JS_NewObjectProtoClass (JSContext *ctx, JSValue proto, JSClassID class_i
JSValue JS_NewObjectClass (JSContext *ctx, int class_id);
JSValue JS_NewObjectProto (JSContext *ctx, JSValue proto);
JSValue JS_NewObject (JSContext *ctx);
JSValue JS_NewObjectCap (JSContext *ctx, uint32_t n);
JSValue JS_NewArray (JSContext *ctx);
JSValue JS_NewArrayLen (JSContext *ctx, uint32_t len);
JSValue JS_NewArrayCap (JSContext *ctx, uint32_t cap);
JSValue JS_NewArrayFrom (JSContext *ctx, int count, JSValue *values);
int JS_ArrayPush (JSContext *ctx, JSValue *arr_ptr, JSValue val);
JSValue JS_ArrayPop (JSContext *ctx, JSValue obj);

View File

@@ -2136,8 +2136,24 @@ static JSValue js_sub_string_val (JSContext *ctx, JSValue src, int start, int en
return js_new_string8_len (ctx, buf, len);
}
/* Heap string — fast path for short ASCII substrings (avoids heap alloc) */
JSText *p = JS_VALUE_GET_STRING (src);
if (len <= MIST_ASCII_MAX_LEN) {
char buf[MIST_ASCII_MAX_LEN];
int all_ascii = 1;
for (int i = 0; i < len; i++) {
uint32_t c = string_get (p, start + i);
if (c >= 0x80) { all_ascii = 0; break; }
buf[i] = (char)c;
}
if (all_ascii) {
JSValue imm = MIST_TryNewImmediateASCII (buf, len);
if (!JS_IsNull (imm)) return imm;
}
}
/* Heap string — delegate to existing js_sub_string */
return js_sub_string (ctx, JS_VALUE_GET_STRING (src), start, end);
return js_sub_string (ctx, p, start, end);
}
/* Allocate a new pretext (mutable JSText) with initial capacity */
@@ -2646,11 +2662,51 @@ JSValue JS_NewArrayLen (JSContext *ctx, uint32_t len) {
JSValue JS_NewArray (JSContext *ctx) { return JS_NewArrayLen (ctx, 0); }
/* Build an empty array (len == 0) whose backing store already has room for
   `cap` values, intended for code that fills the array with pushes afterwards.
   A requested capacity of 0 falls back to JS_ARRAY_INITIAL_SIZE.
   Returns JS_EXCEPTION when js_malloc returns NULL. */
JSValue JS_NewArrayCap (JSContext *ctx, uint32_t cap) {
  uint32_t slots = cap != 0 ? cap : JS_ARRAY_INITIAL_SIZE;
  size_t bytes = sizeof (JSArray) + sizeof (JSValue) * slots;

  JSArray *arr = js_malloc (ctx, bytes);
  if (!arr)
    return JS_EXCEPTION;

  /* The header records the reserved capacity; the length stays at zero. */
  arr->mist_hdr = objhdr_make (slots, OBJ_ARRAY, false, false, false, false);
  arr->len = 0;

  /* Every reserved slot starts out as JS_NULL. */
  JSValue *slot = arr->values;
  for (JSValue *end = slot + slots; slot != end; slot++)
    *slot = JS_NULL;

  return JS_MKPTR (arr);
}
JSValue JS_NewObject (JSContext *ctx) {
  /* Same call JS_NewObjectClass(ctx, JS_CLASS_OBJECT) would make, written
     out inline: a JS_CLASS_OBJECT instance using that class's prototype. */
  JSValue object_proto = ctx->class_proto[JS_CLASS_OBJECT];
  return JS_NewObjectProtoClass (ctx, object_proto, JS_CLASS_OBJECT);
}
/* Create a plain object whose property hash table is pre-sized for n
   properties.

   n is only a sizing hint. Slot 0 is reserved, so n + 1 slots are needed, and
   the table is kept at roughly 2x that for a good load factor. The mask
   passed to js_new_record_class must be a power of two minus one, so it is
   widened one bit at a time starting from JS_RECORD_INITIAL_MASK.

   The requirement is computed in 64 bits and the mask saturates at
   0x7FFFFFFF. In 32-bit arithmetic (n + 1) * 2 wraps for very large n, and
   once the mask reached 0xFFFFFFFF the test `mask + 1 < need` compared
   against a wrapped 0 and the widening loop never terminated.

   Returns JS_EXCEPTION when js_new_record_class returns NULL. */
JSValue JS_NewObjectCap (JSContext *ctx, uint32_t n) {
  const uint32_t max_mask = 0x7FFFFFFFu;
  uint64_t need = ((uint64_t)n + 1) * 2;
  uint32_t mask = JS_RECORD_INITIAL_MASK;
  while (mask < max_mask && (uint64_t)mask + 1 < need)
    mask = (mask << 1) | 1;

  /* The prototype is held in a GC ref across the allocation and re-read
     afterwards. NOTE(review): presumably because js_new_record_class can
     trigger a collection that relocates it — confirm. */
  JSGCRef proto_ref;
  JS_PushGCRef (ctx, &proto_ref);
  proto_ref.val = ctx->class_proto[JS_CLASS_OBJECT];
  JSRecord *rec = js_new_record_class (ctx, mask, JS_CLASS_OBJECT);
  JSValue proto_val = proto_ref.val;
  JS_PopGCRef (ctx, &proto_ref);

  if (!rec) return JS_EXCEPTION;
  /* Only attach the prototype when it is actually a record. */
  if (JS_IsRecord (proto_val))
    rec->proto = proto_val;
  return JS_MKPTR (rec);
}
/* Note: at least 'length' arguments will be readable in 'argv' */
static JSValue JS_NewCFunction3 (JSContext *ctx, JSCFunction *func, const char *name, int length, JSCFunctionEnum cproto, int magic) {

View File

@@ -123,45 +123,45 @@ var streamline = function(ir, log) {
var op = instr[0]
var src_type = null
if (op == "access") {
slot_types[text(instr[1])] = access_value_type(instr[2])
slot_types[instr[1]] = access_value_type(instr[2])
} else if (op == "int") {
slot_types[text(instr[1])] = T_INT
slot_types[instr[1]] = T_INT
} else if (op == "true" || op == "false") {
slot_types[text(instr[1])] = T_BOOL
slot_types[instr[1]] = T_BOOL
} else if (op == "null") {
slot_types[text(instr[1])] = T_NULL
slot_types[instr[1]] = T_NULL
} else if (op == "move") {
src_type = slot_types[text(instr[2])]
slot_types[text(instr[1])] = src_type != null ? src_type : T_UNKNOWN
src_type = slot_types[instr[2]]
slot_types[instr[1]] = src_type != null ? src_type : T_UNKNOWN
} else if (op == "concat") {
slot_types[text(instr[1])] = T_TEXT
slot_types[instr[1]] = T_TEXT
} else if (bool_result_ops[op] == true) {
slot_types[text(instr[1])] = T_BOOL
slot_types[instr[1]] = T_BOOL
} else if (op == "load_field" || op == "load_index" || op == "load_dynamic") {
slot_types[text(instr[1])] = T_UNKNOWN
slot_types[instr[1]] = T_UNKNOWN
} else if (op == "invoke" || op == "tail_invoke") {
slot_types[text(instr[2])] = T_UNKNOWN
slot_types[instr[2]] = T_UNKNOWN
} else if (op == "pop" || op == "get") {
slot_types[text(instr[1])] = T_UNKNOWN
slot_types[instr[1]] = T_UNKNOWN
} else if (op == "array") {
slot_types[text(instr[1])] = T_ARRAY
slot_types[instr[1]] = T_ARRAY
} else if (op == "record") {
slot_types[text(instr[1])] = T_RECORD
slot_types[instr[1]] = T_RECORD
} else if (op == "function") {
slot_types[text(instr[1])] = T_FUNCTION
slot_types[instr[1]] = T_FUNCTION
} else if (op == "length") {
slot_types[text(instr[1])] = T_INT
slot_types[instr[1]] = T_INT
} else if (op == "negate" || numeric_ops[op] == true) {
slot_types[text(instr[1])] = T_UNKNOWN
slot_types[instr[1]] = T_UNKNOWN
} else if (op == "bitnot" || op == "bitand" || op == "bitor" ||
op == "bitxor" || op == "shl" || op == "shr" || op == "ushr") {
slot_types[text(instr[1])] = T_INT
slot_types[instr[1]] = T_INT
}
return null
}
var slot_is = function(slot_types, slot, typ) {
var known = slot_types[text(slot)]
var known = slot_types[slot]
if (known == null) {
return false
}
@@ -175,24 +175,22 @@ var streamline = function(ir, log) {
}
var merge_backward = function(backward_types, slot, typ) {
var sk = null
var existing = null
if (!is_number(slot)) {
return null
}
sk = text(slot)
existing = backward_types[sk]
existing = backward_types[slot]
if (existing == null) {
backward_types[sk] = typ
backward_types[slot] = typ
} else if (existing != typ && existing != T_UNKNOWN) {
if ((existing == T_INT || existing == T_FLOAT) && typ == T_NUM) {
// Keep more specific
} else if (existing == T_NUM && (typ == T_INT || typ == T_FLOAT)) {
backward_types[sk] = typ
backward_types[slot] = typ
} else if ((existing == T_INT && typ == T_FLOAT) || (existing == T_FLOAT && typ == T_INT)) {
backward_types[sk] = T_NUM
backward_types[slot] = T_NUM
} else {
backward_types[sk] = T_UNKNOWN
backward_types[slot] = T_UNKNOWN
}
}
return null
@@ -201,8 +199,8 @@ var streamline = function(ir, log) {
var seed_params = function(slot_types, param_types, nr_args) {
var j = 1
while (j <= nr_args) {
if (param_types[text(j)] != null) {
slot_types[text(j)] = param_types[text(j)]
if (param_types[j] != null) {
slot_types[j] = param_types[j]
}
j = j + 1
}
@@ -210,10 +208,11 @@ var streamline = function(ir, log) {
}
var seed_writes = function(slot_types, write_types) {
var keys = array(write_types)
var k = 0
while (k < length(keys)) {
slot_types[keys[k]] = write_types[keys[k]]
while (k < length(write_types)) {
if (write_types[k] != null) {
slot_types[k] = write_types[k]
}
k = k + 1
}
return null
@@ -236,11 +235,11 @@ var streamline = function(ir, log) {
var bt = null
if (instructions == null || nr_args == 0) {
return {}
return array(func.nr_slots)
}
num_instr = length(instructions)
backward_types = {}
backward_types = array(func.nr_slots)
i = 0
while (i < num_instr) {
instr = instructions[i]
@@ -296,12 +295,12 @@ var streamline = function(ir, log) {
i = i + 1
}
param_types = {}
param_types = array(func.nr_slots)
j = 1
while (j <= nr_args) {
bt = backward_types[text(j)]
bt = backward_types[j]
if (bt != null && bt != T_UNKNOWN) {
param_types[text(j)] = bt
param_types[j] = bt
}
j = j + 1
}
@@ -319,22 +318,19 @@ var streamline = function(ir, log) {
var nr_args = func.nr_args != null ? func.nr_args : 0
var num_instr = 0
var write_types = null
var result = null
var keys = null
var i = 0
var k = 0
var instr = null
var op = null
var slot = 0
var typ = null
var wt = null
if (instructions == null) {
return {}
return array(func.nr_slots)
}
num_instr = length(instructions)
write_types = {}
write_types = array(func.nr_slots)
i = 0
while (i < num_instr) {
instr = instructions[i]
@@ -410,17 +406,14 @@ var streamline = function(ir, log) {
}
// Filter to only slots with known (non-unknown) types
result = {}
keys = array(write_types)
k = 0
while (k < length(keys)) {
wt = write_types[keys[k]]
if (wt != null && wt != T_UNKNOWN) {
result[keys[k]] = wt
while (k < length(write_types)) {
if (write_types[k] == T_UNKNOWN) {
write_types[k] = null
}
k = k + 1
}
return result
return write_types
}
// =========================================================
@@ -462,14 +455,14 @@ var streamline = function(ir, log) {
num_instr = length(instructions)
j = 1
while (j <= nr_args) {
if (param_types[text(j)] != null) {
if (param_types[j] != null) {
has_params = true
}
j = j + 1
}
has_writes = length(array(write_types)) > 0
has_writes = some(write_types, function(x) { return x != null })
slot_types = {}
slot_types = array(func.nr_slots)
if (has_params) {
seed_params(slot_types, param_types, nr_args)
}
@@ -482,7 +475,7 @@ var streamline = function(ir, log) {
instr = instructions[i]
if (is_text(instr)) {
slot_types = {}
slot_types = array(func.nr_slots)
if (has_params) {
seed_params(slot_types, param_types, nr_args)
}
@@ -525,14 +518,14 @@ var streamline = function(ir, log) {
at: i,
before: [instr, next],
after: [instructions[i], instructions[i + 1]],
why: {slot: src, known_type: slot_types[text(src)], checked_type: checked_type}
why: {slot: src, known_type: slot_types[src], checked_type: checked_type}
}
}
slot_types[text(dest)] = T_BOOL
slot_types[dest] = T_BOOL
i = i + 2
continue
}
src_known = slot_types[text(src)]
src_known = slot_types[src]
if (src_known != null && src_known != T_UNKNOWN && src_known != checked_type) {
if (checked_type == T_NUM && (src_known == T_INT || src_known == T_FLOAT)) {
nc = nc + 1
@@ -550,7 +543,7 @@ var streamline = function(ir, log) {
why: {slot: src, known_type: src_known, checked_type: checked_type}
}
}
slot_types[text(dest)] = T_BOOL
slot_types[dest] = T_BOOL
i = i + 2
continue
}
@@ -569,12 +562,12 @@ var streamline = function(ir, log) {
why: {slot: src, known_type: src_known, checked_type: checked_type}
}
}
slot_types[text(dest)] = T_UNKNOWN
slot_types[dest] = T_UNKNOWN
i = i + 2
continue
}
slot_types[text(dest)] = T_BOOL
slot_types[text(src)] = checked_type
slot_types[dest] = T_BOOL
slot_types[src] = checked_type
i = i + 2
continue
}
@@ -594,14 +587,14 @@ var streamline = function(ir, log) {
at: i,
before: [instr, next],
after: [instructions[i], instructions[i + 1]],
why: {slot: src, known_type: slot_types[text(src)], checked_type: checked_type}
why: {slot: src, known_type: slot_types[src], checked_type: checked_type}
}
}
slot_types[text(dest)] = T_BOOL
slot_types[dest] = T_BOOL
i = i + 2
continue
}
src_known = slot_types[text(src)]
src_known = slot_types[src]
if (src_known != null && src_known != T_UNKNOWN && src_known != checked_type) {
if (checked_type == T_NUM && (src_known == T_INT || src_known == T_FLOAT)) {
nc = nc + 1
@@ -619,7 +612,7 @@ var streamline = function(ir, log) {
why: {slot: src, known_type: src_known, checked_type: checked_type}
}
}
slot_types[text(dest)] = T_BOOL
slot_types[dest] = T_BOOL
i = i + 2
continue
}
@@ -638,17 +631,17 @@ var streamline = function(ir, log) {
why: {slot: src, known_type: src_known, checked_type: checked_type}
}
}
slot_types[text(dest)] = T_BOOL
slot_types[dest] = T_BOOL
i = i + 2
continue
}
slot_types[text(dest)] = T_BOOL
slot_types[dest] = T_BOOL
i = i + 2
continue
}
}
slot_types[text(dest)] = T_BOOL
slot_types[dest] = T_BOOL
i = i + 1
continue
}
@@ -664,7 +657,7 @@ var streamline = function(ir, log) {
pass: "eliminate_type_checks",
rule: "dynamic_to_field",
at: i, before: old_op, after: instr[0],
why: {slot: instr[3], known_type: slot_types[text(instr[3])]}
why: {slot: instr[3], known_type: slot_types[instr[3]]}
}
}
} else if (slot_is(slot_types, instr[3], T_INT)) {
@@ -675,11 +668,11 @@ var streamline = function(ir, log) {
pass: "eliminate_type_checks",
rule: "dynamic_to_index",
at: i, before: old_op, after: instr[0],
why: {slot: instr[3], known_type: slot_types[text(instr[3])]}
why: {slot: instr[3], known_type: slot_types[instr[3]]}
}
}
}
slot_types[text(instr[1])] = T_UNKNOWN
slot_types[instr[1]] = T_UNKNOWN
i = i + 1
continue
}
@@ -693,7 +686,7 @@ var streamline = function(ir, log) {
pass: "eliminate_type_checks",
rule: "dynamic_to_field",
at: i, before: old_op, after: instr[0],
why: {slot: instr[3], known_type: slot_types[text(instr[3])]}
why: {slot: instr[3], known_type: slot_types[instr[3]]}
}
}
} else if (slot_is(slot_types, instr[3], T_INT)) {
@@ -704,7 +697,7 @@ var streamline = function(ir, log) {
pass: "eliminate_type_checks",
rule: "dynamic_to_index",
at: i, before: old_op, after: instr[0],
why: {slot: instr[3], known_type: slot_types[text(instr[3])]}
why: {slot: instr[3], known_type: slot_types[instr[3]]}
}
}
}
@@ -745,14 +738,14 @@ var streamline = function(ir, log) {
}
num_instr = length(instructions)
slot_values = {}
slot_values = array(func.nr_slots)
i = 0
while (i < num_instr) {
instr = instructions[i]
if (is_text(instr)) {
slot_values = {}
slot_values = array(func.nr_slots)
i = i + 1
continue
}
@@ -766,19 +759,19 @@ var streamline = function(ir, log) {
// Track known constant values
if (op == "int") {
slot_values[text(instr[1])] = instr[2]
slot_values[instr[1]] = instr[2]
} else if (op == "access" && is_number(instr[2])) {
slot_values[text(instr[1])] = instr[2]
slot_values[instr[1]] = instr[2]
} else if (op == "true") {
slot_values[text(instr[1])] = true
slot_values[instr[1]] = true
} else if (op == "false") {
slot_values[text(instr[1])] = false
slot_values[instr[1]] = false
} else if (op == "move") {
sv = slot_values[text(instr[2])]
sv = slot_values[instr[2]]
if (sv != null) {
slot_values[text(instr[1])] = sv
slot_values[instr[1]] = sv
} else {
slot_values[text(instr[1])] = null
slot_values[instr[1]] = null
}
}
@@ -797,7 +790,7 @@ var streamline = function(ir, log) {
why: {op: op, slot: instr[2]}
}
}
slot_values[text(instr[1])] = true
slot_values[instr[1]] = true
i = i + 1
continue
}
@@ -814,7 +807,7 @@ var streamline = function(ir, log) {
why: {op: op, slot: instr[2]}
}
}
slot_values[text(instr[1])] = false
slot_values[instr[1]] = false
i = i + 1
continue
}
@@ -822,7 +815,7 @@ var streamline = function(ir, log) {
// Clear value tracking for dest-producing ops (not reads-only)
if (op == "invoke" || op == "tail_invoke") {
slot_values[text(instr[2])] = null
slot_values[instr[2]] = null
} else if (op != "int" && op != "access" && op != "true" &&
op != "false" && op != "move" && op != "null" &&
op != "jump" && op != "jump_true" && op != "jump_false" &&
@@ -830,7 +823,7 @@ var streamline = function(ir, log) {
op != "store_field" && op != "store_index" &&
op != "store_dynamic" && op != "push" && op != "setarg") {
if (is_number(instr[1])) {
slot_values[text(instr[1])] = null
slot_values[instr[1]] = null
}
}

View File

@@ -1,72 +1,11 @@
var tokenize = function(src, filename) {
var len = length(src)
var cp = array(array(src), codepoint)
var pos = 0
var row = 0
var col = 0
var tokens = []
// Codepoint constants
def CP_LF = 10
def CP_CR = 13
def CP_TAB = 9
def CP_SPACE = 32
def CP_BANG = 33
def CP_DQUOTE = 34
def CP_HASH = 35
def CP_DOLLAR = 36
def CP_PERCENT = 37
def CP_AMP = 38
def CP_SQUOTE = 39
def CP_LPAREN = 40
def CP_RPAREN = 41
def CP_STAR = 42
def CP_PLUS = 43
def CP_COMMA = 44
def CP_MINUS = 45
def CP_DOT = 46
def CP_SLASH = 47
def CP_0 = 48
def CP_1 = 49
def CP_7 = 55
def CP_9 = 57
def CP_COLON = 58
def CP_SEMI = 59
def CP_LT = 60
def CP_EQ = 61
def CP_GT = 62
def CP_QMARK = 63
def CP_AT = 64
def CP_A = 65
def CP_B = 66
def CP_E = 69
def CP_F = 70
def CP_O = 79
def CP_X = 88
def CP_Z = 90
def CP_LBRACKET = 91
def CP_BSLASH = 92
def CP_RBRACKET = 93
def CP_CARET = 94
def CP_UNDERSCORE = 95
def CP_BACKTICK = 96
def CP_a = 97
def CP_b = 98
def CP_e = 101
def CP_f = 102
def CP_n = 110
def CP_o = 111
def CP_r = 114
def CP_t = 116
def CP_u = 117
def CP_x = 120
def CP_z = 122
def CP_LBRACE = 123
def CP_PIPE = 124
def CP_RBRACE = 125
def CP_TILDE = 126
// Keywords lookup
var keywords = {
if: "if", in: "in", do: "do", go: "go",
@@ -79,20 +18,20 @@ var tokenize = function(src, filename) {
}
var pk = function() {
if (pos >= len) return -1
return cp[pos]
if (pos >= len) return null
return src[pos]
}
var pk_at = function(n) {
var idx = pos + n
if (idx >= len) return -1
return cp[idx]
if (idx >= len) return null
return src[idx]
}
var adv = function() {
var c = cp[pos]
var c = src[pos]
pos = pos + 1
if (c == CP_LF) {
if (c == "\n") {
row = row + 1
col = 0
} else {
@@ -102,17 +41,17 @@ var tokenize = function(src, filename) {
}
var is_digit = function(c) {
return c >= CP_0 && c <= CP_9
return c >= "0" && c <= "9"
}
var is_hex = function(c) {
return (c >= CP_0 && c <= CP_9) || (c >= CP_a && c <= CP_f) || (c >= CP_A && c <= CP_F)
return (c >= "0" && c <= "9") || (c >= "a" && c <= "f") || (c >= "A" && c <= "F")
}
var hex_val = function(c) {
if (c >= CP_0 && c <= CP_9) return c - CP_0
if (c >= CP_a && c <= CP_f) return c - CP_a + 10
if (c >= CP_A && c <= CP_F) return c - CP_A + 10
if (c >= "0" && c <= "9") return codepoint(c) - codepoint("0")
if (c >= "a" && c <= "f") return codepoint(c) - codepoint("a") + 10
if (c >= "A" && c <= "F") return codepoint(c) - codepoint("A") + 10
return 0
}
@@ -127,7 +66,7 @@ var tokenize = function(src, filename) {
}
var is_alpha = function(c) {
return (c >= CP_a && c <= CP_z) || (c >= CP_A && c <= CP_Z)
return (c >= "a" && c <= "z") || (c >= "A" && c <= "Z")
}
var is_alnum = function(c) {
@@ -135,41 +74,41 @@ var tokenize = function(src, filename) {
}
var is_ident_start = function(c) {
return is_alpha(c) || c == CP_UNDERSCORE || c == CP_DOLLAR
return is_alpha(c) || c == "_" || c == "$"
}
var is_ident_char = function(c) {
return is_alnum(c) || c == CP_UNDERSCORE || c == CP_DOLLAR || c == CP_QMARK || c == CP_BANG
return is_alnum(c) || c == "_" || c == "$" || c == "?" || c == "!"
}
var substr = function(start, end) {
return text(src, start, end)
}
var read_string = function(quote_cp) {
var read_string = function(quote) {
var start = pos
var start_row = row
var start_col = col
var parts = []
var run_start = 0
var esc = 0
var esc = null
adv() // skip opening quote
run_start = pos
while (pos < len && pk() != quote_cp) {
if (pk() == CP_BSLASH) {
while (pos < len && pk() != quote) {
if (pk() == "\\") {
if (pos > run_start) push(parts, text(src, run_start, pos))
adv()
esc = adv()
if (esc == CP_n) { push(parts, "\n") }
else if (esc == CP_t) { push(parts, "\t") }
else if (esc == CP_r) { push(parts, "\r") }
else if (esc == CP_BSLASH) { push(parts, "\\") }
else if (esc == CP_SQUOTE) { push(parts, "'") }
else if (esc == CP_DQUOTE) { push(parts, "\"") }
else if (esc == CP_0) { push(parts, character(0)) }
else if (esc == CP_BACKTICK) { push(parts, "`") }
else if (esc == CP_u) { push(parts, read_unicode_escape()) }
else { push(parts, character(esc)) }
if (esc == "n") { push(parts, "\n") }
else if (esc == "t") { push(parts, "\t") }
else if (esc == "r") { push(parts, "\r") }
else if (esc == "\\") { push(parts, "\\") }
else if (esc == "'") { push(parts, "'") }
else if (esc == "\"") { push(parts, "\"") }
else if (esc == "0") { push(parts, character(0)) }
else if (esc == "`") { push(parts, "`") }
else if (esc == "u") { push(parts, read_unicode_escape()) }
else { push(parts, esc) }
run_start = pos
} else {
adv()
@@ -192,33 +131,33 @@ var tokenize = function(src, filename) {
var parts = []
var run_start = 0
var depth = 0
var tc = 0
var q = 0
var tc = null
var q = null
var interp_start = 0
adv() // skip opening backtick
run_start = pos
while (pos < len && pk() != CP_BACKTICK) {
if (pk() == CP_BSLASH && pos + 1 < len) {
while (pos < len && pk() != "`") {
if (pk() == "\\" && pos + 1 < len) {
if (pos > run_start) push(parts, text(src, run_start, pos))
push(parts, text(src, pos, pos + 2))
adv(); adv()
run_start = pos
} else if (pk() == CP_DOLLAR && pos + 1 < len && pk_at(1) == CP_LBRACE) {
} else if (pk() == "$" && pos + 1 < len && pk_at(1) == "{") {
if (pos > run_start) push(parts, text(src, run_start, pos))
interp_start = pos
adv(); adv() // $ {
depth = 1
while (pos < len && depth > 0) {
tc = pk()
if (tc == CP_LBRACE) { depth = depth + 1; adv() }
else if (tc == CP_RBRACE) {
if (tc == "{") { depth = depth + 1; adv() }
else if (tc == "}") {
depth = depth - 1
adv()
}
else if (tc == CP_SQUOTE || tc == CP_DQUOTE || tc == CP_BACKTICK) {
else if (tc == "'" || tc == "\"" || tc == "`") {
q = adv()
while (pos < len && pk() != q) {
if (pk() == CP_BSLASH && pos + 1 < len) adv()
if (pk() == "\\" && pos + 1 < len) adv()
adv()
}
if (pos < len) adv()
@@ -245,24 +184,24 @@ var tokenize = function(src, filename) {
var start_row = row
var start_col = col
var raw = ""
if (pk() == CP_0 && (pk_at(1) == CP_x || pk_at(1) == CP_X)) {
if (pk() == "0" && (pk_at(1) == "x" || pk_at(1) == "X")) {
adv(); adv()
while (pos < len && (is_hex(pk()) || pk() == CP_UNDERSCORE)) adv()
} else if (pk() == CP_0 && (pk_at(1) == CP_b || pk_at(1) == CP_B)) {
while (pos < len && (is_hex(pk()) || pk() == "_")) adv()
} else if (pk() == "0" && (pk_at(1) == "b" || pk_at(1) == "B")) {
adv(); adv()
while (pos < len && (pk() == CP_0 || pk() == CP_1 || pk() == CP_UNDERSCORE)) adv()
} else if (pk() == CP_0 && (pk_at(1) == CP_o || pk_at(1) == CP_O)) {
while (pos < len && (pk() == "0" || pk() == "1" || pk() == "_")) adv()
} else if (pk() == "0" && (pk_at(1) == "o" || pk_at(1) == "O")) {
adv(); adv()
while (pos < len && pk() >= CP_0 && pk() <= CP_7) adv()
while (pos < len && pk() >= "0" && pk() <= "7") adv()
} else {
while (pos < len && (is_digit(pk()) || pk() == CP_UNDERSCORE)) adv()
if (pos < len && pk() == CP_DOT) {
while (pos < len && (is_digit(pk()) || pk() == "_")) adv()
if (pos < len && pk() == ".") {
adv()
while (pos < len && (is_digit(pk()) || pk() == CP_UNDERSCORE)) adv()
while (pos < len && (is_digit(pk()) || pk() == "_")) adv()
}
if (pos < len && (pk() == CP_e || pk() == CP_E)) {
if (pos < len && (pk() == "e" || pk() == "E")) {
adv()
if (pos < len && (pk() == CP_PLUS || pk() == CP_MINUS)) adv()
if (pos < len && (pk() == "+" || pk() == "-")) adv()
while (pos < len && is_digit(pk())) adv()
}
}
@@ -305,12 +244,12 @@ var tokenize = function(src, filename) {
var start_row = row
var start_col = col
var raw = ""
if (pk_at(1) == CP_SLASH) {
while (pos < len && pk() != CP_LF && pk() != CP_CR) adv()
if (pk_at(1) == "/") {
while (pos < len && pk() != "\n" && pk() != "\r") adv()
} else {
adv(); adv() // skip /*
while (pos < len) {
if (pk() == CP_STAR && pk_at(1) == CP_SLASH) {
if (pk() == "*" && pk_at(1) == "/") {
adv(); adv()
break
}
@@ -359,144 +298,144 @@ var tokenize = function(src, filename) {
var start_row = 0
var start_col = 0
var raw = ""
if (c == -1) return false
if (c == null) return false
if (c == CP_LF) {
if (c == "\n") {
start = pos; start_row = row; start_col = col
adv()
push(tokens, { kind: "newline", at: start, from_row: start_row, from_column: start_col, to_row: row, to_column: col, value: "\n" })
return true
}
if (c == CP_CR) {
if (c == "\r") {
start = pos; start_row = row; start_col = col
adv()
if (pos < len && pk() == CP_LF) adv()
if (pos < len && pk() == "\n") adv()
push(tokens, { kind: "newline", at: start, from_row: start_row, from_column: start_col, to_row: row, to_column: col, value: "\n" })
return true
}
if (c == CP_SPACE || c == CP_TAB) {
if (c == " " || c == "\t") {
start = pos; start_row = row; start_col = col
while (pos < len && (pk() == CP_SPACE || pk() == CP_TAB)) adv()
while (pos < len && (pk() == " " || pk() == "\t")) adv()
raw = substr(start, pos)
push(tokens, { kind: "space", at: start, from_row: start_row, from_column: start_col, to_row: row, to_column: col, value: raw })
return true
}
if (c == CP_SQUOTE || c == CP_DQUOTE) { read_string(c); return true }
if (c == CP_BACKTICK) { read_template(); return true }
if (c == "'" || c == "\"") { read_string(c); return true }
if (c == "`") { read_template(); return true }
if (is_digit(c)) { read_number(); return true }
if (c == CP_DOT && is_digit(pk_at(1))) { read_number(); return true }
if (c == "." && is_digit(pk_at(1))) { read_number(); return true }
if (is_ident_start(c)) { read_name(); return true }
if (c == CP_SLASH) {
if (pk_at(1) == CP_SLASH || pk_at(1) == CP_STAR) { read_comment(); return true }
if (pk_at(1) == CP_EQ) { emit_op("/=", 2); return true }
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
if (c == "/") {
if (pk_at(1) == "/" || pk_at(1) == "*") { read_comment(); return true }
if (pk_at(1) == "=") { emit_op("/=", 2); return true }
if (pk_at(1) == "!") { emit_ident(2); return true }
emit_op("/", 1); return true
}
if (c == CP_STAR) {
if (pk_at(1) == CP_STAR) {
if (pk_at(2) == CP_BANG) { emit_ident(3); return true }
if (pk_at(2) == CP_EQ) { emit_op("**=", 3); return true }
if (c == "*") {
if (pk_at(1) == "*") {
if (pk_at(2) == "!") { emit_ident(3); return true }
if (pk_at(2) == "=") { emit_op("**=", 3); return true }
emit_op("**", 2); return true
}
if (pk_at(1) == CP_EQ) { emit_op("*=", 2); return true }
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
if (pk_at(1) == "=") { emit_op("*=", 2); return true }
if (pk_at(1) == "!") { emit_ident(2); return true }
emit_op("*", 1); return true
}
if (c == CP_PERCENT) {
if (pk_at(1) == CP_EQ) { emit_op("%=", 2); return true }
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
if (c == "%") {
if (pk_at(1) == "=") { emit_op("%=", 2); return true }
if (pk_at(1) == "!") { emit_ident(2); return true }
emit_op("%", 1); return true
}
if (c == CP_PLUS) {
if (pk_at(1) == CP_EQ) { emit_op("+=", 2); return true }
if (pk_at(1) == CP_PLUS) { emit_op("++", 2); return true }
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
if (c == "+") {
if (pk_at(1) == "=") { emit_op("+=", 2); return true }
if (pk_at(1) == "+") { emit_op("++", 2); return true }
if (pk_at(1) == "!") { emit_ident(2); return true }
emit_op("+", 1); return true
}
if (c == CP_MINUS) {
if (pk_at(1) == CP_EQ) { emit_op("-=", 2); return true }
if (pk_at(1) == CP_MINUS) { emit_op("--", 2); return true }
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
if (c == "-") {
if (pk_at(1) == "=") { emit_op("-=", 2); return true }
if (pk_at(1) == "-") { emit_op("--", 2); return true }
if (pk_at(1) == "!") { emit_ident(2); return true }
emit_op("-", 1); return true
}
if (c == CP_LT) {
if (pk_at(1) == CP_EQ && pk_at(2) == CP_BANG) { emit_ident(3); return true }
if (pk_at(1) == CP_EQ) { emit_op("<=", 2); return true }
if (pk_at(1) == CP_LT) {
if (pk_at(2) == CP_BANG) { emit_ident(3); return true }
if (pk_at(2) == CP_EQ) { emit_op("<<=", 3); return true }
if (c == "<") {
if (pk_at(1) == "=" && pk_at(2) == "!") { emit_ident(3); return true }
if (pk_at(1) == "=") { emit_op("<=", 2); return true }
if (pk_at(1) == "<") {
if (pk_at(2) == "!") { emit_ident(3); return true }
if (pk_at(2) == "=") { emit_op("<<=", 3); return true }
emit_op("<<", 2); return true
}
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
if (pk_at(1) == "!") { emit_ident(2); return true }
emit_op("<", 1); return true
}
if (c == CP_GT) {
if (pk_at(1) == CP_EQ && pk_at(2) == CP_BANG) { emit_ident(3); return true }
if (pk_at(1) == CP_EQ) { emit_op(">=", 2); return true }
if (pk_at(1) == CP_GT) {
if (pk_at(2) == CP_GT) {
if (pk_at(3) == CP_BANG) { emit_ident(4); return true }
if (pk_at(3) == CP_EQ) { emit_op(">>>=", 4); return true }
if (c == ">") {
if (pk_at(1) == "=" && pk_at(2) == "!") { emit_ident(3); return true }
if (pk_at(1) == "=") { emit_op(">=", 2); return true }
if (pk_at(1) == ">") {
if (pk_at(2) == ">") {
if (pk_at(3) == "!") { emit_ident(4); return true }
if (pk_at(3) == "=") { emit_op(">>>=", 4); return true }
emit_op(">>>", 3); return true
}
if (pk_at(2) == CP_BANG) { emit_ident(3); return true }
if (pk_at(2) == CP_EQ) { emit_op(">>=", 3); return true }
if (pk_at(2) == "!") { emit_ident(3); return true }
if (pk_at(2) == "=") { emit_op(">>=", 3); return true }
emit_op(">>", 2); return true
}
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
if (pk_at(1) == "!") { emit_ident(2); return true }
emit_op(">", 1); return true
}
if (c == CP_EQ) {
if (pk_at(1) == CP_EQ) {
if (pk_at(2) == CP_EQ) { emit_op("===", 3); return true }
if (c == "=") {
if (pk_at(1) == "=") {
if (pk_at(2) == "=") { emit_op("===", 3); return true }
emit_op("==", 2); return true
}
if (pk_at(1) == CP_GT) { emit_op("=>", 2); return true }
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
if (pk_at(1) == ">") { emit_op("=>", 2); return true }
if (pk_at(1) == "!") { emit_ident(2); return true }
emit_op("=", 1); return true
}
if (c == CP_BANG) {
if (pk_at(1) == CP_EQ) {
if (pk_at(2) == CP_BANG) { emit_ident(3); return true }
if (pk_at(2) == CP_EQ) { emit_op("!==", 3); return true }
if (c == "!") {
if (pk_at(1) == "=") {
if (pk_at(2) == "!") { emit_ident(3); return true }
if (pk_at(2) == "=") { emit_op("!==", 3); return true }
emit_op("!=", 2); return true
}
emit_op("!", 1); return true
}
if (c == CP_AMP) {
if (pk_at(1) == CP_AMP) {
if (pk_at(2) == CP_BANG) { emit_ident(3); return true }
if (pk_at(2) == CP_EQ) { emit_op("&&=", 3); return true }
if (c == "&") {
if (pk_at(1) == "&") {
if (pk_at(2) == "!") { emit_ident(3); return true }
if (pk_at(2) == "=") { emit_op("&&=", 3); return true }
emit_op("&&", 2); return true
}
if (pk_at(1) == CP_EQ) { emit_op("&=", 2); return true }
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
if (pk_at(1) == "=") { emit_op("&=", 2); return true }
if (pk_at(1) == "!") { emit_ident(2); return true }
emit_op("&", 1); return true
}
if (c == CP_PIPE) {
if (pk_at(1) == CP_PIPE) {
if (pk_at(2) == CP_BANG) { emit_ident(3); return true }
if (pk_at(2) == CP_EQ) { emit_op("||=", 3); return true }
if (c == "|") {
if (pk_at(1) == "|") {
if (pk_at(2) == "!") { emit_ident(3); return true }
if (pk_at(2) == "=") { emit_op("||=", 3); return true }
emit_op("||", 2); return true
}
if (pk_at(1) == CP_EQ) { emit_op("|=", 2); return true }
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
if (pk_at(1) == "=") { emit_op("|=", 2); return true }
if (pk_at(1) == "!") { emit_ident(2); return true }
emit_op("|", 1); return true
}
if (c == CP_CARET) {
if (pk_at(1) == CP_EQ) { emit_op("^=", 2); return true }
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
if (c == "^") {
if (pk_at(1) == "=") { emit_op("^=", 2); return true }
if (pk_at(1) == "!") { emit_ident(2); return true }
emit_op("^", 1); return true
}
if (c == CP_LBRACKET) {
if (pk_at(1) == CP_RBRACKET && pk_at(2) == CP_BANG) { emit_ident(3); return true }
if (c == "[") {
if (pk_at(1) == "]" && pk_at(2) == "!") { emit_ident(3); return true }
emit_op("[", 1); return true
}
if (c == CP_TILDE) {
if (pk_at(1) == CP_BANG) { emit_ident(2); return true }
if (c == "~") {
if (pk_at(1) == "!") { emit_ident(2); return true }
emit_op("~", 1); return true
}
emit_op(character(c), 1)
emit_op(c, 1)
return true
}
@@ -508,7 +447,7 @@ var tokenize = function(src, filename) {
// EOF token
push(tokens, { kind: "eof", at: pos, from_row: row, from_column: col, to_row: row, to_column: col })
return {filename: filename, tokens: tokens, cp: cp}
return {filename: filename, tokens: tokens}
}
return tokenize