bootstrap now uses streamline

This commit is contained in:
2026-02-17 12:23:59 -06:00
parent 2be2b15a61
commit eff3548c50
9 changed files with 121957 additions and 100790 deletions

View File

@@ -2254,318 +2254,6 @@ static int ml_int(cJSON *arr, int idx) {
return (int)cJSON_GetArrayItem(arr, idx)->valuedouble;
}
/* ---- Register compression ----
The mcode compiler allocates slots monotonically, producing register numbers
that can exceed 255. Since MachInstr32 uses 8-bit fields, we must compress
the register space via live-range analysis before lowering.
For each slot we record its first and last instruction reference, then do a
greedy linear-scan allocation to pack them into the fewest physical registers.
Slots referenced by child functions via get/put (parent_slot) are in the
PARENT frame and are not remapped here — only current-frame register
operands are touched. */
#define MAX_REG_ITEMS 32
/* Return cJSON pointers to all current-frame register operands in an
 * instruction. out[] must have room for MAX_REG_ITEMS entries.
 *
 * Every write into out[] is bounds-checked against MAX_REG_ITEMS (the
 * original direct out[c++] stores in the delete/load_field/store_field
 * branches and the default operand loop were not, so a malformed or very
 * long instruction could overflow the caller's array).
 *
 * Returns the number of pointers stored in out[]. */
static int mcode_reg_items(cJSON *it, cJSON **out) {
    int sz = cJSON_GetArraySize(it);
    if (sz < 3) return 0;
    const char *op = cJSON_GetArrayItem(it, 0)->valuestring;
    int c = 0;
    /* Append operand at 'pos' iff it exists, is numeric, and out[] has room. */
#define ADD(pos) do { \
        cJSON *_r = cJSON_GetArrayItem(it, (pos)); \
        if (_r && cJSON_IsNumber(_r) && c < MAX_REG_ITEMS) out[c++] = _r; \
    } while (0)
    /* get/put: only [1] is current-frame (dest/src); [2]=parent_slot, [3]=level */
    if (!strcmp(op, "get") || !strcmp(op, "put")) { ADD(1); return c; }
    /* dest-only */
    if (!strcmp(op, "access") || !strcmp(op, "int") ||
        !strcmp(op, "function") || !strcmp(op, "regexp") ||
        !strcmp(op, "true") || !strcmp(op, "false") || !strcmp(op, "null"))
        { ADD(1); return c; }
    /* invoke: [1]=frame, [2]=dest (result register) */
    if (!strcmp(op, "invoke") || !strcmp(op, "tail_invoke")) { ADD(1); ADD(2); return c; }
    /* goinvoke: [1]=frame only (no result) */
    if (!strcmp(op, "goinvoke")) { ADD(1); return c; }
    /* setarg: [1]=call, [2]=arg_idx(const), [3]=val */
    if (!strcmp(op, "setarg")) { ADD(1); ADD(3); return c; }
    /* frame/goframe: [1]=call, [2]=func, [3]=nr_args(const) */
    if (!strcmp(op, "frame") || !strcmp(op, "goframe")) { ADD(1); ADD(2); return c; }
    /* no regs */
    if (!strcmp(op, "jump") || !strcmp(op, "disrupt")) return 0;
    /* cond only */
    if (!strcmp(op, "jump_true") || !strcmp(op, "jump_false") ||
        !strcmp(op, "jump_not_null"))
        { ADD(1); return c; }
    /* single reg */
    if (!strcmp(op, "return")) { ADD(1); return c; }
    /* delete: [1]=dest, [2]=obj, [3]=key (string or reg).
       ADD(3) only records [3] when it is numeric, i.e. a register. */
    if (!strcmp(op, "delete")) { ADD(1); ADD(2); ADD(3); return c; }
    /* record: [1]=dest, [2]=0(const) — no line/col suffix */
    if (!strcmp(op, "record")) { ADD(1); return c; }
    /* array: [1]=dest, [2]=count(const) — elements added via separate push instrs */
    if (!strcmp(op, "array")) { ADD(1); return c; }
    /* load_field: [1]=dest, [2]=obj, [3]=key (string or reg) */
    if (!strcmp(op, "load_field")) { ADD(1); ADD(2); ADD(3); return c; }
    /* store_field: [1]=obj, [2]=val, [3]=key (string or reg) */
    if (!strcmp(op, "store_field")) { ADD(1); ADD(2); ADD(3); return c; }
    /* Default: every numeric operand in [1..sz-3] is a register.
       Covers move, arithmetic, comparisons, type checks, push, pop,
       load_dynamic, store_dynamic, in, concat, logical, bitwise, etc.
       The trailing two entries are assumed to be line/col and skipped. */
    for (int j = 1; j < sz - 2; j++) {
        cJSON *item = cJSON_GetArrayItem(it, j);
        if (item && cJSON_IsNumber(item) && c < MAX_REG_ITEMS) out[c++] = item;
    }
    return c;
#undef ADD
}
/* Compress register numbers in a single function's mcode JSON so they
fit in 8 bits. Modifies the cJSON instructions and nr_slots in place.
Returns a malloc'd remap table (caller must free), or NULL if no
compression was needed. *out_old_nr_slots is set to the original count. */
static int *mcode_compress_regs(cJSON *fobj, int *out_old_nr_slots,
int *captured_slots, int n_captured) {
cJSON *nr_slots_j = cJSON_GetObjectItemCaseSensitive(fobj, "nr_slots");
int nr_slots = (int)cJSON_GetNumberValue(nr_slots_j);
*out_old_nr_slots = nr_slots;
/* Fast path: already fits MachInstr32's 8-bit register fields. */
if (nr_slots <= 255) return NULL;
int nr_args = (int)cJSON_GetNumberValue(
cJSON_GetObjectItemCaseSensitive(fobj, "nr_args"));
cJSON *instrs = cJSON_GetObjectItemCaseSensitive(fobj, "instructions");
int n = instrs ? cJSON_GetArraySize(instrs) : 0;
/* Step 1: build live ranges (first_ref / last_ref per slot) */
/* NOTE(review): sys_malloc results are not checked here — presumably the
   wrapper aborts on OOM; confirm against its definition. */
int *first_ref = sys_malloc(nr_slots * sizeof(int));
int *last_ref = sys_malloc(nr_slots * sizeof(int));
/* -1 marks a slot that is never referenced; such slots get no physical
   register and keep their identity mapping in remap[]. */
for (int i = 0; i < nr_slots; i++) { first_ref[i] = -1; last_ref[i] = -1; }
/* this + args are live for the whole function */
int pinned = 1 + nr_args;
/* last_ref = n (one past the final instruction) so pinned slots outlive
   everything and are never expired by the linear scan. */
for (int i = 0; i < pinned; i++) { first_ref[i] = 0; last_ref[i] = n; }
{ cJSON *it = instrs ? instrs->child : NULL;
/* Instruction indices count every array entry, including label strings —
   the same indexing is used for label positions below, so ranges agree. */
for (int i = 0; it; i++, it = it->next) {
if (!cJSON_IsArray(it)) continue;
cJSON *regs[MAX_REG_ITEMS];
int rc = mcode_reg_items(it, regs);
for (int j = 0; j < rc; j++) {
int s = (int)regs[j]->valuedouble;
if (s < 0 || s >= nr_slots) continue;
if (first_ref[s] < 0) first_ref[s] = i;
last_ref[s] = i;
}
} }
/* Step 1a: extend live ranges for closure-captured slots.
If a child function captures a parent slot via get/put, that slot must
remain live for the entire parent function (the closure can read it at
any time while the parent frame is on the stack). */
for (int ci = 0; ci < n_captured; ci++) {
int s = captured_slots[ci];
if (s >= 0 && s < nr_slots) {
if (first_ref[s] < 0) first_ref[s] = 0;
last_ref[s] = n;
}
}
/* Step 1b: extend live ranges for loops (backward jumps).
Build label→position map, then for each backward jump [target..jump],
extend all overlapping live ranges to cover the full loop body. */
{
/* Collect label positions (labels are bare strings in the instruction
   array; their index shares the numbering used for first_ref/last_ref). */
typedef struct { const char *name; int pos; } LabelPos;
int lbl_cap = 32, lbl_n = 0;
LabelPos *lbls = sys_malloc(lbl_cap * sizeof(LabelPos));
{ cJSON *it = instrs ? instrs->child : NULL;
for (int i = 0; it; i++, it = it->next) {
if (cJSON_IsString(it)) {
if (lbl_n >= lbl_cap) {
lbl_cap *= 2;
lbls = sys_realloc(lbls, lbl_cap * sizeof(LabelPos));
}
/* Borrowed pointer into the cJSON tree — valid while fobj lives. */
lbls[lbl_n++] = (LabelPos){it->valuestring, i};
}
} }
/* Find backward jumps and extend live ranges.
   Fixpoint iteration: extending a range can push a register into an
   enclosing loop, which may require further extension, so repeat until
   no range changes. */
int changed = 1;
while (changed) {
changed = 0;
cJSON *it = instrs ? instrs->child : NULL;
for (int i = 0; it; i++, it = it->next) {
if (!cJSON_IsArray(it)) continue;
int sz = cJSON_GetArraySize(it);
if (sz < 3) continue;
const char *op = it->child->valuestring;
const char *target = NULL;
/* Unconditional jump: target at [1]; conditional: cond at [1],
   target at [2]. */
if (!strcmp(op, "jump")) {
target = it->child->next->valuestring;
} else if (!strcmp(op, "jump_true") || !strcmp(op, "jump_false") ||
!strcmp(op, "jump_not_null")) {
target = it->child->next->next->valuestring;
}
if (!target) continue;
/* Find label position (linear search over collected labels) */
int tpos = -1;
for (int j = 0; j < lbl_n; j++) {
if (!strcmp(lbls[j].name, target)) { tpos = lbls[j].pos; break; }
}
if (tpos < 0 || tpos >= i) continue; /* forward jump or not found */
/* Backward jump: extend registers that are live INTO the loop
(first_ref < loop start but used inside). Temporaries born
inside the loop body don't need extension — they are per-iteration. */
for (int s = pinned; s < nr_slots; s++) {
if (first_ref[s] < 0) continue;
if (first_ref[s] >= tpos) continue; /* born inside loop — skip */
if (last_ref[s] < tpos) continue; /* dead before loop — skip */
/* Register is live into the loop body — extend to loop end */
if (last_ref[s] < i) { last_ref[s] = i; changed = 1; }
}
}
}
sys_free(lbls);
}
/* Step 2: linear-scan register allocation over the computed intervals.
   Pinned slots (this + args) keep their numbers; everything else is
   packed into the lowest available physical registers. */
typedef struct { int slot, first, last; } SlotInfo;
int cnt = 0;
SlotInfo *sorted = sys_malloc(nr_slots * sizeof(SlotInfo));
for (int s = pinned; s < nr_slots; s++)
if (first_ref[s] >= 0)
sorted[cnt++] = (SlotInfo){s, first_ref[s], last_ref[s]};
/* Sort by first_ref, tie-break by original slot (keeps named vars first).
   Insertion sort: cnt is small in practice and stability doesn't matter
   given the total tie-break. */
for (int i = 1; i < cnt; i++) {
SlotInfo key = sorted[i];
int j = i - 1;
while (j >= 0 && (sorted[j].first > key.first ||
(sorted[j].first == key.first && sorted[j].slot > key.slot))) {
sorted[j + 1] = sorted[j];
j--;
}
sorted[j + 1] = key;
}
/* remap[] starts as the identity so unreferenced and pinned slots map to
   themselves; only allocated slots are overwritten below. */
int *remap = sys_malloc(nr_slots * sizeof(int));
for (int i = 0; i < nr_slots; i++) remap[i] = i;
/* Free-register pool (min-heap would be ideal but a flat scan is fine) */
int *pool = sys_malloc(nr_slots * sizeof(int));
int pool_n = 0;
int next_phys = pinned;
typedef struct { int phys, last; } ActiveAlloc;
ActiveAlloc *active = sys_malloc(cnt * sizeof(ActiveAlloc));
int active_n = 0;
for (int i = 0; i < cnt; i++) {
int first = sorted[i].first;
/* Expire intervals whose last_ref < first; their physical registers
   return to the pool. Swap-with-last keeps the scan O(active_n). */
for (int j = 0; j < active_n; ) {
if (active[j].last < first) {
pool[pool_n++] = active[j].phys;
active[j] = active[--active_n];
} else {
j++;
}
}
/* Pick lowest available physical register (keeps numbering dense). */
int phys;
if (pool_n > 0) {
int mi = 0;
for (int j = 1; j < pool_n; j++)
if (pool[j] < pool[mi]) mi = j;
phys = pool[mi];
pool[mi] = pool[--pool_n];
} else {
phys = next_phys++;
}
remap[sorted[i].slot] = phys;
active[active_n++] = (ActiveAlloc){phys, sorted[i].last};
}
/* Compute new nr_slots (highest assigned physical register + 1). */
int new_max = pinned;
for (int s = 0; s < nr_slots; s++)
if (first_ref[s] >= 0 && remap[s] >= new_max)
new_max = remap[s] + 1;
/* Not fatal: compression proceeds anyway and downstream lowering is
   expected to reject functions that still exceed 255 slots. */
if (new_max > 255)
fprintf(stderr, " WARNING: %d live regs still exceeds 255\n", new_max);
/* Verify: check no two registers with overlapping live ranges share phys.
   Diagnostic only (O(n^2)); prints but does not abort on overlap. */
for (int a = pinned; a < nr_slots; a++) {
if (first_ref[a] < 0) continue;
for (int b = a + 1; b < nr_slots; b++) {
if (first_ref[b] < 0) continue;
if (remap[a] != remap[b]) continue;
/* Same phys — ranges must NOT overlap */
if (first_ref[a] <= last_ref[b] && first_ref[b] <= last_ref[a]) {
fprintf(stderr, " OVERLAP: slot %d [%d,%d] and slot %d [%d,%d] -> phys %d\n",
a, first_ref[a], last_ref[a], b, first_ref[b], last_ref[b], remap[a]);
}
}
}
/* Step 3: apply remap to instructions (in place, via the same operand
   discovery used to build the ranges — so coverage is consistent). */
{ cJSON *it = instrs ? instrs->child : NULL;
for (int i = 0; it; i++, it = it->next) {
if (!cJSON_IsArray(it)) continue;
cJSON *regs[MAX_REG_ITEMS];
int rc = mcode_reg_items(it, regs);
for (int j = 0; j < rc; j++) {
int old = (int)regs[j]->valuedouble;
if (old >= 0 && old < nr_slots) {
cJSON_SetNumberValue(regs[j], remap[old]);
}
}
} }
/* Update nr_slots in the JSON */
cJSON_SetNumberValue(nr_slots_j, new_max);
sys_free(first_ref); sys_free(last_ref);
sys_free(sorted);
sys_free(pool); sys_free(active);
return remap; /* caller must free */
}
/* Lower one function's mcode instructions to MachInstr32 */
static MachCode *mcode_lower_func(cJSON *fobj, const char *filename) {
McodeLowerState s = {0};
@@ -2575,6 +2263,14 @@ static MachCode *mcode_lower_func(cJSON *fobj, const char *filename) {
cJSON_GetObjectItemCaseSensitive(fobj, "nr_close_slots"));
s.nr_slots = (int)cJSON_GetNumberValue(
cJSON_GetObjectItemCaseSensitive(fobj, "nr_slots"));
if (s.nr_slots > 255) {
cJSON *nm_chk = cJSON_GetObjectItemCaseSensitive(fobj, "name");
const char *fn_name = nm_chk ? cJSON_GetStringValue(nm_chk) : "<anonymous>";
fprintf(stderr, "ERROR: function '%s' has %d slots (max 255). "
"Ensure the streamline optimizer ran before mach compilation.\n",
fn_name, s.nr_slots);
return NULL;
}
int dis_raw = (int)cJSON_GetNumberValue(
cJSON_GetObjectItemCaseSensitive(fobj, "disruption_pc"));
cJSON *nm = cJSON_GetObjectItemCaseSensitive(fobj, "name");
@@ -3007,131 +2703,8 @@ MachCode *mach_compile_mcode(cJSON *mcode_json) {
cJSON *main_obj = cJSON_GetObjectItemCaseSensitive(mcode_json, "main");
/* Build parent_of[]: for each function, which function index is its parent.
parent_of[i] = parent index, or func_count for main, or -1 if unknown.
Scan each function (and main) for "function" instructions. */
int *parent_of = sys_malloc(func_count * sizeof(int));
for (int i = 0; i < func_count; i++) parent_of[i] = -1;
/* Scan main's instructions */
{
cJSON *main_instrs = cJSON_GetObjectItemCaseSensitive(main_obj, "instructions");
cJSON *it = main_instrs ? main_instrs->child : NULL;
for (; it; it = it->next) {
if (!cJSON_IsArray(it) || cJSON_GetArraySize(it) < 3) continue;
const char *op = it->child->valuestring;
if (!strcmp(op, "function")) {
int child_idx = (int)it->child->next->next->valuedouble;
if (child_idx >= 0 && child_idx < func_count)
parent_of[child_idx] = func_count; /* main */
}
}
}
/* Scan each function's instructions */
{ cJSON *fobj = funcs_arr ? funcs_arr->child : NULL;
for (int fi = 0; fobj; fi++, fobj = fobj->next) {
cJSON *finstrs = cJSON_GetObjectItemCaseSensitive(fobj, "instructions");
cJSON *it = finstrs ? finstrs->child : NULL;
for (; it; it = it->next) {
if (!cJSON_IsArray(it) || cJSON_GetArraySize(it) < 3) continue;
const char *op = it->child->valuestring;
if (!strcmp(op, "function")) {
int child_idx = (int)it->child->next->next->valuedouble;
if (child_idx >= 0 && child_idx < func_count)
parent_of[child_idx] = fi;
}
}
} }
/* Build per-function capture sets: for each function F, which of its slots
are captured by descendant functions via get/put. Captured slots must
have extended live ranges during register compression. */
int **cap_slots = sys_malloc((func_count + 1) * sizeof(int *));
int *cap_counts = sys_malloc((func_count + 1) * sizeof(int));
memset(cap_slots, 0, (func_count + 1) * sizeof(int *));
memset(cap_counts, 0, (func_count + 1) * sizeof(int));
{ cJSON *fobj = funcs_arr ? funcs_arr->child : NULL;
for (int fi = 0; fobj; fi++, fobj = fobj->next) {
cJSON *finstrs = cJSON_GetObjectItemCaseSensitive(fobj, "instructions");
cJSON *it = finstrs ? finstrs->child : NULL;
for (; it; it = it->next) {
if (!cJSON_IsArray(it) || cJSON_GetArraySize(it) < 4) continue;
const char *op = it->child->valuestring;
if (strcmp(op, "get") && strcmp(op, "put")) continue;
int slot = (int)it->child->next->next->valuedouble;
int level = (int)it->child->next->next->next->valuedouble;
/* Walk up parent chain to find the ancestor whose slot is referenced */
int ancestor = fi;
for (int l = 0; l < level && ancestor >= 0; l++)
ancestor = parent_of[ancestor];
if (ancestor < 0) continue;
/* Add slot to ancestor's capture list (deduplicate) */
int found = 0;
for (int k = 0; k < cap_counts[ancestor]; k++)
if (cap_slots[ancestor][k] == slot) { found = 1; break; }
if (!found) {
cap_slots[ancestor] = sys_realloc(cap_slots[ancestor],
(cap_counts[ancestor] + 1) * sizeof(int));
cap_slots[ancestor][cap_counts[ancestor]++] = slot;
}
}
} }
/* Compress registers for functions that exceed 8-bit slot limits.
Save remap tables so we can fix get/put parent_slot references. */
int **remaps = sys_malloc((func_count + 1) * sizeof(int *));
int *remap_sizes = sys_malloc((func_count + 1) * sizeof(int));
memset(remaps, 0, (func_count + 1) * sizeof(int *));
{ cJSON *fobj = funcs_arr ? funcs_arr->child : NULL;
for (int i = 0; fobj; i++, fobj = fobj->next)
remaps[i] = mcode_compress_regs(fobj,
&remap_sizes[i], cap_slots[i], cap_counts[i]);
}
/* main is stored at index func_count in our arrays */
remaps[func_count] = mcode_compress_regs(main_obj,
&remap_sizes[func_count], cap_slots[func_count], cap_counts[func_count]);
/* Free capture lists */
for (int i = 0; i <= func_count; i++)
if (cap_slots[i]) sys_free(cap_slots[i]);
sys_free(cap_slots);
sys_free(cap_counts);
/* Fix up get/put parent_slot references using ancestor remap tables */
{ cJSON *fobj = funcs_arr ? funcs_arr->child : NULL;
for (int fi = 0; fobj; fi++, fobj = fobj->next) {
cJSON *finstrs = cJSON_GetObjectItemCaseSensitive(fobj, "instructions");
cJSON *it = finstrs ? finstrs->child : NULL;
for (; it; it = it->next) {
if (!cJSON_IsArray(it) || cJSON_GetArraySize(it) < 4) continue;
const char *op = it->child->valuestring;
if (strcmp(op, "get") && strcmp(op, "put")) continue;
int level = (int)it->child->next->next->next->valuedouble;
/* Walk up parent chain 'level' times to find ancestor */
int ancestor = fi;
for (int l = 0; l < level && ancestor >= 0; l++) {
ancestor = parent_of[ancestor];
}
if (ancestor < 0) continue; /* unknown parent — leave as is */
int *anc_remap = remaps[ancestor];
if (!anc_remap) continue; /* ancestor wasn't compressed */
cJSON *slot_item = it->child->next->next;
int old_slot = (int)slot_item->valuedouble;
if (old_slot >= 0 && old_slot < remap_sizes[ancestor]) {
int new_slot = anc_remap[old_slot];
cJSON_SetNumberValue(slot_item, new_slot);
}
}
} }
/* Free remap tables */
for (int i = 0; i <= func_count; i++)
if (remaps[i]) sys_free(remaps[i]);
sys_free(remaps);
sys_free(remap_sizes);
sys_free(parent_of);
/* Slot compression is handled by the streamline optimizer before mach
compilation. mcode_lower_func() asserts nr_slots <= 255. */
/* Compile all flat functions */
MachCode **compiled = NULL;