Merge branch 'quicken_mcode' into gen_dylib

2026-02-16 00:35:40 -06:00
parent f4f56ed470 ff61ab1f50
commit cd6e357b6e
29 changed files with 160180 additions and 877679 deletions
--- a/boot/bootstrap.cm.mcode
+++ b/boot/bootstrap.cm.mcode
--- a/boot/engine.cm.mcode
+++ b/boot/engine.cm.mcode
--- a/boot/fd.cm.mcode
+++ b/boot/fd.cm.mcode
--- a/boot/fold.cm.mcode
+++ b/boot/fold.cm.mcode
--- a/boot/internal_shop.cm.mcode
+++ b/boot/internal_shop.cm.mcode
--- a/boot/link.cm.mcode
+++ b/boot/link.cm.mcode
--- a/boot/mcode.cm.mcode
+++ b/boot/mcode.cm.mcode
--- a/boot/package.cm.mcode
+++ b/boot/package.cm.mcode
--- a/boot/parse.cm.mcode
+++ b/boot/parse.cm.mcode
--- a/boot/pronto.cm.mcode
+++ b/boot/pronto.cm.mcode
--- a/boot/qbe.cm.mcode
+++ b/boot/qbe.cm.mcode
--- a/boot/qbe_emit.cm.mcode
+++ b/boot/qbe_emit.cm.mcode
--- a/boot/seed_bootstrap.cm.mcode
+++ b/boot/seed_bootstrap.cm.mcode
--- a/boot/streamline.cm.mcode
+++ b/boot/streamline.cm.mcode
--- a/boot/time.cm.mcode
+++ b/boot/time.cm.mcode
--- a/boot/tokenize.cm.mcode
+++ b/boot/tokenize.cm.mcode
--- a/boot/toml.cm.mcode
+++ b/boot/toml.cm.mcode
--- a/boot/toolchains.cm.mcode
+++ b/boot/toolchains.cm.mcode
--- a/boot/verify_ir.cm.mcode
+++ b/boot/verify_ir.cm.mcode
--- a/docs/spec/mach.md
+++ b/docs/spec/mach.md
@@ -82,3 +82,14 @@ Named property instructions (`LOAD_FIELD`, `STORE_FIELD`, `DELETE`) use the iABC
 2. `LOAD_DYNAMIC` / `STORE_DYNAMIC` / `DELETEINDEX` — use the register-based variant

 This is transparent to the mcode compiler and streamline optimizer.
+
+## Arithmetic Dispatch
+
+Arithmetic ops (ADD, SUB, MUL, DIV, MOD, POW) are executed inline without calling the polymorphic `reg_vm_binop()` helper. Because mcode's type guard dispatch guarantees both operands are numbers, only two paths are needed:
+
+1. **Int-int fast path**: `JS_VALUE_IS_BOTH_INT` → native integer arithmetic with int32 overflow check. Overflow promotes to float64.
+2. **Float fallback**: `JS_ToFloat64` → native floating-point operation. Non-finite results produce null.
+
+DIV and MOD check for zero divisor (→ null). POW uses `pow()` with non-finite handling for finite inputs.
+
+Comparison ops (EQ through GE) and bitwise ops still use `reg_vm_binop()` for their slow paths, as they handle a wider range of type combinations (string comparisons, null equality, etc.).
--- a/docs/spec/streamline.md
+++ b/docs/spec/streamline.md
@@ -45,11 +45,10 @@ Backward inference rules:

 | Operator class | Operand type inferred |
 |---|---|
-| `subtract`, `multiply`, `divide`, `modulo`, `pow`, `negate` | T_NUM |
-| `eq_int`, `ne_int`, `lt_int`, `gt_int`, `le_int`, `ge_int`, bitwise ops | T_INT |
-| `eq_float`, `ne_float`, `lt_float`, `gt_float`, `le_float`, `ge_float` | T_FLOAT |
-| `concat`, text comparisons | T_TEXT |
-| `eq_bool`, `ne_bool`, `not`, `and`, `or` | T_BOOL |
+| `add`, `subtract`, `multiply`, `divide`, `modulo`, `pow`, `negate` | T_NUM |
+| bitwise ops (`bitand`, `bitor`, `bitxor`, `shl`, `shr`, `ushr`, `bitnot`) | T_INT |
+| `concat` | T_TEXT |
+| `not`, `and`, `or` | T_BOOL |
 | `store_index` (object operand) | T_ARRAY |
 | `store_index` (index operand) | T_INT |
 | `store_field` (object operand) | T_RECORD |
@@ -59,9 +58,11 @@ Backward inference rules:
 | `load_field` (object operand) | T_RECORD |
 | `pop` (array operand) | T_ARRAY |

-Note: `add` is excluded from backward inference because it is polymorphic — it handles both numeric addition and text concatenation. Only operators that are unambiguously numeric can infer T_NUM.
+Typed comparison operators (`eq_int`, `lt_float`, `lt_text`, etc.) and typed boolean comparisons (`eq_bool`, `ne_bool`) are excluded from backward inference. These ops always appear inside guard dispatch patterns (`is_type` + `jump_false` + typed_op), where mutually exclusive branches use the same slot with different types. Including them would merge conflicting types (e.g., T_INT from `lt_int` + T_FLOAT from `lt_float` + T_TEXT from `lt_text`) into T_UNKNOWN, losing all type information. Only unconditionally executed ops contribute to backward inference.

-When a slot appears with conflicting type inferences, the result is `unknown`. INT + FLOAT conflicts produce `num`.
+Note: `add` infers T_NUM even though the `+` operator it comes from is polymorphic (numeric addition or text concatenation). When `add` appears in the IR, both operands have already passed an `is_num` guard, so they are guaranteed to be numeric. The text concatenation path uses `concat` instead.
+
+When a slot appears with conflicting type inferences, the merge widens: INT + FLOAT → NUM, INT + NUM → NUM, FLOAT + NUM → NUM. Incompatible types (e.g., NUM + TEXT) produce `unknown`.

 **Nop prefix:** none (analysis only, does not modify instructions)

@@ -88,8 +89,9 @@ Write type mapping:
 | `length` | T_INT |
 | bitwise ops | T_INT |
 | `concat` | T_TEXT |
+| `negate` | T_NUM |
+| `add`, `subtract`, `multiply`, `divide`, `modulo`, `pow` | T_NUM |
 | bool ops, comparisons, `in` | T_BOOL |
-| generic arithmetic (`add`, `subtract`, `negate`, etc.) | T_UNKNOWN |
 | `move`, `load_field`, `load_index`, `load_dynamic`, `pop`, `get` | T_UNKNOWN |
 | `invoke`, `tail_invoke` | T_UNKNOWN |

@@ -100,8 +102,9 @@ Common patterns this enables:
 - **Length variables** (`var len = length(arr)`): written by `length` (T_INT) only → invariant T_INT
 - **Boolean flags** (`var found = false; ... found = true`): written by `false` and `true` → invariant T_BOOL
 - **Locally-created containers** (`var arr = []`): written by `array` only → invariant T_ARRAY
+- **Numeric accumulators** (`var sum = 0; sum = sum - x`): written by `access 0` (T_INT) and `subtract` (T_NUM) → merges to T_NUM

-Note: Loop counters (`var i = 0; i = i + 1`) are NOT invariant because `add` produces T_UNKNOWN. However, if `i` is a function parameter used in arithmetic, backward inference from `subtract`/`multiply`/etc. will infer T_NUM for it, which persists across labels.
+Note: Loop counters using `+` (`var i = 0; i = i + 1`) may not achieve write-type invariance because the `+` operator emits a guard dispatch with both `concat` (T_TEXT) and `add` (T_NUM) paths writing to the same temp slot, producing T_UNKNOWN. However, when one operand is a known number literal, `mcode.cm` emits a numeric-only path (see "Known-Number Add Shortcut" below), avoiding the text dispatch. Other arithmetic ops (`-`, `*`, `/`, `%`, `**`) always emit a single numeric write path and work cleanly with write-type analysis.

 **Nop prefix:** none (analysis only, does not modify instructions)

@@ -109,9 +112,11 @@ Note: Loop counters (`var i = 0; i = i + 1`) are NOT invariant because `add` pro

 Forward pass that tracks the known type of each slot. When a type check (`is_int`, `is_text`, `is_num`, etc.) is followed by a conditional jump, and the slot's type is already known, the check and jump can be eliminated or converted to an unconditional jump.

-Three cases:
+Five cases:

 - **Known match** (e.g., `is_int` on a slot known to be `int`): both the check and the conditional jump are eliminated (nop'd).
+- **Subsumption match** (e.g., `is_num` on a slot known to be `int` or `float`): since `int` and `float` are subtypes of `num`, both the check and jump are eliminated.
+- **Subsumption partial** (e.g., `is_int` on a slot known to be `num`): the `num` type could be `int` or `float`, so the check must remain. On fallthrough, the slot narrows to the checked subtype (`int`). This is NOT a mismatch — `num` values can pass an `is_int` check.
 - **Known mismatch** (e.g., `is_text` on a slot known to be `int`): the check is nop'd and the conditional jump is rewritten to an unconditional `jump`.
 - **Unknown**: the check remains, but on fallthrough, the slot's type is narrowed to the checked type (enabling downstream eliminations).

@@ -212,12 +217,44 @@ These inlined opcodes have corresponding Mach VM implementations in `mach.c`.

 Arithmetic operations use generic opcodes: `add`, `subtract`, `multiply`, `divide`, `modulo`, `pow`, `negate`. There are no type-dispatched variants (e.g., no `add_int`/`add_float`).

-The Mach VM dispatches at runtime with an int-first fast path via `reg_vm_binop()`: it checks `JS_VALUE_IS_BOTH_INT` first for fast integer arithmetic, then falls back to float conversion, text concatenation (for `add` only), or type error.
+The Mach VM handles arithmetic inline with a two-tier fast path. Since mcode's type guard dispatch guarantees both operands are numbers by the time arithmetic executes, the VM does not need polymorphic dispatch:
+
+1. **Int-int fast path**: `JS_VALUE_IS_BOTH_INT` → native integer arithmetic with overflow check. If the result fits int32, returns int32; otherwise promotes to float64.
+2. **Float fallback**: `JS_ToFloat64` both operands → native floating-point arithmetic. Non-finite results (infinity, NaN) produce null.
+
+Division and modulo additionally check for zero divisor (→ null). Power uses `pow()` with non-finite handling.
+
+The legacy `reg_vm_binop()` function remains available for comparison operators and any non-mcode bytecode paths, but arithmetic ops no longer call it.

 Bitwise operations (`shl`, `shr`, `ushr`, `bitand`, `bitor`, `bitxor`, `bitnot`) remain integer-only and disrupt if operands are not integers.

 The QBE/native backend maps generic arithmetic to helper calls (`qbe.add`, `qbe.sub`, etc.). The vision for the native path is that with sufficient type inference, the backend can unbox proven-numeric values to raw registers, operate directly, and only rebox at boundaries (returns, calls, stores).

+## Known-Number Add Shortcut
+
+The `+` operator is the only arithmetic op that is polymorphic at the mcode level — `emit_add_decomposed` in `mcode.cm` emits a guard dispatch that checks for text (→ `concat`) before numeric (→ `add`). This dual dispatch means the temp slot is written by both `concat` (T_TEXT) and `add` (T_NUM), producing T_UNKNOWN in write-type analysis.
+
+When either operand is a known number literal (e.g., `i + 1`, `x + 0.5`), `emit_add_decomposed` skips the text dispatch entirely and emits `emit_numeric_binop("add")` — a single `is_num` guard + `add` with no `concat` path. This is safe because text concatenation requires both operands to be text; a known number can never participate in concat.
+
+This optimization eliminates 6-8 instructions from the add block (two `is_text` checks, two conditional jumps, `concat`, `jump`) and produces a clean single-type write path that works with write-type analysis.
+
+Other arithmetic ops (`subtract`, `multiply`, etc.) always use `emit_numeric_binop` and never have this problem.
+
+## Target Slot Propagation
+
+For simple local variable assignments (`i = expr`), the mcode compiler passes the variable's register slot as a `target` to the expression compiler. Binary operations that use `emit_numeric_binop` (subtract, multiply, divide, modulo, pow) can write directly to the target slot instead of allocating a temp and emitting a `move`:
+
+```
+// Before: i = i - 1
+subtract 7, 2, 6    // temp = i - 1
+move 2, 7           // i = temp
+
+// After: i = i - 1
+subtract 2, 2, 6    // i = i - 1 (direct)
+```
+
+The `+` operator is excluded from target slot propagation when it would use the full text+num dispatch (i.e., when neither operand is a known number), because writing both `concat` and `add` to the variable's slot would pollute its write type. When the known-number shortcut applies, `+` uses `emit_numeric_binop` and would be safe for target propagation, but this is not currently implemented — the exclusion is by operator kind, not by dispatch path.
+
 ## Debugging Tools

 Three dump tools inspect the IR at different stages:
@@ -295,6 +332,18 @@ The current purity set is conservative (only `is_*`). It could be expanded by:
 - **User function purity**: Analyze user-defined function bodies during pre_scan. A function is pure if its body contains only pure expressions and calls to known-pure functions. This requires fixpoint iteration for mutual recursion.
 - **Callback-aware purity**: Intrinsics like `filter`, `find`, `reduce`, `some`, `every` are pure if their callback argument is pure.

+### Move Type Resolution in Write-Type Analysis
+
+Currently, `move` instructions produce T_UNKNOWN in write-type analysis. This prevents type propagation through moves — e.g., a slot written by `access 0` (T_INT) and `move` from an `add` result (T_NUM) merges to T_UNKNOWN instead of T_NUM.
+
+A two-pass approach would fix this: first compute write types for all non-move instructions, then resolve moves by looking up the source slot's computed type. If the source has a known type, merge it into the destination; if unknown, skip the move (don't poison the destination with T_UNKNOWN).
+
+This was implemented and tested but causes a bootstrap failure during self-hosting convergence. The root cause is not yet understood — the optimizer modifies its own bytecode, and the move resolution changes the type landscape enough to produce different code on each pass, preventing convergence. Further investigation is needed; the fix is correct in isolation but interacts badly with the self-hosting fixed-point iteration.
+
+### Target Slot Propagation for Add with Known Numbers
+
+When the known-number add shortcut applies (one operand is a literal number), the generated code uses `emit_numeric_binop` which has a single write path. Target slot propagation should be safe in this case, but is currently blocked by the blanket `kind != "+"` exclusion. Refining the exclusion to check whether the shortcut will apply (by testing `is_known_number` on either operand) would enable direct writes for patterns like `i = i + 1`.
+
 ### Forward Type Narrowing from Typed Operations

 With unified arithmetic (generic `add`/`subtract`/`multiply`/`divide`/`modulo`/`negate` instead of typed variants), this approach is no longer applicable. Typed comparisons (`eq_int`, `lt_float`, etc.) still exist and their operands have known types, but these are already handled by backward inference.
--- a/mcode.cm
+++ b/mcode.cm
@@ -291,6 +291,11 @@ var mcode = function(ast) {
      emit_3("add", _bp_dest, _bp_left, _bp_right)
      return null
    }
+    // If either operand is a known number, concat is impossible
+    if (is_known_number(_bp_ln) || is_known_number(_bp_rn)) {
+      emit_numeric_binop("add")
+      return null
+    }
    // Unknown types: emit full dispatch
    var t0 = alloc_slot()
    var t1 = alloc_slot()
@@ -1217,7 +1222,7 @@ var mcode = function(ast) {
  }

  // Binary expression compilation
-  var gen_binary = function(node) {
+  var gen_binary = function(node, target) {
    var kind = node.kind
    var left = node.left
    var right = node.right
@@ -1272,7 +1277,8 @@ var mcode = function(ast) {
    // Standard binary ops
    left_slot = gen_expr(left, -1)
    right_slot = gen_expr(right, -1)
-    dest = alloc_slot()
+    // Use target slot for ops without multi-type dispatch (add has text+num paths)
+    dest = (target >= 0 && kind != "+") ? target : alloc_slot()
    op = binop_map[kind]
    if (op == null) {
      op = "add"
@@ -1426,9 +1432,9 @@ var mcode = function(ast) {
      return val_slot
    }

-    val_slot = gen_expr(right, -1)
    left_kind = left.kind

+    // For local name assignments, try to write directly to the var's slot
    if (left_kind == "name") {
      name = left.name
      level = left.level
@@ -1438,17 +1444,30 @@ var mcode = function(ast) {
      if (level == 0 || level == -1) {
        slot = find_var(name)
        if (slot >= 0) {
-          emit_2("move", slot, val_slot)
-        } else if (level == -1) {
+          val_slot = gen_expr(right, slot)
+          if (val_slot != slot) {
+            emit_2("move", slot, val_slot)
+          }
+          return val_slot
+        }
+        val_slot = gen_expr(right, -1)
+        if (level == -1) {
          add_instr(["set_var", name, val_slot])
        }
-      } else if (level > 0) {
-        _lv = level - 1
-        pstate = parent_states[length(parent_states) - 1 - _lv]
-        pslot = find_var_in_saved(pstate, name)
-        emit_3("put", val_slot, pslot, level)
+      } else {
+        val_slot = gen_expr(right, -1)
+        if (level > 0) {
+          _lv = level - 1
+          pstate = parent_states[length(parent_states) - 1 - _lv]
+          pslot = find_var_in_saved(pstate, name)
+          emit_3("put", val_slot, pslot, level)
+        }
      }
-    } else if (left_kind == ".") {
+      return val_slot
+    }
+
+    val_slot = gen_expr(right, -1)
+    if (left_kind == ".") {
      obj = left.left
      prop = left.right
      obj_slot = gen_expr(obj, -1)
@@ -2045,7 +2064,7 @@ var mcode = function(ast) {
    }

    // Binary operators (fallback)
-    return gen_binary(expr)
+    return gen_binary(expr, target)
  }

  // Statement compilation
--- a/meson.build
+++ b/meson.build
@@ -26,6 +26,7 @@ if get_option('force_gc')
  add_project_arguments('-DFORCE_GC_AT_MALLOC', language: 'c')
 endif

+
 deps = []

 if host_machine.system() == 'darwin'
--- a/prettify_mcode.ce
+++ b/prettify_mcode.ce
@@ -1,116 +0,0 @@
-// prettify_mcode.ce — reformat .mcode files to be human-readable
-// Usage: ./cell --dev prettify_mcode boot/tokenize.cm.mcode
-//        ./cell --dev prettify_mcode boot/*.mcode
-
-var fd = use("fd")
-var json = use("json")
-
-if (length(args) == 0) {
-  print("usage: cell prettify_mcode <file.mcode> [...]")
-  disrupt
-}
-
-// Collapse leaf arrays (instruction arrays) onto single lines
-var compact_arrays = function(json_text) {
-  var lines = array(json_text, "\n")
-  var result = []
-  var i = 0
-  var line = null
-  var trimmed = null
-  var collecting = false
-  var collected = null
-  var indent = null
-  var is_leaf = null
-  var j = 0
-  var inner = null
-  var parts = null
-  var trailing = null
-  var chars = null
-  var k = 0
-
-  while (i < length(lines)) {
-    line = lines[i]
-    trimmed = trim(line)
-    if (collecting == false && trimmed == "[") {
-      collecting = true
-      chars = array(line)
-      k = 0
-      while (k < length(chars) && chars[k] == " ") {
-        k = k + 1
-      }
-      indent = text(line, 0, k)
-      collected = []
-      i = i + 1
-      continue
-    }
-    if (collecting) {
-      if (trimmed == "]" || trimmed == "],") {
-        is_leaf = true
-        j = 0
-        while (j < length(collected)) {
-          inner = trim(collected[j])
-          if (starts_with(inner, "[") || starts_with(inner, "{")) {
-            is_leaf = false
-          }
-          j = j + 1
-        }
-        if (is_leaf && length(collected) > 0) {
-          parts = []
-          j = 0
-          while (j < length(collected)) {
-            inner = trim(collected[j])
-            if (ends_with(inner, ",")) {
-              inner = text(inner, 0, length(inner) - 1)
-            }
-            parts[] = inner
-            j = j + 1
-          }
-          trailing = ""
-          if (ends_with(trimmed, ",")) {
-            trailing = ","
-          }
-          result[] = `${indent}[${text(parts, ", ")}]${trailing}`
-        } else {
-          result[] = `${indent}[`
-          j = 0
-          while (j < length(collected)) {
-            result[] = collected[j]
-            j = j + 1
-          }
-          result[] = line
-        }
-        collecting = false
-      } else {
-        collected[] = line
-      }
-      i = i + 1
-      continue
-    }
-    result[] = line
-    i = i + 1
-  }
-  return text(result, "\n")
-}
-
-var i = 0
-var path = null
-var raw = null
-var obj = null
-var pretty = null
-var f = null
-while (i < length(args)) {
-  path = args[i]
-  if (!fd.is_file(path)) {
-    print(`skip ${path} (not found)`)
-    i = i + 1
-    continue
-  }
-  raw = text(fd.slurp(path))
-  obj = json.decode(raw)
-  pretty = compact_arrays(json.encode(obj, null, 2))
-  f = fd.open(path, "w")
-  fd.write(f, pretty)
-  fd.close(f)
-  print(`prettified ${path}`)
-  i = i + 1
-}
--- a/source/mach.c
+++ b/source/mach.c
--- a/source/quickjs.h
+++ b/source/quickjs.h
@@ -588,7 +588,8 @@ JSValue JS_ParseJSON (JSContext *ctx, const char *buf, size_t buf_len,
 JSValue JS_ParseJSON2 (JSContext *ctx, const char *buf, size_t buf_len,
                       const char *filename, int flags);
 JSValue JS_JSONStringify (JSContext *ctx, JSValue obj,
-                          JSValue replacer, JSValue space0);
+                          JSValue replacer, JSValue space0,
+                          JS_BOOL compact_arrays);

 /* ============================================================
   9. Intrinsic Wrappers (JS_Cell* / JS_Array*)
--- a/source/runtime.c
+++ b/source/runtime.c
@@ -1492,6 +1492,10 @@ void gc_scan_object (JSContext *ctx, void *ptr, uint8_t *from_base, uint8_t *fro
   allow_grow: if true, grow heap when recovery is poor
   alloc_size: the allocation that triggered GC — used to size the new block */
 int ctx_gc (JSContext *ctx, int allow_grow, size_t alloc_size) {
+#ifdef DUMP_GC_TIMING
+  struct timespec gc_t0, gc_t1;
+  clock_gettime(CLOCK_MONOTONIC, &gc_t0);
+#endif
  JSRuntime *rt = ctx->rt;
  size_t old_used = ctx->heap_free - ctx->heap_base;
  size_t old_heap_size = ctx->current_block_size;
@@ -1692,6 +1696,16 @@ int ctx_gc (JSContext *ctx, int allow_grow, size_t alloc_size) {
  ctx->gc_bytes_copied += new_used;
  size_t recovered = old_used > new_used ? old_used - new_used : 0;

+#ifdef DUMP_GC_TIMING
+  clock_gettime(CLOCK_MONOTONIC, &gc_t1);
+  double gc_ms = (gc_t1.tv_sec - gc_t0.tv_sec) * 1000.0 +
+                 (gc_t1.tv_nsec - gc_t0.tv_nsec) / 1e6;
+  fprintf(stderr, "GC #%u: %.2f ms | copied %zu KB | old %zu KB -> new %zu KB | recovered %zu KB (%.0f%%)\n",
+          ctx->gc_count, gc_ms,
+          new_used / 1024, old_used / 1024, new_size / 1024,
+          recovered / 1024,
+          old_used > 0 ? (100.0 * recovered / old_used) : 0.0);
+#endif

  ctx->heap_base = to_base;
  ctx->heap_free = to_free;
@@ -5614,6 +5628,8 @@ typedef struct JSONStringifyContext {
  JSValue gap;
  JSValue empty;
  JSGCRef b_root;  /* GC root for buffer - use JSC_B_GET/SET macros */
+  BOOL compact_arrays;
+  BOOL in_compact_array;
 } JSONStringifyContext;

 /* Macros to access the buffer from the rooted JSValue */
@@ -5718,7 +5734,7 @@ static int js_json_to_str (JSContext *ctx, JSONStringifyContext *jsc, JSValue ho
    }
    indent1_ref.val = JS_ConcatString (ctx, indent_ref.val, jsc->gap);
    if (JS_IsException (indent1_ref.val)) goto exception;
-    if (!JS_IsEmptyString (jsc->gap)) {
+    if (!JS_IsEmptyString (jsc->gap) && !jsc->in_compact_array) {
      sep_ref.val = JS_ConcatString3 (ctx, "\n", indent1_ref.val, "");
      if (JS_IsException (sep_ref.val)) goto exception;
      sep1_ref.val = js_new_string8 (ctx, " ");
@@ -5733,12 +5749,49 @@ static int js_json_to_str (JSContext *ctx, JSONStringifyContext *jsc, JSValue ho
    if (ret < 0) goto exception;
    if (ret) {
      if (js_get_length64 (ctx, &len, val_ref.val)) goto exception;
+      /* Check if this is a leaf array for compact mode.
+         Leaf = no element is an array, and no element is an object
+         that has array-valued properties. */
+      BOOL was_compact = jsc->in_compact_array;
+      if (jsc->compact_arrays && !jsc->in_compact_array && !JS_IsEmptyString (jsc->gap)) {
+        BOOL is_leaf = TRUE;
+        for (i = 0; i < len && is_leaf; i++) {
+          v = JS_GetPropertyNumber (ctx, val_ref.val, i);
+          if (JS_IsException (v)) goto exception;
+          if (JS_IsArray (v) > 0) {
+            is_leaf = FALSE;
+          } else if (mist_is_gc_object (v) && !JS_IsText (v)) {
+            /* Element is an object — check if any property is an array */
+            v_ref.val = v;
+            prop_ref.val = JS_GetOwnPropertyNames (ctx, v_ref.val);
+            if (!JS_IsException (prop_ref.val)) {
+              int64_t nprops;
+              if (!js_get_length64 (ctx, &nprops, prop_ref.val)) {
+                for (int64_t j = 0; j < nprops && is_leaf; j++) {
+                  JSValue key = JS_GetPropertyNumber (ctx, prop_ref.val, j);
+                  if (!JS_IsException (key)) {
+                    JSValue pval = JS_GetPropertyValue (ctx, v_ref.val, key);
+                    if (JS_IsArray (pval) > 0) is_leaf = FALSE;
+                  }
+                }
+              }
+            }
+            v_ref.val = JS_NULL;
+            prop_ref.val = JS_NULL;
+          }
+        }
+        if (is_leaf) jsc->in_compact_array = TRUE;
+      }
      JSC_B_PUTC (jsc, '[');
      for (i = 0; i < len; i++) {
        if (i > 0) {
          JSC_B_PUTC (jsc, ',');
        }
-        JSC_B_CONCAT (jsc, sep_ref.val);
+        if (jsc->in_compact_array && !was_compact) {
+          if (i > 0) JSC_B_PUTC (jsc, ' ');
+        } else {
+          JSC_B_CONCAT (jsc, sep_ref.val);
+        }
        v = JS_GetPropertyNumber (ctx, val_ref.val, i);
        if (JS_IsException (v)) goto exception;
        v_ref.val = v; /* root v — JS_ToString below can trigger GC */
@@ -5751,11 +5804,12 @@ static int js_json_to_str (JSContext *ctx, JSONStringifyContext *jsc, JSValue ho
        if (JS_IsNull (v)) v = JS_NULL;
        if (js_json_to_str (ctx, jsc, val_ref.val, v, indent1_ref.val)) goto exception;
      }
-      if (len > 0 && !JS_IsEmptyString (jsc->gap)) {
+      if (len > 0 && !JS_IsEmptyString (jsc->gap) && !jsc->in_compact_array) {
        JSC_B_PUTC (jsc, '\n');
        JSC_B_CONCAT (jsc, indent_ref.val);
      }
      JSC_B_PUTC (jsc, ']');
+      jsc->in_compact_array = was_compact;
    } else {
      if (!JS_IsNull (jsc->property_list))
        tab_ref.val = jsc->property_list;
@@ -5789,7 +5843,7 @@ static int js_json_to_str (JSContext *ctx, JSONStringifyContext *jsc, JSValue ho
          has_content = TRUE;
        }
      }
-      if (has_content && !JS_IsEmptyString (jsc->gap)) {
+      if (has_content && !JS_IsEmptyString (jsc->gap) && !jsc->in_compact_array) {
        JSC_B_PUTC (jsc, '\n');
        JSC_B_CONCAT (jsc, indent_ref.val);
      }
@@ -5853,7 +5907,7 @@ exception:
  return -1;
 }

-JSValue JS_JSONStringify (JSContext *ctx, JSValue obj, JSValue replacer, JSValue space0) {
+JSValue JS_JSONStringify (JSContext *ctx, JSValue obj, JSValue replacer, JSValue space0, BOOL compact_arrays) {
  JSONStringifyContext jsc_s, *jsc = &jsc_s;
  JSValue val, v, space, ret, wrapper;
  int res;
@@ -5871,6 +5925,8 @@ JSValue JS_JSONStringify (JSContext *ctx, JSValue obj, JSValue replacer, JSValue
  jsc->property_list = JS_NULL;
  jsc->gap = JS_NULL;
  jsc->empty = JS_KEY_empty;
+  jsc->compact_arrays = compact_arrays;
+  jsc->in_compact_array = FALSE;
  ret = JS_NULL;
  wrapper = JS_NULL;

@@ -11146,9 +11202,14 @@ static JSValue js_cell_json_encode (JSContext *ctx, JSValue this_val, int argc,
  if (argc < 1)
    return JS_ThrowTypeError (ctx, "json.encode requires at least 1 argument");

-  JSValue replacer = argc > 1 ? argv[1] : JS_NULL;
-  JSValue space = argc > 2 ? argv[2] : JS_NewInt32 (ctx, 1);
-  JSValue result = JS_JSONStringify (ctx, argv[0], replacer, space);
+  BOOL pretty = argc <= 1 || JS_ToBool (ctx, argv[1]);
+  JSValue space = pretty ? JS_NewInt32 (ctx, 2) : JS_NULL;
+  JSValue replacer = JS_NULL;
+  if (argc > 2 && JS_IsFunction (argv[2]))
+    replacer = argv[2];
+  else if (argc > 3 && JS_IsArray (argv[3]))
+    replacer = argv[3];
+  JSValue result = JS_JSONStringify (ctx, argv[0], replacer, space, pretty);
  return result;
 }

--- a/source/suite.c
+++ b/source/suite.c
@@ -1428,7 +1428,7 @@ TEST(stringify_json_object) {
  obj_ref.val = JS_NewObject(ctx);
  JSValue v1 = JS_NewInt32(ctx, 1);
  JS_SetPropertyStr(ctx, obj_ref.val, "a", v1);
-  JSValue str = JS_JSONStringify(ctx, obj_ref.val, JS_NULL, JS_NULL);
+  JSValue str = JS_JSONStringify(ctx, obj_ref.val, JS_NULL, JS_NULL, 0);
  JS_PopGCRef(ctx, &obj_ref);
  ASSERT(JS_IsText(str));
  const char *s = JS_ToCString(ctx, str);
@@ -1444,7 +1444,7 @@ TEST(stringify_json_array) {
  arr_ref.val = JS_NewArray(ctx);
  JS_ArrayPush(ctx, &arr_ref.val, JS_NewInt32(ctx, 1));
  JS_ArrayPush(ctx, &arr_ref.val, JS_NewInt32(ctx, 2));
-  JSValue str = JS_JSONStringify(ctx, arr_ref.val, JS_NULL, JS_NULL);
+  JSValue str = JS_JSONStringify(ctx, arr_ref.val, JS_NULL, JS_NULL, 0);
  JS_PopGCRef(ctx, &arr_ref);
  ASSERT(JS_IsText(str));
  const char *s = JS_ToCString(ctx, str);
--- a/streamline.cm
+++ b/streamline.cm
@@ -185,9 +185,9 @@ var streamline = function(ir, log) {
      backward_types[slot] = typ
    } else if (existing != typ && existing != T_UNKNOWN) {
      if ((existing == T_INT || existing == T_FLOAT) && typ == T_NUM) {
-        // Keep more specific
+        backward_types[slot] = T_NUM
      } else if (existing == T_NUM && (typ == T_INT || typ == T_FLOAT)) {
-        backward_types[slot] = typ
+        // Keep wider T_NUM
      } else if ((existing == T_INT && typ == T_FLOAT) || (existing == T_FLOAT && typ == T_INT)) {
        backward_types[slot] = T_NUM
      } else {
@@ -230,21 +230,11 @@ var streamline = function(ir, log) {
    subtract: [2, T_NUM, 3, T_NUM], multiply: [2, T_NUM, 3, T_NUM],
    divide: [2, T_NUM, 3, T_NUM], modulo: [2, T_NUM, 3, T_NUM],
    pow: [2, T_NUM, 3, T_NUM], negate: [2, T_NUM],
-    eq_int: [2, T_INT, 3, T_INT], ne_int: [2, T_INT, 3, T_INT],
-    lt_int: [2, T_INT, 3, T_INT], gt_int: [2, T_INT, 3, T_INT],
-    le_int: [2, T_INT, 3, T_INT], ge_int: [2, T_INT, 3, T_INT],
    bitand: [2, T_INT, 3, T_INT], bitor: [2, T_INT, 3, T_INT],
    bitxor: [2, T_INT, 3, T_INT], shl: [2, T_INT, 3, T_INT],
    shr: [2, T_INT, 3, T_INT], ushr: [2, T_INT, 3, T_INT],
    bitnot: [2, T_INT],
-    eq_float: [2, T_FLOAT, 3, T_FLOAT], ne_float: [2, T_FLOAT, 3, T_FLOAT],
-    lt_float: [2, T_FLOAT, 3, T_FLOAT], gt_float: [2, T_FLOAT, 3, T_FLOAT],
-    le_float: [2, T_FLOAT, 3, T_FLOAT], ge_float: [2, T_FLOAT, 3, T_FLOAT],
    concat: [2, T_TEXT, 3, T_TEXT],
-    eq_text: [2, T_TEXT, 3, T_TEXT], ne_text: [2, T_TEXT, 3, T_TEXT],
-    lt_text: [2, T_TEXT, 3, T_TEXT], gt_text: [2, T_TEXT, 3, T_TEXT],
-    le_text: [2, T_TEXT, 3, T_TEXT], ge_text: [2, T_TEXT, 3, T_TEXT],
-    eq_bool: [2, T_BOOL, 3, T_BOOL], ne_bool: [2, T_BOOL, 3, T_BOOL],
    not: [2, T_BOOL], and: [2, T_BOOL, 3, T_BOOL], or: [2, T_BOOL, 3, T_BOOL],
    store_index: [1, T_ARRAY, 2, T_INT], store_field: [1, T_RECORD],
    push: [1, T_ARRAY],
@@ -311,11 +301,11 @@ var streamline = function(ir, log) {
    function: [1, T_FUNCTION], length: [1, T_INT],
    bitnot: [1, T_INT], bitand: [1, T_INT], bitor: [1, T_INT],
    bitxor: [1, T_INT], shl: [1, T_INT], shr: [1, T_INT], ushr: [1, T_INT],
-    negate: [1, T_UNKNOWN], concat: [1, T_TEXT],
+    negate: [1, T_NUM], concat: [1, T_TEXT],
    eq: [1, T_BOOL], ne: [1, T_BOOL], lt: [1, T_BOOL],
    le: [1, T_BOOL], gt: [1, T_BOOL], ge: [1, T_BOOL], in: [1, T_BOOL],
-    add: [1, T_UNKNOWN], subtract: [1, T_UNKNOWN], multiply: [1, T_UNKNOWN],
-    divide: [1, T_UNKNOWN], modulo: [1, T_UNKNOWN], pow: [1, T_UNKNOWN],
+    add: [1, T_NUM], subtract: [1, T_NUM], multiply: [1, T_NUM],
+    divide: [1, T_NUM], modulo: [1, T_NUM], pow: [1, T_NUM],
    move: [1, T_UNKNOWN], load_field: [1, T_UNKNOWN],
    load_index: [1, T_UNKNOWN], load_dynamic: [1, T_UNKNOWN],
    pop: [1, T_UNKNOWN], get: [1, T_UNKNOWN],
@@ -510,6 +500,13 @@ var streamline = function(ir, log) {
                i = i + 2
                continue
              }
+              if ((checked_type == T_INT || checked_type == T_FLOAT) && src_known == T_NUM) {
+                // T_NUM could be int or float — not a mismatch, keep check
+                slot_types[dest] = T_BOOL
+                slot_types[src] = checked_type
+                i = i + 2
+                continue
+              }
              nc = nc + 1
              instructions[i] = "_nop_tc_" + text(nc)
              jlen = length(next)
@@ -579,6 +576,12 @@ var streamline = function(ir, log) {
                i = i + 2
                continue
              }
+              if ((checked_type == T_INT || checked_type == T_FLOAT) && src_known == T_NUM) {
+                // T_NUM could be int or float — not a mismatch, keep check
+                slot_types[dest] = T_BOOL
+                i = i + 2
+                continue
+              }
              nc = nc + 1
              instructions[i] = "_nop_tc_" + text(nc)
              nc = nc + 1