From 2f681fa36657e93ae7568fe33860258ffa3d2c75 Mon Sep 17 00:00:00 2001 From: John Alanbrook Date: Tue, 10 Feb 2026 17:38:15 -0600 Subject: [PATCH] output for parser stages and c runtime doc --- docs/spec/c-runtime.md | 296 +++++++++++++++++++++++++++++++++++++++++ mcode.ce | 6 +- parse.ce | 3 +- qbe.ce | 18 +++ streamline.ce | 15 +++ tokenize.ce | 1 + 6 files changed, 336 insertions(+), 3 deletions(-) create mode 100644 docs/spec/c-runtime.md create mode 100644 qbe.ce create mode 100644 streamline.ce diff --git a/docs/spec/c-runtime.md b/docs/spec/c-runtime.md new file mode 100644 index 00000000..a35c0926 --- /dev/null +++ b/docs/spec/c-runtime.md @@ -0,0 +1,296 @@ +--- +title: "C Runtime for Native Code" +description: "Minimum C runtime surface for QBE-generated native code" +--- + +## Overview + +QBE-generated native code calls into a C runtime for anything that touches the heap, dispatches dynamically, or requires GC awareness. The design principle: **native code handles control flow and integer math directly; everything else is a runtime call.** + +This document defines the runtime boundary — what must be in C, what QBE handles inline, and how to organize the C code to serve both the mcode interpreter and native code cleanly. 
+ +## The Boundary + +### What native code does inline (no C calls) + +These operations compile to straight QBE instructions with no runtime involvement: + +- **Integer arithmetic**: `add`, `sub`, `mul` on NaN-boxed ints (shift right 1, operate, shift left 1) +- **Integer comparisons**: extract int with shift, compare, produce tagged bool +- **Control flow**: jumps, branches, labels, function entry/exit +- **Slot access**: load/store to frame slots via `%fp` + offset +- **NaN-box tagging**: integer tagging (`n << 1`), bool constants (`0x03`/`0x23`), null (`0x07`) +- **Type tests**: `JS_IsInt` (LSB check), `JS_IsNumber`, `JS_IsText`, `JS_IsNull` — these are bit tests on the value, no heap access needed + +### What requires a C call + +Anything that: +1. **Allocates** (arrays, records, strings, frames, function objects) +2. **Touches the heap** (property get/set, array indexing, closure access) +3. **Dispatches on type at runtime** (dynamic load/store, polymorphic arithmetic) +4. **Calls user functions** (frame setup, argument passing, invocation) +5. **Does string operations** (concatenation, comparison, conversion) + +## Runtime Functions + +### Tier 1: Essential (must exist for any program to run) + +These are called by virtually every QBE program. + +#### Intrinsic Lookup + +```c +// Look up a built-in function by name. Called once per intrinsic per callsite. +JSValue cell_rt_get_intrinsic(JSContext *ctx, const char *name); +``` + +Maps name → C function pointer wrapped in JSValue. This is the primary entry point for all built-in functions (`print`, `text`, `length`, `is_array`, etc). The native code never calls intrinsics directly — it always goes through `get_intrinsic` → `frame` → `invoke`. + +#### Function Calls + +```c +// Allocate a call frame with space for nr_args arguments. +JSValue cell_rt_frame(JSContext *ctx, JSValue fn, int nr_args); + +// Set argument idx in the frame. 
+void cell_rt_setarg(JSValue frame, int idx, JSValue val); + +// Execute the function. Returns the result. +JSValue cell_rt_invoke(JSContext *ctx, JSValue frame); +``` + +This is the universal calling convention. Every function call — user functions, intrinsics, methods — goes through frame/setarg/invoke. The frame allocates a `JSFrameRegister` on the GC heap, setarg fills slots, invoke dispatches. + +**Tail call variants:** + +```c +JSValue cell_rt_goframe(JSContext *ctx, JSValue fn, int nr_args); +void cell_rt_goinvoke(JSContext *ctx, JSValue frame); +``` + +Same as frame/invoke but reuse the caller's stack position. + +### Tier 2: Property Access (needed by any program using records or arrays) + +```c +// Record field by constant name. +JSValue cell_rt_load_field(JSContext *ctx, JSValue obj, const char *name); +void cell_rt_store_field(JSContext *ctx, JSValue obj, JSValue val, const char *name); + +// Array element by integer index. +JSValue cell_rt_load_index(JSContext *ctx, JSValue obj, JSValue idx); +void cell_rt_store_index(JSContext *ctx, JSValue obj, JSValue idx, JSValue val); + +// Dynamic — type of key unknown at compile time. +JSValue cell_rt_load_dynamic(JSContext *ctx, JSValue obj, JSValue key); +void cell_rt_store_dynamic(JSContext *ctx, JSValue obj, JSValue key, JSValue val); +``` + +The typed variants (`load_field`/`load_index`) skip the key-type dispatch that `load_dynamic` must do. When parse and fold provide type information, QBE emit selects the typed variant and the streamline optimizer can narrow dynamic → typed. + +**Implementation**: These are thin wrappers around existing `JS_GetPropertyStr`/`JS_GetPropertyNumber`/`JS_GetProperty` and their `Set` counterparts. + +### Tier 3: Closures (needed by programs with nested functions) + +```c +// Walk depth levels up the frame chain, read slot. +JSValue cell_rt_get_closure(JSContext *ctx, JSValue fp, int depth, int slot); + +// Walk depth levels up, write slot. 
+void cell_rt_put_closure(JSContext *ctx, JSValue fp, JSValue val, int depth, int slot); +``` + +Closure variables live in outer frames. `depth` is how many `caller` links to follow; `slot` is the register index in that frame. + +### Tier 4: Object Construction (needed by programs creating arrays/records/functions) + +```c +// Create a function object from a compiled function index. +// The native code loader must maintain a function table. +JSValue cell_rt_make_function(JSContext *ctx, int fn_id); +``` + +Array and record literals are currently compiled as intrinsic calls (`array(...)`, direct `{...}` construction) which go through the frame/invoke path. A future optimization could add: + +```c +// Fast paths (optional, not yet needed) +JSValue cell_rt_new_array(JSContext *ctx, int len); +JSValue cell_rt_new_record(JSContext *ctx); +``` + +### Tier 5: Collection Operations + +```c +// a[] = val (push) and var v = a[] (pop) +void cell_rt_push(JSContext *ctx, JSValue arr, JSValue val); +JSValue cell_rt_pop(JSContext *ctx, JSValue arr); +``` + +### Tier 6: Error Handling + +```c +// Trigger disruption. Jumps to the disrupt handler or unwinds. +void cell_rt_disrupt(JSContext *ctx); +``` + +### Tier 7: Miscellaneous + +```c +JSValue cell_rt_delete(JSContext *ctx, JSValue obj, JSValue key); +JSValue cell_rt_typeof(JSContext *ctx, JSValue val); +``` + +### Tier 8: String and Float Helpers (called from QBE inline code, not from qbe_emit) + +These are called from the QBE IL that `qbe.cm` generates inline for arithmetic and comparison operations. 
Most are not `cell_rt_` prefixed — they're lower-level; the tolerant-equality and text-ordering wrappers at the end of the list are the exception: + +```c +// Float arithmetic (when operands aren't both ints) +JSValue qbe_float_add(JSContext *ctx, JSValue a, JSValue b); +JSValue qbe_float_sub(JSContext *ctx, JSValue a, JSValue b); +JSValue qbe_float_mul(JSContext *ctx, JSValue a, JSValue b); +JSValue qbe_float_div(JSContext *ctx, JSValue a, JSValue b); +JSValue qbe_float_mod(JSContext *ctx, JSValue a, JSValue b); +JSValue qbe_float_pow(JSContext *ctx, JSValue a, JSValue b); +JSValue qbe_float_neg(JSContext *ctx, JSValue v); +JSValue qbe_float_inc(JSContext *ctx, JSValue v); +JSValue qbe_float_dec(JSContext *ctx, JSValue v); + +// Float comparison (returns C int 0/1 for QBE branching) +int qbe_float_cmp(JSContext *ctx, int op, JSValue a, JSValue b); + +// Bitwise ops on non-int values (convert to int32 first) +JSValue qbe_bnot(JSContext *ctx, JSValue v); +JSValue qbe_bitwise_and(JSContext *ctx, JSValue a, JSValue b); +JSValue qbe_bitwise_or(JSContext *ctx, JSValue a, JSValue b); +JSValue qbe_bitwise_xor(JSContext *ctx, JSValue a, JSValue b); +JSValue qbe_shift_shl(JSContext *ctx, JSValue a, JSValue b); +JSValue qbe_shift_sar(JSContext *ctx, JSValue a, JSValue b); +JSValue qbe_shift_shr(JSContext *ctx, JSValue a, JSValue b); + +// String operations +JSValue JS_ConcatString(JSContext *ctx, JSValue a, JSValue b); +int js_string_compare_value(JSContext *ctx, JSValue a, JSValue b, int eq_only); +JSValue JS_NewString(JSContext *ctx, const char *str); +JSValue __JS_NewFloat64(JSContext *ctx, double d); +int JS_ToBool(JSContext *ctx, JSValue v); + +// String/number type tests (inline-able but currently calls) +int JS_IsText(JSValue v); +int JS_IsNumber(JSValue v); + +// Tolerant equality (== on mixed types) +JSValue cell_rt_eq_tol(JSContext *ctx, JSValue a, JSValue b); +JSValue cell_rt_ne_tol(JSContext *ctx, JSValue a, JSValue b); + +// Text ordering comparisons +JSValue cell_rt_lt_text(JSContext *ctx, JSValue a, JSValue b); +JSValue cell_rt_le_text(JSContext 
*ctx, JSValue a, JSValue b); +JSValue cell_rt_gt_text(JSContext *ctx, JSValue a, JSValue b); +JSValue cell_rt_ge_text(JSContext *ctx, JSValue a, JSValue b); +``` + +## What Exists vs What Needs Writing + +### Already exists (in qbe_helpers.c) + +All `qbe_float_*`, `qbe_bnot`, `qbe_bitwise_*`, `qbe_shift_*`, `qbe_to_bool` — these are implemented and working. + +### Already exists (in runtime.c / quickjs.c) but not yet wrapped + +The underlying operations exist but aren't exposed with the `cell_rt_` names: + +| Runtime function | Underlying implementation | +|---|---| +| `cell_rt_load_field` | `JS_GetPropertyStr(ctx, obj, name)` | +| `cell_rt_load_index` | `JS_GetPropertyNumber(ctx, obj, JS_VALUE_GET_INT(idx))` | +| `cell_rt_load_dynamic` | `JS_GetProperty(ctx, obj, key)` | +| `cell_rt_store_field` | `JS_SetPropertyStr(ctx, obj, name, val)` | +| `cell_rt_store_index` | `JS_SetPropertyNumber(ctx, obj, JS_VALUE_GET_INT(idx), val)` | +| `cell_rt_store_dynamic` | `JS_SetProperty(ctx, obj, key, val)` | +| `cell_rt_delete` | `JS_DeleteProperty(ctx, obj, key)` | +| `cell_rt_push` | `JS_ArrayPush(ctx, &arr, val)` | +| `cell_rt_pop` | `JS_ArrayPop(ctx, arr)` | +| `cell_rt_typeof` | type tag switch → `JS_NewString` | +| `cell_rt_disrupt` | `JS_Throw(ctx, ...)` | +| `cell_rt_eq_tol` / `cell_rt_ne_tol` | comparison logic in mcode.c `eq_tol`/`ne_tol` handler | +| `cell_rt_lt_text` etc. | `js_string_compare_value` + wrap result | + +### Needs new code + +| Runtime function | What's needed | +|---|---| +| `cell_rt_get_intrinsic` | Look up intrinsic by name string, return JSValue function. Currently scattered across `js_cell_intrinsic_get` and the mcode handler. Needs a clean single entry point. | +| `cell_rt_frame` | Allocate `JSFrameRegister`, set function slot, set argc. Exists in mcode.c `frame` handler but not as a callable function. | +| `cell_rt_setarg` | Write to frame slot. Trivial: `frame->slots[idx + 1] = val` (slot 0 is `this`). 
| +| `cell_rt_invoke` | Call the function in the frame. Needs to dispatch: native C function vs mach bytecode vs mcode. This is the critical piece — it must handle all function types. | +| `cell_rt_goframe` / `cell_rt_goinvoke` | Tail call variants. Similar to frame/invoke but reuse caller frame. | +| `cell_rt_make_function` | Create function object from index. Needs a function table (populated by the native loader). | +| `cell_rt_get_closure` / `cell_rt_put_closure` | Walk frame chain. Exists inline in mcode.c `get`/`put` handlers. | + +## Recommended C File Organization + +``` +source/ + cell_runtime.c — NEW: all cell_rt_* functions (the native code API) + qbe_helpers.c — existing: float/bitwise/shift helpers for inline QBE + runtime.c — existing: JS_GetProperty, JS_SetProperty, etc. + quickjs.c — existing: core VM, GC, value representation + mcode.c — existing: mcode interpreter (can delegate to cell_runtime.c) +``` + +**`cell_runtime.c`** is the single file that defines the native code contract. It should: + +1. Include `quickjs-internal.h` for access to value representation and heap types +2. Export all `cell_rt_*` functions with C linkage (no `static`) +3. Keep each function thin — delegate to existing `JS_*` functions where possible +4. 
Handle GC safety: after any allocation (frame, string, array), callers' frames may have moved + +### Implementation Priority + +**Phase 1** — Get "hello world" running natively: +- `cell_rt_get_intrinsic` (to find `print` and `text`) +- `cell_rt_frame`, `cell_rt_setarg`, `cell_rt_invoke` (to call them) +- A loader that takes QBE output → assembles → links → calls `cell_main` + +**Phase 2** — Variables and arithmetic: +- All property access (`load_field`, `load_index`, `store_*`, `load_dynamic`) +- `cell_rt_make_function`, `cell_rt_get_closure`, `cell_rt_put_closure` + +**Phase 3** — Full language: +- `cell_rt_push`, `cell_rt_pop`, `cell_rt_delete`, `cell_rt_typeof` +- `cell_rt_disrupt` +- `cell_rt_goframe`, `cell_rt_goinvoke` +- Text comparison wrappers (`cell_rt_lt_text`, etc.) +- Tolerant equality (`cell_rt_eq_tol`, `cell_rt_ne_tol`) + +## Calling Convention + +All `cell_rt_*` functions follow the same pattern: + +- First argument is `JSContext *ctx` (the one exception is `cell_rt_setarg`, which takes only the frame, the argument index, and the value) +- Values are passed/returned as `JSValue` (64-bit, by value) +- Frame pointers are `JSValue` (tagged pointer to `JSFrameRegister`) +- String names are `const char *` (pointer to data section label) +- Integer constants (slot indices, arg counts) are `int` / `long` + +Native code maintains `%ctx` (JSContext) and `%fp` (current frame pointer) as persistent values across the function body. All slot reads/writes go through `%fp` + offset. + +## What Should NOT Be in the C Runtime + +These are handled entirely by QBE-generated code: + +- **Integer arithmetic and comparisons** — bit operations on NaN-boxed values +- **Control flow** — branches, loops, labels, jumps +- **Boolean logic** — `and`/`or`/`not` on tagged values +- **Constant loading** — integer constants are immediate, strings are data labels +- **Type guard branches** — the `is_int`/`is_text`/`is_null` checks are inline bit tests; the branch to the float or text path is just a QBE `jnz` + +The `qbe.cm` macros already handle all of this. 
The arithmetic path looks like: + +``` +check both ints? → yes → inline int add → done + → no → call qbe_float_add (or JS_ConcatString for text) +``` + +The C runtime is only called on the slow paths (float, text, dynamic dispatch). The fast path (integer arithmetic, comparisons, branching) is fully native. diff --git a/mcode.ce b/mcode.ce index f6614950..eb0708d4 100644 --- a/mcode.ce +++ b/mcode.ce @@ -2,10 +2,12 @@ var fd = use("fd") var json = use("json") var tokenize = use("tokenize") var parse = use("parse") +var fold = use("fold") var mcode = use("mcode") var filename = args[0] var src = text(fd.slurp(filename)) var result = tokenize(src, filename) -var ast = parse(result.tokens, src, filename) -var compiled = mcode(ast) +var ast = parse(result.tokens, src, filename, tokenize) +var folded = fold(ast) +var compiled = mcode(folded) print(json.encode(compiled)) diff --git a/parse.ce b/parse.ce index 8c0c3b0a..b269666d 100644 --- a/parse.ce +++ b/parse.ce @@ -1,8 +1,9 @@ var fd = use("fd") +var json = use("json") var tokenize = use("tokenize") var parse = use("parse") var filename = args[0] var src = text(fd.slurp(filename)) var result = tokenize(src, filename) -var ast = parse(result.tokens, src, filename) +var ast = parse(result.tokens, src, filename, tokenize) print(json.encode(ast)) diff --git a/qbe.ce b/qbe.ce new file mode 100644 index 00000000..4e3d08cd --- /dev/null +++ b/qbe.ce @@ -0,0 +1,18 @@ +var fd = use("fd") +var json = use("json") +var tokenize = use("tokenize") +var parse = use("parse") +var fold = use("fold") +var mcode = use("mcode") +var streamline = use("streamline") +var qbe_macros = use("qbe") +var qbe_emit = use("qbe_emit") +var filename = args[0] +var src = text(fd.slurp(filename)) +var result = tokenize(src, filename) +var ast = parse(result.tokens, src, filename, tokenize) +var folded = fold(ast) +var compiled = mcode(folded) +var optimized = streamline(compiled) +var il = qbe_emit(optimized, qbe_macros) +print(il) diff --git 
a/streamline.ce b/streamline.ce new file mode 100644 index 00000000..48dfd1ed --- /dev/null +++ b/streamline.ce @@ -0,0 +1,15 @@ +var fd = use("fd") +var json = use("json") +var tokenize = use("tokenize") +var parse = use("parse") +var fold = use("fold") +var mcode = use("mcode") +var streamline = use("streamline") +var filename = args[0] +var src = text(fd.slurp(filename)) +var result = tokenize(src, filename) +var ast = parse(result.tokens, src, filename, tokenize) +var folded = fold(ast) +var compiled = mcode(folded) +var optimized = streamline(compiled) +print(json.encode(optimized)) diff --git a/tokenize.ce b/tokenize.ce index fa074035..f7d4fd06 100644 --- a/tokenize.ce +++ b/tokenize.ce @@ -1,4 +1,5 @@ var fd = use("fd") +var json = use("json") var tokenize = use("tokenize") var filename = args[0] var src = text(fd.slurp(filename))