cached bootstrap

This commit is contained in:
2026-02-13 08:11:35 -06:00
parent 77fa058135
commit d26a96bc62
15 changed files with 8319 additions and 2688 deletions

View File

@@ -11,7 +11,7 @@
CELL_SHOP = $(HOME)/.cell
CELL_CORE_PACKAGE = $(CELL_SHOP)/packages/core
# .cm sources that compile to .mach bytecode
# .cm sources that compile to .mcode bytecode
MACH_SOURCES = tokenize.cm parse.cm fold.cm mcode.cm \
internal/bootstrap.cm internal/engine.cm

2791
boot/bootstrap.cm.mcode Normal file

File diff suppressed because it is too large Load Diff

5299
boot/verify_ir.cm.mcode Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -9,11 +9,31 @@ function use_embed(name) {
var fd = use_embed('fd')
var json = use_embed('json')
var crypto = use_embed('crypto')
var use_cache = {}
use_cache['fd'] = fd
use_cache['os'] = os
use_cache['json'] = json
use_cache['crypto'] = crypto
// Hash a blob with crypto.blake2 and render the digest via text(..., 'h').
// The resulting string is used as the file stem of a cached .mach artifact.
function content_hash(content) {
  var digest = crypto.blake2(content)
  return text(digest, 'h')
}
// Map a content hash to its cache file: <shop_path>/build/<hash>.mach.
// Yields null when no shop_path is configured (caching disabled).
function cache_path(hash) {
  if (shop_path) {
    return shop_path + '/build/' + hash + '.mach'
  }
  return null
}
// Make sure <shop_path>/build exists, creating it with fd.mkdir when absent.
// Returns the directory path, or null when no shop_path is configured.
function ensure_build_dir() {
  var build_dir = null
  if (!shop_path) return null
  build_dir = shop_path + '/build'
  if (fd.is_dir(build_dir)) return build_dir
  fd.mkdir(build_dir)
  return build_dir
}
// Bootstrap: load tokenize.cm, parse.cm, fold.cm from pre-compiled mach bytecode
function use_basic(path) {
@@ -24,19 +44,28 @@ function use_basic(path) {
return result
}
// Load a module from .mach/.mcode bytecode (bootstrap modules have no source fallback)
// Load a module from cached .mach or .mcode bytecode
function boot_load(name, env) {
var mach_path = core_path + '/' + name + ".cm.mach"
var mcode_path = core_path + '/' + name + ".cm.mcode"
var data = null
var mcode_path = core_path + '/boot/' + name + ".cm.mcode"
var mcode_blob = null
var hash = null
var cached = null
var mcode_json = null
if (fd.is_file(mach_path)) {
data = fd.slurp(mach_path)
return mach_load(data, env)
}
var mach_blob = null
if (fd.is_file(mcode_path)) {
mcode_json = text(fd.slurp(mcode_path))
return mach_eval_mcode(name, mcode_json, env)
mcode_blob = fd.slurp(mcode_path)
hash = content_hash(mcode_blob)
cached = cache_path(hash)
if (cached && fd.is_file(cached)) {
return mach_load(fd.slurp(cached), env)
}
mcode_json = text(mcode_blob)
mach_blob = mach_compile_mcode_bin(name, mcode_json)
if (cached) {
ensure_build_dir()
fd.slurpwrite(cached, mach_blob)
}
return mach_load(mach_blob, env)
}
print("error: missing bootstrap bytecode: " + name + "\n")
disrupt
@@ -58,43 +87,33 @@ var streamline_mod = null
// Warn if any .cm source is newer than its compiled bytecode
function check_mach_stale() {
var sources = [
"tokenize.cm",
"parse.cm",
"fold.cm",
"mcode.cm",
"streamline.cm",
"qbe.cm",
"qbe_emit.cm",
"verify_ir.cm",
"internal/bootstrap.cm",
"internal/engine.cm"
{src: "tokenize.cm", mcode: "boot/tokenize.cm.mcode"},
{src: "parse.cm", mcode: "boot/parse.cm.mcode"},
{src: "fold.cm", mcode: "boot/fold.cm.mcode"},
{src: "mcode.cm", mcode: "boot/mcode.cm.mcode"},
{src: "streamline.cm", mcode: "boot/streamline.cm.mcode"},
{src: "qbe.cm", mcode: "boot/qbe.cm.mcode"},
{src: "qbe_emit.cm", mcode: "boot/qbe_emit.cm.mcode"},
{src: "verify_ir.cm", mcode: "boot/verify_ir.cm.mcode"},
{src: "internal/bootstrap.cm", mcode: "boot/bootstrap.cm.mcode"},
{src: "internal/engine.cm", mcode: "boot/engine.cm.mcode"}
]
var stale = []
var _i = 0
var cm_path = null
var mach_path = null
var mcode_path = null
var cm_stat = null
var compiled_stat = null
var best_mtime = null
var entry = null
while (_i < length(sources)) {
cm_path = core_path + '/' + sources[_i]
mach_path = cm_path + '.mach'
mcode_path = cm_path + '.mcode'
best_mtime = null
if (fd.is_file(mach_path)) {
best_mtime = fd.stat(mach_path).mtime
}
if (fd.is_file(mcode_path)) {
entry = sources[_i]
cm_path = core_path + '/' + entry.src
mcode_path = core_path + '/' + entry.mcode
if (fd.is_file(mcode_path) && fd.is_file(cm_path)) {
compiled_stat = fd.stat(mcode_path)
if (best_mtime == null || compiled_stat.mtime > best_mtime) {
best_mtime = compiled_stat.mtime
}
}
if (best_mtime != null && fd.is_file(cm_path)) {
cm_stat = fd.stat(cm_path)
if (cm_stat.mtime > best_mtime) {
push(stale, sources[_i])
if (cm_stat.mtime > compiled_stat.mtime) {
push(stale, entry.src)
}
}
_i = _i + 1
@@ -175,23 +194,33 @@ function run_ast_noopt(name, ast, env) {
// Helper to load engine.cm and run it with given env
function load_engine(env) {
var engine_path = core_path + '/internal/engine.cm.mach'
var mcode_path = core_path + '/internal/engine.cm.mcode'
var data = null
var mcode_path = core_path + '/boot/engine.cm.mcode'
var mcode_blob = null
var hash = null
var cached = null
var mcode_json = null
var mach_blob = null
var engine_src = null
var engine_ast = null
if (fd.is_file(engine_path)) {
data = fd.slurp(engine_path)
return mach_load(data, env)
}
if (fd.is_file(mcode_path)) {
mcode_json = text(fd.slurp(mcode_path))
return mach_eval_mcode('engine', mcode_json, env)
mcode_blob = fd.slurp(mcode_path)
hash = content_hash(mcode_blob)
cached = cache_path(hash)
if (cached && fd.is_file(cached)) {
return mach_load(fd.slurp(cached), env)
}
mcode_json = text(mcode_blob)
mach_blob = mach_compile_mcode_bin('engine', mcode_json)
if (cached) {
ensure_build_dir()
fd.slurpwrite(cached, mach_blob)
}
return mach_load(mach_blob, env)
}
engine_path = core_path + '/internal/engine.cm'
engine_src = text(fd.slurp(engine_path))
engine_ast = analyze(engine_src, engine_path)
// Fallback: compile from source
var engine_cm = core_path + '/internal/engine.cm'
engine_src = text(fd.slurp(engine_cm))
engine_ast = analyze(engine_src, engine_cm)
return run_ast('engine', engine_ast, env)
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,8 +1,9 @@
// regen.ce — regenerate .mach bytecode files
// regen.ce — regenerate .mcode bytecode files and pre-warm .mach cache
// Run with: ./cell --core . regen
var fd = use("fd")
var json = use("json")
var crypto = use("crypto")
var tokenize = use("tokenize")
var parse = use("parse")
var fold = use("fold")
@@ -10,24 +11,46 @@ var mcode = use("mcode")
var streamline = use("streamline")
var files = [
{src: "tokenize.cm", name: "tokenize", out: "tokenize.cm.mcode"},
{src: "parse.cm", name: "parse", out: "parse.cm.mcode"},
{src: "fold.cm", name: "fold", out: "fold.cm.mcode"},
{src: "mcode.cm", name: "mcode", out: "mcode.cm.mcode"},
{src: "streamline.cm", name: "streamline", out: "streamline.cm.mcode"},
{src: "qbe.cm", name: "qbe", out: "qbe.cm.mcode"},
{src: "qbe_emit.cm", name: "qbe_emit", out: "qbe_emit.cm.mcode"},
{src: "verify_ir.cm", name: "verify_ir", out: "verify_ir.cm.mcode"},
{src: "internal/bootstrap.cm", name: "bootstrap", out: "internal/bootstrap.cm.mcode"},
{src: "internal/engine.cm", name: "engine", out: "internal/engine.cm.mcode"}
{src: "tokenize.cm", name: "tokenize", out: "boot/tokenize.cm.mcode"},
{src: "parse.cm", name: "parse", out: "boot/parse.cm.mcode"},
{src: "fold.cm", name: "fold", out: "boot/fold.cm.mcode"},
{src: "mcode.cm", name: "mcode", out: "boot/mcode.cm.mcode"},
{src: "streamline.cm", name: "streamline", out: "boot/streamline.cm.mcode"},
{src: "qbe.cm", name: "qbe", out: "boot/qbe.cm.mcode"},
{src: "qbe_emit.cm", name: "qbe_emit", out: "boot/qbe_emit.cm.mcode"},
{src: "verify_ir.cm", name: "verify_ir", out: "boot/verify_ir.cm.mcode"},
{src: "internal/bootstrap.cm", name: "bootstrap", out: "boot/bootstrap.cm.mcode"},
{src: "internal/engine.cm", name: "engine", out: "boot/engine.cm.mcode"}
]
// Resolve shop_path for cache writes
var os = use('os')
var shop = os.getenv('CELL_SHOP')
var home = null
var cache_dir = null
if (!shop) {
home = os.getenv('HOME')
if (home) {
shop = home + '/.cell'
}
}
if (shop) {
cache_dir = shop + '/build'
if (!fd.is_dir(cache_dir)) {
fd.mkdir(cache_dir)
}
}
var i = 0
var entry = null
var src = null
var tok_result = null
var ast = null
var folded = null
var mcode_blob = null
var hash = null
var compact_mcode = null
var mach_blob = null
var compiled = null
var optimized = null
var mcode_text = null
@@ -149,6 +172,15 @@ while (i < length(files)) {
fd.write(f, mcode_text)
fd.close(f)
print(`wrote ${entry.out}`)
// Pre-warm .mach cache
if (cache_dir) {
mcode_blob = stone(blob(mcode_text))
hash = text(crypto.blake2(mcode_blob), 'h')
compact_mcode = json.encode(optimized)
mach_blob = mach_compile_mcode_bin(entry.name, compact_mcode)
fd.slurpwrite(cache_dir + '/' + hash + '.mach', mach_blob)
print(` cached ${hash}.mach`)
}
i = i + 1
}
if (had_errors) {

View File

@@ -11,8 +11,7 @@
#include "cell_internal.h"
#include "cJSON.h"
#define BOOTSTRAP_MACH "internal/bootstrap.cm.mach"
#define BOOTSTRAP_MCODE "internal/bootstrap.cm.mcode"
#define BOOTSTRAP_MCODE "boot/bootstrap.cm.mcode"
#define BOOTSTRAP_SRC "internal/bootstrap.cm"
#define CELL_SHOP_DIR ".cell"
#define CELL_CORE_DIR "packages/core"
@@ -21,6 +20,7 @@
#include <signal.h>
#include <unistd.h>
#include <sys/stat.h>
#include "monocypher.h"
/* Test suite declarations */
int run_c_test_suite(JSContext *ctx);
@@ -31,6 +31,83 @@ static char *shop_path = NULL;
static char *core_path = NULL;
static JSRuntime *g_runtime = NULL;
// Compute the 32-byte BLAKE2b digest of `data` (`size` bytes) and return it
// as a NUL-terminated, lowercase hex string of 64 characters.
// The caller owns the returned buffer and must free() it.
// Returns NULL if the output buffer cannot be allocated.
static char *compute_blake2_hex(const char *data, size_t size) {
  static const char digits[] = "0123456789abcdef";
  uint8_t hash[32];

  // Allocate first so an out-of-memory condition is reported instead of
  // writing hex digits through a NULL pointer.
  char *hex = malloc(sizeof(hash) * 2 + 1);
  if (!hex) return NULL;

  crypto_blake2b(hash, sizeof(hash), (const uint8_t *)data, size);

  // Two hex digits per digest byte, high nibble first (same as "%02x").
  for (size_t i = 0; i < sizeof(hash); i++) {
    hex[i * 2]     = digits[hash[i] >> 4];
    hex[i * 2 + 1] = digits[hash[i] & 0x0f];
  }
  hex[sizeof(hash) * 2] = '\0';
  return hex;
}
// Build the cache file path "<shop_path>/build/<hex>.mach".
// The caller owns the returned string and must free() it.
// Returns NULL when no shop path is configured, when `hex` is NULL, or when
// the path buffer cannot be allocated.
static char *build_cache_path(const char *hex) {
  if (!shop_path || !hex) return NULL;

  // Size the buffer from the actual hex length rather than assuming 64, so a
  // change in digest size can never silently truncate the path.
  size_t len = strlen(shop_path) + strlen("/build/") + strlen(hex) + strlen(".mach") + 1;
  char *path = malloc(len);
  if (!path) return NULL;

  snprintf(path, len, "%s/build/%s.mach", shop_path, hex);
  return path;
}
// Write `size` bytes of `data` to `path`, replacing any existing file.
// Returns 1 on success, 0 on failure.
//
// The data is first written to a sibling temporary file and then renamed
// over `path`. A short write, a failed flush on close, or an interrupted
// process therefore never leaves a truncated file at `path` that a later
// run could load as a valid cache entry.
static int write_cache_file(const char *path, const uint8_t *data, size_t size) {
  if (!path) return 0;

  // Per-process temp name so concurrent writers do not clobber each other.
  size_t tmp_len = strlen(path) + 32;
  char *tmp = malloc(tmp_len);
  if (!tmp) return 0;
  snprintf(tmp, tmp_len, "%s.%ld.tmp", path, (long)getpid());

  FILE *fh = fopen(tmp, "wb");
  if (!fh) {
    free(tmp);
    return 0;
  }

  size_t written = size ? fwrite(data, 1, size, fh) : 0;
  int ok = (written == size);

  // fclose flushes buffered data; a failure here means the file is incomplete.
  if (fclose(fh) != 0) ok = 0;

  // Publish the finished file in a single step.
  if (ok && rename(tmp, path) != 0) ok = 0;

  // Do not leave a partial temp file behind on any failure.
  if (!ok) remove(tmp);

  free(tmp);
  return ok;
}
// Produce the bootstrap binary for the given .mcode text.
//
// The mcode bytes are hashed and the hash names a cache file under
// <shop_path>/build. On a cache hit the cached bytes are returned as-is.
// On a miss the mcode JSON is parsed, compiled and serialized, and the
// result is written to the cache on a best-effort basis.
//
// mcode_data: mcode JSON text; passed to cJSON_Parse, so it must be a
//             NUL-terminated string (assumed from the caller — confirm).
// mcode_size: number of bytes of mcode_data to hash.
// out_size:   receives the size in bytes of the returned buffer.
//
// Returns heap-allocated binary data (released by the callers with free())
// and sets *out_size, or NULL on failure.
static char *load_or_cache_bootstrap(const char *mcode_data, size_t mcode_size, size_t *out_size) {
  char *hex = compute_blake2_hex(mcode_data, mcode_size);
  // Without a hash there is no cache key: skip caching, still compile.
  char *cpath = hex ? build_cache_path(hex) : NULL;
  free(hex);

  if (cpath) {
    // Try loading from cache
    FILE *fh = fopen(cpath, "rb");
    if (fh) {
      char *data = NULL;
      long file_size = -1;
      if (fseek(fh, 0, SEEK_END) == 0)
        file_size = ftell(fh);
      // An empty or unmeasurable file cannot be a usable cache entry;
      // treat it as a miss and recompile instead of returning 0 bytes.
      if (file_size > 0 && fseek(fh, 0, SEEK_SET) == 0) {
        data = malloc((size_t)file_size);
        if (data && fread(data, 1, (size_t)file_size, fh) == (size_t)file_size) {
          fclose(fh);
          free(cpath);
          *out_size = (size_t)file_size;
          return data;
        }
      }
      free(data);
      fclose(fh);
    }
  }

  // Cache miss: compile mcode to binary
  cJSON *mcode = cJSON_Parse(mcode_data);
  if (!mcode) { free(cpath); return NULL; }

  MachCode *mc = mach_compile_mcode(mcode);
  cJSON_Delete(mcode);
  if (!mc) { free(cpath); return NULL; }

  size_t bin_size;
  uint8_t *bin = JS_SerializeMachCode(mc, &bin_size);
  JS_FreeMachCode(mc);
  if (!bin) { free(cpath); return NULL; }

  // Write to cache (best effort: a failure only costs a recompile next run)
  if (cpath) {
    // Make sure the build directory exists so the first run can populate
    // the cache; mkdir failing because it already exists is harmless.
    char *slash = strrchr(cpath, '/');
    if (slash) {
      *slash = '\0';
      mkdir(cpath, 0755);
      *slash = '/';
    }
    write_cache_file(cpath, bin, bin_size);
    free(cpath);
  }

  *out_size = bin_size;
  return (char *)bin;
}
// Get the home directory
static const char* get_home_dir(void) {
const char *home = getenv("HOME");
@@ -179,19 +256,23 @@ void script_startup(cell_rt *prt)
cell_rt *crt = JS_GetContextOpaque(js);
JS_FreeValue(js, js_blob_use(js));
// Load pre-compiled bootstrap (.cm.mach or .cm.mcode)
// Load pre-compiled bootstrap .mcode
size_t boot_size;
char *boot_data = load_core_file(BOOTSTRAP_MACH, &boot_size);
int boot_is_mcode = 0;
if (!boot_data) {
boot_data = load_core_file(BOOTSTRAP_MCODE, &boot_size);
boot_is_mcode = 1;
}
char *boot_data = load_core_file(BOOTSTRAP_MCODE, &boot_size);
if (!boot_data) {
printf("ERROR: Could not load bootstrap from %s!\n", core_path);
return;
}
// Try cache or compile mcode → binary
size_t bin_size;
char *bin_data = load_or_cache_bootstrap(boot_data, boot_size, &bin_size);
free(boot_data);
if (!bin_data) {
printf("ERROR: Failed to compile bootstrap mcode!\n");
return;
}
// Create hidden environment
JSValue hidden_env = JS_NewObject(js);
JS_SetPropertyStr(js, hidden_env, "os", js_os_use(js));
@@ -222,12 +303,10 @@ void script_startup(cell_rt *prt)
// Stone the environment
hidden_env = JS_Stone(js, hidden_env);
// Run through MACH VM
// Run from binary
crt->state = ACTOR_RUNNING;
JSValue v = boot_is_mcode
? JS_RunMachMcode(js, boot_data, boot_size, hidden_env)
: JS_RunMachBin(js, (const uint8_t *)boot_data, boot_size, hidden_env);
free(boot_data);
JSValue v = JS_RunMachBin(js, (const uint8_t *)bin_data, bin_size, hidden_env);
free(bin_data);
uncaught_exception(js, v);
crt->state = ACTOR_IDLE;
set_actor_state(crt);
@@ -360,27 +439,31 @@ int cell_init(int argc, char **argv)
actor_initialize();
size_t boot_size;
char *boot_data = load_core_file(BOOTSTRAP_MACH, &boot_size);
int boot_is_mcode = 0;
if (!boot_data) {
boot_data = load_core_file(BOOTSTRAP_MCODE, &boot_size);
boot_is_mcode = 1;
}
char *boot_data = load_core_file(BOOTSTRAP_MCODE, &boot_size);
if (!boot_data) {
printf("ERROR: Could not load bootstrap from %s\n", core_path);
return 1;
}
// Try cache or compile mcode → binary
size_t bin_size;
char *bin_data = load_or_cache_bootstrap(boot_data, boot_size, &bin_size);
free(boot_data);
if (!bin_data) {
printf("ERROR: Failed to compile bootstrap mcode\n");
return 1;
}
g_runtime = JS_NewRuntime();
if (!g_runtime) {
printf("Failed to create JS runtime\n");
free(boot_data);
free(bin_data);
return 1;
}
JSContext *ctx = JS_NewContextWithHeapSize(g_runtime, 16 * 1024 * 1024);
if (!ctx) {
printf("Failed to create JS context\n");
free(boot_data); JS_FreeRuntime(g_runtime);
free(bin_data); JS_FreeRuntime(g_runtime);
return 1;
}
@@ -419,9 +502,6 @@ int cell_init(int argc, char **argv)
JS_SetPropertyStr(ctx, hidden_env, "core_path", JS_NewString(ctx, core_path));
JS_SetPropertyStr(ctx, hidden_env, "shop_path",
shop_path ? JS_NewString(ctx, shop_path) : JS_NULL);
/* TODO: remove after next 'make regen' — old bootstrap.mach reads these */
JS_SetPropertyStr(ctx, hidden_env, "emit_qbe", JS_FALSE);
JS_SetPropertyStr(ctx, hidden_env, "dump_mach", JS_FALSE);
JS_SetPropertyStr(ctx, hidden_env, "actorsym", JS_DupValue(ctx, cli_rt->actor_sym_ref.val));
JS_SetPropertyStr(ctx, hidden_env, "json", js_json_use(ctx));
JS_SetPropertyStr(ctx, hidden_env, "nota", js_nota_use(ctx));
@@ -435,10 +515,8 @@ int cell_init(int argc, char **argv)
JS_SetPropertyStr(ctx, hidden_env, "args", args_arr);
hidden_env = JS_Stone(ctx, hidden_env);
JSValue result = boot_is_mcode
? JS_RunMachMcode(ctx, boot_data, boot_size, hidden_env)
: JS_RunMachBin(ctx, (const uint8_t *)boot_data, boot_size, hidden_env);
free(boot_data);
JSValue result = JS_RunMachBin(ctx, (const uint8_t *)bin_data, bin_size, hidden_env);
free(bin_data);
int exit_code = 0;
if (JS_IsException(result)) {