Merge branch 'fix_core_scripts' into quicken_mcode

2026-02-16 01:43:08 -06:00
25 changed files with 6635 additions and 74208 deletions

View File

@@ -1,22 +1,15 @@
// Hidden vars come from env:
// CLI mode (cell_init): os, args, core_path, shop_path
// Actor spawn (script_startup): os, json, actorsym, init, core_path, shop_path
// args[0] = script name, args[1..] = user args
// Minimal bootstrap — seeds the content-addressed cache
// Only runs on cold start (C runtime couldn't find engine in cache)
// Hidden vars: os, core_path, shop_path
var load_internal = os.load_internal
function use_embed(name) {
return load_internal("js_core_" + name + "_use")
}
var fd = use_embed('internal_fd')
var json = use_embed('json')
var json_mod = use_embed('json')
var crypto = use_embed('crypto')
var use_cache = {}
use_cache['fd'] = fd
use_cache['os'] = os
use_cache['json'] = json
use_cache['crypto'] = crypto
function content_hash(content) {
return text(crypto.blake2(content), 'h')
}
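// A minimal sketch of the content-addressed scheme (hash value and path are
// illustrative; cache_path, shown next, joins shop_path + '/build/' + hash):
//   cache_path(content_hash(fd.slurp(some_mcode_path)))
//     -> shop_path + '/build/3fa9...'
// so identical bytes always resolve to the same cached artifact.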
@@ -29,250 +22,84 @@ function cache_path(hash) {
function ensure_build_dir() {
if (!shop_path) return null
var dir = shop_path + '/build'
if (!fd.is_dir(dir)) {
fd.mkdir(dir)
}
if (!fd.is_dir(dir)) fd.mkdir(dir)
return dir
}
// Bootstrap: load tokenize.cm, parse.cm, fold.cm from pre-compiled mach bytecode
function use_basic(path) {
if (use_cache[path])
return use_cache[path]
var result = use_embed(replace(path, '/', '_'))
use_cache[path] = result
return result
}
// Load a module from cached .mach or .mcode bytecode
function boot_load(name, env) {
var mcode_path = core_path + '/boot/' + name + ".cm.mcode"
// Load seed pipeline from boot/ (tokenize, parse, mcode only)
function boot_load(name) {
var mcode_path = core_path + '/boot/' + name + '.cm.mcode'
var mcode_blob = null
var hash = null
var cached = null
var mcode_json = null
var mach_blob = null
if (fd.is_file(mcode_path)) {
mcode_blob = fd.slurp(mcode_path)
hash = content_hash(mcode_blob)
cached = cache_path(hash)
if (cached && fd.is_file(cached)) {
return mach_load(fd.slurp(cached), env)
}
mcode_json = text(mcode_blob)
mach_blob = mach_compile_mcode_bin(name, mcode_json)
if (cached) {
ensure_build_dir()
fd.slurpwrite(cached, mach_blob)
}
return mach_load(mach_blob, env)
if (!fd.is_file(mcode_path)) {
print("error: missing seed: " + name + "\n")
disrupt
}
print("error: missing bootstrap bytecode: " + name + "\n")
disrupt
mcode_blob = fd.slurp(mcode_path)
mach_blob = mach_compile_mcode_bin(name, text(mcode_blob))
return mach_load(mach_blob, {use: use_embed})
}
var boot_env = {use: use_basic}
var tokenize_mod = boot_load("tokenize", boot_env)
var parse_mod = boot_load("parse", boot_env)
var fold_mod = boot_load("fold", boot_env)
use_cache['tokenize'] = tokenize_mod
use_cache['parse'] = parse_mod
use_cache['fold'] = fold_mod
var tokenize_mod = boot_load("tokenize")
var parse_mod = boot_load("parse")
var fold_mod = boot_load("fold")
var mcode_mod = boot_load("mcode")
// Always load mcode compiler module
var mcode_mod = boot_load("mcode", boot_env)
use_cache['mcode'] = mcode_mod
use_cache['core/mcode'] = mcode_mod
var streamline_mod = null
// Warn if any .cm source is newer than its compiled bytecode
function check_mach_stale() {
var sources = [
{src: "tokenize.cm", mcode: "boot/tokenize.cm.mcode"},
{src: "parse.cm", mcode: "boot/parse.cm.mcode"},
{src: "fold.cm", mcode: "boot/fold.cm.mcode"},
{src: "mcode.cm", mcode: "boot/mcode.cm.mcode"},
{src: "streamline.cm", mcode: "boot/streamline.cm.mcode"},
{src: "qbe.cm", mcode: "boot/qbe.cm.mcode"},
{src: "qbe_emit.cm", mcode: "boot/qbe_emit.cm.mcode"},
{src: "verify_ir.cm", mcode: "boot/verify_ir.cm.mcode"},
{src: "internal/bootstrap.cm", mcode: "boot/bootstrap.cm.mcode"},
{src: "internal/engine.cm", mcode: "boot/engine.cm.mcode"}
]
var stale = []
var _i = 0
var cm_path = null
var mcode_path = null
var cm_stat = null
var compiled_stat = null
var entry = null
while (_i < length(sources)) {
entry = sources[_i]
cm_path = core_path + '/' + entry.src
mcode_path = core_path + '/' + entry.mcode
if (fd.is_file(mcode_path) && fd.is_file(cm_path)) {
compiled_stat = fd.stat(mcode_path)
cm_stat = fd.stat(cm_path)
if (cm_stat.mtime > compiled_stat.mtime) {
push(stale, entry.src)
}
}
_i = _i + 1
}
if (length(stale) > 0) {
print("warning: bytecode is stale for: " + text(stale, ", ") + "\n")
print("run 'make regen' to update\n")
}
}
check_mach_stale()
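// For example, if parse.cm were edited after boot/parse.cm.mcode was generated
// (file name illustrative), the check above would print:
//   warning: bytecode is stale for: parse.cm
//   run 'make regen' to update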
// analyze: tokenize + parse + fold, check for errors
function analyze(src, filename) {
var tok_result = tokenize_mod(src, filename)
var ast = parse_mod(tok_result.tokens, src, filename, tokenize_mod)
var _i = 0
var prev_line = -1
var prev_msg = null
var e = null
var msg = null
var line = null
var col = null
var has_errors = ast.errors != null && length(ast.errors) > 0
if (has_errors) {
while (_i < length(ast.errors)) {
e = ast.errors[_i]
msg = e.message
line = e.line
col = e.column
if (msg != prev_msg || line != prev_line) {
if (line != null && col != null) {
print(`${filename}:${text(line)}:${text(col)}: error: ${msg}`)
} else {
print(`${filename}: error: ${msg}`)
}
}
prev_line = line
prev_msg = msg
if (e.line != null && e.column != null)
print(`${filename}:${text(e.line)}:${text(e.column)}: error: ${msg}`)
else
print(`${filename}: error: ${msg}`)
_i = _i + 1
}
disrupt
}
ast = fold_mod(ast)
return ast
return fold_mod(ast)
}
// Load optimization pipeline modules (needs analyze to be defined)
streamline_mod = boot_load("streamline", boot_env)
use_cache['streamline'] = streamline_mod
use_cache['core/streamline'] = streamline_mod
// Lazy-loaded verify_ir module (loaded on first use)
var _verify_ir_mod = null
// Run AST through mcode pipeline → register VM
function run_ast(name, ast, env) {
var compiled = mcode_mod(ast)
if (os._verify_ir) {
if (_verify_ir_mod == null) {
_verify_ir_mod = boot_load('verify_ir', boot_env)
}
compiled._verify = true
compiled._verify_mod = _verify_ir_mod
}
var optimized = streamline_mod(compiled)
// Clean up verify properties before JSON encoding
if (optimized._verify) {
delete optimized._verify
delete optimized._verify_mod
}
var mcode_json = json.encode(optimized)
var mach_blob = mach_compile_mcode_bin(name, mcode_json)
return mach_load(mach_blob, env)
}
// Run AST through mcode pipeline WITHOUT optimization → register VM
function run_ast_noopt(name, ast, env) {
var compiled = mcode_mod(ast)
var mcode_json = json.encode(compiled)
var mach_blob = mach_compile_mcode_bin(name, mcode_json)
return mach_load(mach_blob, env)
}
// Compile AST to blob without loading (for caching)
function compile_to_blob(name, ast) {
var compiled = mcode_mod(ast)
var optimized = streamline_mod(compiled)
return mach_compile_mcode_bin(name, json.encode(optimized))
}
// Helper to load engine.cm and run it with given env
function load_engine(env) {
var mcode_path = core_path + '/boot/engine.cm.mcode'
var mcode_blob = null
var hash = null
var cached = null
function compile_and_cache(name, source_path) {
var source_blob = fd.slurp(source_path)
var hash = content_hash(source_blob)
var cached = cache_path(hash)
var ast = null
var compiled = null
var mcode_json = null
var mach_blob = null
var engine_src = null
var engine_ast = null
if (fd.is_file(mcode_path)) {
mcode_blob = fd.slurp(mcode_path)
hash = content_hash(mcode_blob)
cached = cache_path(hash)
if (cached && fd.is_file(cached)) {
return mach_load(fd.slurp(cached), env)
}
mcode_json = text(mcode_blob)
mach_blob = mach_compile_mcode_bin('engine', mcode_json)
if (cached) {
ensure_build_dir()
fd.slurpwrite(cached, mach_blob)
}
return mach_load(mach_blob, env)
if (cached && fd.is_file(cached)) return
ast = analyze(text(source_blob), source_path)
compiled = mcode_mod(ast)
mcode_json = json_mod.encode(compiled)
mach_blob = mach_compile_mcode_bin(name, mcode_json)
if (cached) {
ensure_build_dir()
fd.slurpwrite(cached, mach_blob)
}
// Fallback: compile from source
var engine_cm = core_path + '/internal/engine.cm'
engine_src = text(fd.slurp(engine_cm))
engine_ast = analyze(engine_src, engine_cm)
return run_ast('engine', engine_ast, env)
}
// Detect mode and route
// CLI mode has 'args'; actor spawn mode has 'init'
var program = null
var user_args = []
var _j = 0
if (args != null) {
// CLI mode — always run as actor program (.ce)
program = args[0]
if (!program) {
print("error: no program specified\n")
disrupt
}
_j = 1
while (_j < length(args)) {
push(user_args, args[_j])
_j = _j + 1
}
load_engine({
os: os, actorsym: actorsym,
init: {program: program, arg: user_args},
core_path: core_path, shop_path: shop_path, json: json,
analyze: analyze, run_ast_fn: run_ast, run_ast_noopt_fn: run_ast_noopt,
use_cache: use_cache,
content_hash: content_hash, cache_path: cache_path,
ensure_build_dir: ensure_build_dir, compile_to_blob_fn: compile_to_blob
})
} else {
// Actor spawn mode — load engine.cm with full actor env
load_engine({
os: os, actorsym: actorsym, init: init,
core_path: core_path, shop_path: shop_path, json: json,
analyze: analyze, run_ast_fn: run_ast, run_ast_noopt_fn: run_ast_noopt,
use_cache: use_cache,
content_hash: content_hash, cache_path: cache_path,
ensure_build_dir: ensure_build_dir, compile_to_blob_fn: compile_to_blob
})
// Seed the cache with everything engine needs
var seed_files = [
{name: "tokenize", path: "tokenize.cm"},
{name: "parse", path: "parse.cm"},
{name: "fold", path: "fold.cm"},
{name: "mcode", path: "mcode.cm"},
{name: "streamline", path: "streamline.cm"},
{name: "engine", path: "internal/engine.cm"}
]
var _i = 0
var entry = null
while (_i < length(seed_files)) {
entry = seed_files[_i]
compile_and_cache(entry.name, core_path + '/' + entry.path)
_i = _i + 1
}
print("bootstrap: cache seeded\n")

View File

@@ -1,4 +1,5 @@
// Hidden vars (os, actorsym, init, core_path, shop_path, analyze, run_ast_fn, run_ast_noopt_fn, json, use_cache, content_hash, cache_path, ensure_build_dir, compile_to_blob_fn) come from env
// Hidden vars (os, actorsym, init, core_path, shop_path, json, args) come from env
// Engine is self-sufficient: defines its own compilation pipeline
var ACTORDATA = actorsym
var SYSYM = '__SYSTEM__'
@@ -14,7 +15,7 @@ var cases = {
var dylib_ext = cases[os.platform()]
var MOD_EXT = '.cm'
var ACTOR_EXT = '.ce'
var ACTOR_EXT = '.ce'
var load_internal = os.load_internal
function use_embed(name) {
@@ -47,11 +48,159 @@ function ends_with(str, suffix) {
var fd = use_embed('internal_fd')
var js = use_embed('js')
var crypto = use_embed('crypto')
// core_path and shop_path come from env (bootstrap.cm passes them through)
// core_path and shop_path come from env (C runtime passes them through)
// shop_path may be null if --core was used without --shop
var packages_path = shop_path ? shop_path + '/packages' : null
// Self-sufficient initialization: content-addressed cache
var use_cache = {}
function content_hash(content) {
return text(crypto.blake2(content), 'h')
}
function cache_path(hash) {
if (!shop_path) return null
return shop_path + '/build/' + hash
}
function ensure_build_dir() {
if (!shop_path) return null
var dir = shop_path + '/build'
if (!fd.is_dir(dir)) fd.mkdir(dir)
return dir
}
// Load a pipeline module from cache, with boot/ seed fallback
function load_pipeline_module(name, env) {
var source_path = core_path + '/' + name + '.cm'
var source_blob = null
var hash = null
var cached = null
var mcode_path = null
var mcode_blob = null
var mach_blob = null
if (fd.is_file(source_path)) {
source_blob = fd.slurp(source_path)
hash = content_hash(source_blob)
cached = cache_path(hash)
if (cached && fd.is_file(cached))
return mach_load(fd.slurp(cached), env)
}
// Boot seed fallback
mcode_path = core_path + '/boot/' + name + '.cm.mcode'
if (fd.is_file(mcode_path)) {
mcode_blob = fd.slurp(mcode_path)
mach_blob = mach_compile_mcode_bin(name, text(mcode_blob))
return mach_load(mach_blob, env)
}
print("error: cannot load pipeline module: " + name + "\n")
disrupt
}
// Load compilation pipeline
var pipeline_env = {use: use_embed}
var tokenize_mod = load_pipeline_module('tokenize', pipeline_env)
var parse_mod = load_pipeline_module('parse', pipeline_env)
var fold_mod = load_pipeline_module('fold', pipeline_env)
var mcode_mod = load_pipeline_module('mcode', pipeline_env)
var streamline_mod = load_pipeline_module('streamline', pipeline_env)
use_cache['tokenize'] = tokenize_mod
use_cache['parse'] = parse_mod
use_cache['fold'] = fold_mod
use_cache['mcode'] = mcode_mod
use_cache['core/mcode'] = mcode_mod
use_cache['streamline'] = streamline_mod
use_cache['core/streamline'] = streamline_mod
// analyze: tokenize + parse + fold, check for errors
function analyze(src, filename) {
var tok_result = tokenize_mod(src, filename)
var _ast = parse_mod(tok_result.tokens, src, filename, tokenize_mod)
var _i = 0
var prev_line = -1
var prev_msg = null
var e = null
var msg = null
var line = null
var col = null
var has_errors = _ast.errors != null && length(_ast.errors) > 0
if (has_errors) {
while (_i < length(_ast.errors)) {
e = _ast.errors[_i]
msg = e.message
line = e.line
col = e.column
if (msg != prev_msg || line != prev_line) {
if (line != null && col != null)
print(`${filename}:${text(line)}:${text(col)}: error: ${msg}`)
else
print(`${filename}: error: ${msg}`)
}
prev_line = line
prev_msg = msg
_i = _i + 1
}
disrupt
}
return fold_mod(_ast)
}
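// On failure the loop above prints one diagnostic per error, skipping
// consecutive repeats of the same line and message, e.g. (path, position and
// message illustrative):
//   internal/foo.cm:12:5: error: unexpected token
// and then disrupts instead of returning an AST.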
// Lazy-loaded verify_ir module (loaded on first use)
var _verify_ir_mod = null
// Run AST through mcode pipeline -> register VM
function run_ast_fn(name, ast, env) {
var compiled = mcode_mod(ast)
if (os._verify_ir) {
if (_verify_ir_mod == null) {
_verify_ir_mod = load_pipeline_module('verify_ir', pipeline_env)
}
compiled._verify = true
compiled._verify_mod = _verify_ir_mod
}
var optimized = streamline_mod(compiled)
if (optimized._verify) {
delete optimized._verify
delete optimized._verify_mod
}
var mcode_json = json.encode(optimized)
var mach_blob = mach_compile_mcode_bin(name, mcode_json)
return mach_load(mach_blob, env)
}
// Run AST through mcode pipeline WITHOUT optimization -> register VM
function run_ast_noopt_fn(name, ast, env) {
var compiled = mcode_mod(ast)
var mcode_json = json.encode(compiled)
var mach_blob = mach_compile_mcode_bin(name, mcode_json)
return mach_load(mach_blob, env)
}
// Compile AST to blob without loading (for caching)
function compile_to_blob(name, ast) {
var compiled = mcode_mod(ast)
var optimized = streamline_mod(compiled)
return mach_compile_mcode_bin(name, json.encode(optimized))
}
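// The three helpers above share one pipeline (all names as used in this file):
//   mcode_mod(ast) -> streamline_mod -> json.encode -> mach_compile_mcode_bin -> mach_load
// run_ast_noopt_fn skips the streamline_mod pass, and compile_to_blob stops
// before mach_load so the caller can write the blob to the cache first.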
// If loaded directly by C runtime (not via bootstrap), convert args -> init
var _program = null
var _user_args = []
var _j = 1
var _init = init
if (args != null && _init == null) {
_program = args[0]
while (_j < length(args)) {
push(_user_args, args[_j])
_j = _j + 1
}
_init = {program: _program, arg: _user_args}
}
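// Worked example (argument values hypothetical): started directly by the C
// runtime with args = ['hello.ce', '--fast'], the block above yields
//   _init = {program: 'hello.ce', arg: ['--fast']}
// matching the init shape the old bootstrap used to pass in via env.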
use_cache['core/os'] = os
// Extra env properties added as engine initializes (log, runtime fns, etc.)
@@ -68,8 +217,6 @@ function use_core(path) {
var result = null
var script = null
var ast = null
var mcode_path = null
var mcode_blob = null
var _load_mod = null
// Build env: merge core_extras
@@ -82,32 +229,6 @@ function use_core(path) {
var source_blob = null
var file_path = null
// Check for pre-compiled .cm.mcode JSON IR (generated by regen)
mcode_path = core_path + '/boot/' + replace(path, '/', '_') + '.cm.mcode'
if (fd.is_file(mcode_path)) {
_load_mod = function() {
mcode_blob = fd.slurp(mcode_path)
hash = content_hash(mcode_blob)
cached_path = cache_path(hash)
if (cached_path && fd.is_file(cached_path)) {
result = mach_load(fd.slurp(cached_path), env)
} else {
mach_blob = mach_compile_mcode_bin('core:' + path, text(mcode_blob))
if (cached_path) {
ensure_build_dir()
fd.slurpwrite(cached_path, mach_blob)
}
result = mach_load(mach_blob, env)
}
} disruption {
print("use('" + path + "'): failed to load from " + mcode_path + "\n")
disrupt
}
_load_mod()
use_cache[cache_key] = result
return result
}
// Compile from source .cm file
file_path = core_path + '/' + path + MOD_EXT
if (fd.is_file(file_path)) {
@@ -120,7 +241,7 @@ function use_core(path) {
} else {
script = text(source_blob)
ast = analyze(script, file_path)
mach_blob = compile_to_blob_fn('core:' + path, ast)
mach_blob = compile_to_blob('core:' + path, ast)
if (cached_path) {
ensure_build_dir()
fd.slurpwrite(cached_path, mach_blob)
@@ -230,7 +351,7 @@ function actor_die(err)
//actor_mod.on_exception(actor_die)
_cell.args = init != null ? init : {}
_cell.args = _init != null ? _init : {}
function create_actor(desc) {
var _desc = desc == null ? {id:guid()} : desc
@@ -244,7 +365,7 @@ var $_ = {}
use_cache['core/json'] = json
// Create runtime_env early (empty) filled after pronto loads.
// Create runtime_env early (empty) -- filled after pronto loads.
// Shop accesses it lazily (in inject_env, called at module-use time, not load time)
// so it sees the filled version.
var runtime_env = {}
@@ -265,8 +386,9 @@ core_extras.runtime_env = runtime_env
core_extras.content_hash = content_hash
core_extras.cache_path = cache_path
core_extras.ensure_build_dir = ensure_build_dir
core_extras.compile_to_blob = compile_to_blob
// NOW load shop it receives all of the above via env
// NOW load shop -- it receives all of the above via env
var shop = use_core('internal/shop')
var time = use_core('time')
@@ -387,7 +509,7 @@ REPLYTIMEOUT = config.reply_timeout
replycc: the actor that is waiting for the reply
target: ID of the actor that's supposed to receive the message. Only added to non-direct sends (out of portals)
return: reply ID so the replycc actor can know what callback to send the message to
data: the actual content of the message
}
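// A message of that shape might look like (all values hypothetical):
//   {replycc: waiting_actor, target: receiver_id, return: reply_id, data: payload}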
@@ -458,7 +580,7 @@ $_.connection = function(callback, actor, config) {
callback({type:"local"})
return
}
callback()
}
@@ -539,10 +661,10 @@ $_.start = function start(cb, program) {
if (!program) return
var id = guid()
var startup = {
id,
overling: $_.self,
root,
var startup = {
id,
overling: $_.self,
root,
program,
}
greeters[id] = cb
@@ -748,7 +870,7 @@ actor_mod.register_actor(_cell.id, turn, true, config.ar_timer)
if (config.actor_memory)
js.mem_limit(config.actor_memory)
if (config.stack_max)
js.max_stacksize(config.system.stack_max);
@@ -857,7 +979,7 @@ function handle_message(msg) {
function enet_check()
{
if (portal) portal.service(handle_host)
$_.delay(enet_check, ENETSERVICE);
}
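// Note: enet_check re-arms itself with $_.delay, so portal.service runs once
// per ENETSERVICE interval for as long as the actor stays alive.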
@@ -936,7 +1058,7 @@ $_.clock(_ => {
} else {
script = text(source_blob)
ast = analyze(script, prog_path)
mach_blob = compile_to_blob_fn(prog, ast)
mach_blob = compile_to_blob(prog, ast)
if (cached_path) {
ensure_build_dir()
fd.slurpwrite(cached_path, mach_blob)