cell/internal/bootstrap.cm

// Hidden vars come from env:
//   CLI mode (cell_init):       os, args, core_path, shop_path
//   Actor spawn (script_startup): os, json, nota, wota, actorsym, init, core_path, shop_path
// args[0] = script name, args[1..] = user args
// Loader taken from the injected `os` object; use_embed calls it with a
// "js_<name>_use" symbol string to obtain a module.
var load_internal = os.load_internal
// Resolve a module by short name: `name` is wrapped as "js_<name>_use" and
// passed to load_internal, whose result is returned unchanged.
function use_embed(name) {
  var symbol = "js_" + name + "_use"
  return load_internal(symbol)
}

// Modules needed before anything else can be loaded.
var fd = use_embed('fd')
var json = use_embed('json')
var crypto = use_embed('crypto')

// Module cache consulted by use_basic, seeded with the modules already in hand.
var use_cache = {
  fd: fd,
  os: os,
  json: json,
  crypto: crypto
}

// Digest `content` with crypto.blake2 and return text(digest, 'h')
// (presumably a hex rendering — confirm against text()'s format codes).
function content_hash(content) {
  var digest = crypto.blake2(content)
  return text(digest, 'h')
}

// Path of the cached .mach file for `hash` under <shop_path>/build,
// or null when shop_path is not set.
function cache_path(hash) {
  if (shop_path) {
    return shop_path + '/build/' + hash + '.mach'
  }
  return null
}

// Make sure <shop_path>/build exists, creating it with fd.mkdir if needed.
// Returns the directory path, or null when shop_path is not set.
function ensure_build_dir() {
  var build_dir = null
  if (shop_path) {
    build_dir = shop_path + '/build'
    if (!fd.is_dir(build_dir)) {
      fd.mkdir(build_dir)
    }
  }
  return build_dir
}

// Bootstrap: load tokenize.cm, parse.cm, fold.cm from pre-compiled mach bytecode

// `use` implementation given to bootstrap modules. Returns the entry from
// use_cache when it is truthy; otherwise loads the embedded module whose name
// is `path` with '/' replaced by '_', caches it under `path`, and returns it.
function use_basic(path) {
  var mod = use_cache[path]
  if (!mod) {
    mod = use_embed(replace(path, '/', '_'))
    use_cache[path] = mod
  }
  return mod
}

// Load a bootstrap module from <core_path>/boot/<name>.cm.mcode.
//   name - module name without extension (e.g. "tokenize")
//   env  - environment passed through to mach_load
// The mcode blob is hashed and the hash selects a cached .mach file; on a
// cache miss the mcode is compiled and, when a cache path is available,
// written back. Prints an error and disrupts if the .mcode file is missing.
function boot_load(name, env) {
  var source_path = core_path + '/boot/' + name + ".cm.mcode"
  var source_blob = null
  var cache_file = null
  var binary = null
  if (!fd.is_file(source_path)) {
    print("error: missing bootstrap bytecode: " + name + "\n")
    disrupt
  }
  source_blob = fd.slurp(source_path)
  cache_file = cache_path(content_hash(source_blob))
  // Cache hit: a .mach file for exactly this mcode content already exists.
  if (cache_file && fd.is_file(cache_file)) {
    return mach_load(fd.slurp(cache_file), env)
  }
  binary = mach_compile_mcode_bin(name, text(source_blob))
  // cache_file is null when there is no shop_path; then nothing is written.
  if (cache_file) {
    ensure_build_dir()
    fd.slurpwrite(cache_file, binary)
  }
  return mach_load(binary, env)
}

// Environment passed to boot_load for every bootstrap module; its `use`
// entry is use_basic, so lookups go through use_cache / embedded modules.
var boot_env = {use: use_basic}
// Load the three front-end modules first. They are published in use_cache
// only after all three loads have completed (order kept as-is on purpose).
var tokenize_mod = boot_load("tokenize", boot_env)
var parse_mod = boot_load("parse", boot_env)
var fold_mod = boot_load("fold", boot_env)
use_cache['tokenize'] = tokenize_mod
use_cache['parse'] = parse_mod
use_cache['fold'] = fold_mod

// Always load mcode compiler module
var mcode_mod = boot_load("mcode", boot_env)
use_cache['mcode'] = mcode_mod
// Declared here but assigned later, after analyze() has been defined.
var streamline_mod = null

// Warn if any .cm source is newer than its compiled bytecode

// True when both files exist and the source's mtime is later than the
// bytecode's mtime; false otherwise (including when either file is missing).
function source_newer_than_bytecode(src_path, bytecode_path) {
  var bytecode_stat = null
  var src_stat = null
  if (!(fd.is_file(bytecode_path) && fd.is_file(src_path))) {
    return false
  }
  bytecode_stat = fd.stat(bytecode_path)
  src_stat = fd.stat(src_path)
  return src_stat.mtime > bytecode_stat.mtime
}

// Compare each tracked source under core_path with its boot/*.mcode file and
// print a single warning listing every source that is newer. Returns nothing
// and never aborts; pairs with a missing file are silently skipped.
function check_mach_stale() {
  var tracked = [
    {src: "tokenize.cm", mcode: "boot/tokenize.cm.mcode"},
    {src: "parse.cm", mcode: "boot/parse.cm.mcode"},
    {src: "fold.cm", mcode: "boot/fold.cm.mcode"},
    {src: "mcode.cm", mcode: "boot/mcode.cm.mcode"},
    {src: "streamline.cm", mcode: "boot/streamline.cm.mcode"},
    {src: "qbe.cm", mcode: "boot/qbe.cm.mcode"},
    {src: "qbe_emit.cm", mcode: "boot/qbe_emit.cm.mcode"},
    {src: "verify_ir.cm", mcode: "boot/verify_ir.cm.mcode"},
    {src: "internal/bootstrap.cm", mcode: "boot/bootstrap.cm.mcode"},
    {src: "internal/engine.cm", mcode: "boot/engine.cm.mcode"}
  ]
  var outdated = []
  var idx = 0
  var item = null
  while (idx < length(tracked)) {
    item = tracked[idx]
    if (source_newer_than_bytecode(core_path + '/' + item.src, core_path + '/' + item.mcode)) {
      push(outdated, item.src)
    }
    idx = idx + 1
  }
  if (length(outdated) > 0) {
    print("warning: bytecode is stale for: " + text(outdated, ", ") + "\n")
    print("run 'make regen' to update\n")
  }
}
// Run the staleness check once while bootstrapping; it only prints warnings.
check_mach_stale()

// Print every parse error in `errors`, skipping an entry when both its message
// and its line match the entry immediately before it. Entries with a line and
// a column are printed as "file:line:col: error: msg", others as
// "file: error: msg".
function print_parse_errors(filename, errors) {
  var idx = 0
  var last_line = -1
  var last_msg = null
  var err = null
  var msg = null
  var line = null
  var col = null
  var is_repeat = false
  while (idx < length(errors)) {
    err = errors[idx]
    msg = err.message
    line = err.line
    col = err.column
    is_repeat = msg == last_msg && line == last_line
    if (!is_repeat) {
      if (line != null && col != null) {
        print(`${filename}:${text(line)}:${text(col)}: error: ${msg}`)
      } else {
        print(`${filename}: error: ${msg}`)
      }
    }
    last_line = line
    last_msg = msg
    idx = idx + 1
  }
}

// analyze: tokenize + parse, check for errors
//   src      - source text
//   filename - name used in diagnostics and passed to tokenizer/parser
// Returns the AST after fold_mod has been applied. If the parser reported
// any errors they are printed and execution disrupts before folding.
function analyze(src, filename) {
  var tokens = tokenize_mod(src, filename).tokens
  var tree = parse_mod(tokens, src, filename, tokenize_mod)
  if (tree.errors != null && length(tree.errors) > 0) {
    print_parse_errors(filename, tree.errors)
    disrupt
  }
  return fold_mod(tree)
}

// Load optimization pipeline modules (needs analyze to be defined)
// Fills in the streamline_mod variable declared earlier and publishes the
// module in use_cache under 'streamline'.
streamline_mod = boot_load("streamline", boot_env)
use_cache['streamline'] = streamline_mod

// Lazy-loaded verify_ir module (loaded on first use)
// Stays null until run_ast sees os._verify_ir set and loads it via boot_load.
var _verify_ir_mod = null

// Tag `ir` for verification: loads the verify_ir module on first use (kept in
// _verify_ir_mod afterwards), then sets ir._verify and ir._verify_mod.
function attach_verifier(ir) {
  if (_verify_ir_mod == null) {
    _verify_ir_mod = boot_load('verify_ir', boot_env)
  }
  ir._verify = true
  ir._verify_mod = _verify_ir_mod
}

// Remove the verification tags so they are not included in the JSON encoding.
function detach_verifier(ir) {
  delete ir._verify
  delete ir._verify_mod
}

// Run AST through mcode pipeline → register VM
//   name - module name passed to mach_eval_mcode
//   ast  - AST to compile with mcode_mod
//   env  - environment passed to mach_eval_mcode
// The mcode is run through streamline_mod before evaluation. When
// os._verify_ir is set, the IR is tagged with the verify_ir module first.
// Returns whatever mach_eval_mcode returns.
function run_ast(name, ast, env) {
  var ir = mcode_mod(ast)
  if (os._verify_ir) {
    attach_verifier(ir)
  }
  var streamlined = streamline_mod(ir)
  if (streamlined._verify) {
    detach_verifier(streamlined)
  }
  var encoded = json.encode(streamlined)
  return mach_eval_mcode(name, encoded, env)
}

// Run AST through mcode pipeline WITHOUT optimization → register VM
// Same as run_ast minus the streamline pass and the verify hook: the output
// of mcode_mod is JSON-encoded as-is and handed to mach_eval_mcode.
function run_ast_noopt(name, ast, env) {
  var encoded = json.encode(mcode_mod(ast))
  return mach_eval_mcode(name, encoded, env)
}

// Helper to load engine.cm and run it with given env
//   env - environment the engine is loaded/evaluated with
// When <core_path>/boot/engine.cm.mcode exists, loading is delegated to
// boot_load('engine', env) so the hash/cache/compile logic lives in one place
// instead of being duplicated here. Otherwise internal/engine.cm is read,
// analyzed and run from source. Returns the result of the load/evaluation.
function load_engine(env) {
  var engine_cm = null
  if (fd.is_file(core_path + '/boot/engine.cm.mcode')) {
    return boot_load('engine', env)
  }
  // Fallback: compile from source
  engine_cm = core_path + '/internal/engine.cm'
  return run_ast('engine', analyze(text(fd.slurp(engine_cm)), engine_cm), env)
}

// Detect mode and route
// CLI mode has 'args'; actor spawn mode has 'init'
// Either way the engine is started exactly once, with an environment built
// for the detected mode.
var program = null
var user_args = []
var _j = 1
var engine_env = null

if (args == null) {
  // Actor spawn mode — load engine.cm with full actor env
  engine_env = {
    os: os, actorsym: actorsym, init: init,
    core_path: core_path, shop_path: shop_path, json: json, nota: nota, wota: wota,
    analyze: analyze, run_ast_fn: run_ast, run_ast_noopt_fn: run_ast_noopt,
    use_cache: use_cache
  }
} else {
  // CLI mode — always run as actor program (.ce)
  program = args[0]
  if (!program) {
    print("error: no program specified\n")
    disrupt
  }
  // Everything after the script name is forwarded as user arguments.
  while (_j < length(args)) {
    push(user_args, args[_j])
    _j = _j + 1
  }
  engine_env = {
    os: os, actorsym: actorsym,
    init: {program: program, arg: user_args},
    core_path: core_path, shop_path: shop_path, json: json,
    analyze: analyze, run_ast_fn: run_ast, run_ast_noopt_fn: run_ast_noopt,
    use_cache: use_cache
  }
}

load_engine(engine_env)