parallel assembly

This commit is contained in:
2026-02-17 10:57:50 -06:00
parent 8c408a4b81
commit 5fcf765c8d
3 changed files with 126 additions and 59 deletions

113
build.cm
View File

@@ -467,6 +467,64 @@ Build.build_static = function(packages, target, output, buildtype) {
// Native .cm compilation (source → mcode → QBE IL → .o → .dylib)
// ============================================================================
// Batched native compilation: split functions into batches, run QBE on each,
// assemble all batches in parallel, return array of .o paths.
//   il_parts:   {data: text, functions: [text, ...]}
//   cc:         C compiler path
//   tmp_prefix: prefix for temp files (e.g. /tmp/cell_native_<hash>)
// Writes <tmp_prefix>_b<i>.s per batch and has cc produce the matching
// <tmp_prefix>_b<i>.o. Disrupts if any assembler job exits non-zero.
function compile_native_batched(il_parts, cc, tmp_prefix) {
  var nfuncs = length(il_parts.functions)
  var nbatch = 8
  var o_paths = []
  var s_paths = []
  var asm_cmds = []
  var wait_cmds = []
  var batch_fns = null
  var batch_il = null
  var asm_text = null
  var s_path = null
  var o_path = null
  var end = 0
  var bi = 0
  var fi = 0
  var ai = 0
  var rc = null
  var parallel_cmd = null

  // Never create more batches than functions, but always emit at least one
  // batch so the data section is compiled even when there are no functions.
  if (nfuncs < nbatch) nbatch = nfuncs
  if (nbatch < 1) nbatch = 1

  // Generate .s files: run QBE on each batch
  while (bi < nbatch) {
    batch_fns = []
    // fi carries over between batches and the last boundary equals nfuncs,
    // so every function is emitted exactly once.
    end = nfuncs * (bi + 1) / nbatch
    while (fi < end) {
      batch_fns[] = il_parts.functions[fi]
      fi = fi + 1
    }
    // Each batch gets its own copy of the data section ahead of its functions.
    batch_il = il_parts.data + "\n\n" + text(batch_fns, "\n")
    asm_text = os.qbe(batch_il)
    s_path = tmp_prefix + '_b' + text(bi) + '.s'
    o_path = tmp_prefix + '_b' + text(bi) + '.o'
    fd.slurpwrite(s_path, stone(blob(asm_text)))
    s_paths[] = s_path
    o_paths[] = o_path
    bi = bi + 1
  }

  // Assemble all batches in parallel.
  // A bare `wait` (no operands) always exits 0, which would hide assembler
  // failures. Instead, remember each background job's PID via $! and wait on
  // every PID individually, folding any non-zero status into rc.
  while (ai < length(s_paths)) {
    asm_cmds[] = cc + ' -c ' + s_paths[ai] + ' -o ' + o_paths[ai] + ' & p' + text(ai) + '=$!'
    wait_cmds[] = 'wait $p' + text(ai) + ' || rc=1'
    ai = ai + 1
  }
  // Shape: "cc ... & p0=$!; cc ... & p1=$!; rc=0; wait $p0 || rc=1; ...; exit $rc"
  parallel_cmd = text(asm_cmds, '; ') + '; rc=0; ' + text(wait_cmds, '; ') + '; exit $rc'
  rc = os.system(parallel_cmd)
  if (rc != 0) {
    print('Parallel assembly failed'); disrupt
  }
  return o_paths
}
// Post-process QBE IL: insert dead labels after ret/jmp (QBE requirement)
function qbe_insert_dead_labels(il_text) {
var lines = array(il_text, "\n")
@@ -536,10 +594,8 @@ Build.compile_native = function(src_path, target, buildtype, pkg) {
if (pkg) {
sym_name = shop.c_symbol_for_file(pkg, fd.basename(src_path))
}
var il = qbe_emit(optimized, qbe_macros, sym_name)
// Step 3: Post-process (insert dead labels)
il = qbe_insert_dead_labels(il)
var il_parts = qbe_emit(optimized, qbe_macros, sym_name)
var il = il_parts.data + "\n\n" + text(il_parts.functions, "\n")
// Content hash for cache key
var hash = content_hash(src + '\n' + _target + '\nnative')
@@ -550,22 +606,14 @@ Build.compile_native = function(src_path, target, buildtype, pkg) {
if (fd.is_file(dylib_path))
return dylib_path
// Step 4: QBE compile IR to assembly (in-process)
// Compile and assemble via batched parallel pipeline
var tmp = '/tmp/cell_native_' + hash
var s_path = tmp + '.s'
var o_path = tmp + '.o'
var rt_o_path = '/tmp/cell_qbe_rt.o'
var asm_text = os.qbe(il)
fd.slurpwrite(s_path, stone(blob(asm_text)))
var o_paths = compile_native_batched(il_parts, cc, tmp)
// Step 5: Assemble
var rc = os.system(cc + ' -c ' + s_path + ' -o ' + o_path)
if (rc != 0) {
print('Assembly failed for: ' + src_path); disrupt
}
// Step 7: Compile QBE runtime stubs if needed
// Compile QBE runtime stubs if needed
var rc = null
if (!fd.is_file(rt_o_path)) {
qbe_rt_path = shop.get_package_dir('core') + '/qbe_rt.c'
rc = os.system(cc + ' -c ' + qbe_rt_path + ' -o ' + rt_o_path + ' -fPIC')
@@ -574,14 +622,19 @@ Build.compile_native = function(src_path, target, buildtype, pkg) {
}
}
// Step 8: Link dylib
// Link dylib
var link_cmd = cc + ' -shared -fPIC'
if (tc.system == 'darwin') {
link_cmd = link_cmd + ' -undefined dynamic_lookup'
} else if (tc.system == 'linux') {
link_cmd = link_cmd + ' -Wl,--allow-shlib-undefined'
}
link_cmd = link_cmd + ' ' + o_path + ' ' + rt_o_path + ' -o ' + dylib_path
var oi = 0
while (oi < length(o_paths)) {
link_cmd = link_cmd + ' ' + o_paths[oi]
oi = oi + 1
}
link_cmd = link_cmd + ' ' + rt_o_path + ' -o ' + dylib_path
rc = os.system(link_cmd)
if (rc != 0) {
@@ -625,8 +678,7 @@ Build.compile_native_ir = function(optimized, src_path, opts) {
if (pkg) {
sym_name = shop.c_symbol_for_file(pkg, fd.basename(src_path))
}
var il = qbe_emit(optimized, qbe_macros, sym_name)
il = qbe_insert_dead_labels(il)
var il_parts = qbe_emit(optimized, qbe_macros, sym_name)
var src = text(fd.slurp(src_path))
var hash = content_hash(src + '\n' + _target + '\nnative')
@@ -637,19 +689,14 @@ Build.compile_native_ir = function(optimized, src_path, opts) {
if (fd.is_file(dylib_path))
return dylib_path
// Compile and assemble via batched parallel pipeline
var tmp = '/tmp/cell_native_' + hash
var s_path = tmp + '.s'
var o_path = tmp + '.o'
var rt_o_path = '/tmp/cell_qbe_rt.o'
var asm_text = os.qbe(il)
fd.slurpwrite(s_path, stone(blob(asm_text)))
var rc = os.system(cc + ' -c ' + s_path + ' -o ' + o_path)
if (rc != 0) {
print('Assembly failed for: ' + src_path); disrupt
}
var o_paths = compile_native_batched(il_parts, cc, tmp)
// Compile QBE runtime stubs if needed
var rc = null
if (!fd.is_file(rt_o_path)) {
qbe_rt_path = shop.get_package_dir('core') + '/qbe_rt.c'
rc = os.system(cc + ' -c ' + qbe_rt_path + ' -o ' + rt_o_path + ' -fPIC')
@@ -658,13 +705,19 @@ Build.compile_native_ir = function(optimized, src_path, opts) {
}
}
// Link dylib
var link_cmd = cc + ' -shared -fPIC'
if (tc.system == 'darwin') {
link_cmd = link_cmd + ' -undefined dynamic_lookup'
} else if (tc.system == 'linux') {
link_cmd = link_cmd + ' -Wl,--allow-shlib-undefined'
}
link_cmd = link_cmd + ' ' + o_path + ' ' + rt_o_path + ' -o ' + dylib_path
var oi = 0
while (oi < length(o_paths)) {
link_cmd = link_cmd + ' ' + o_paths[oi]
oi = oi + 1
}
link_cmd = link_cmd + ' ' + rt_o_path + ' -o ' + dylib_path
rc = os.system(link_cmd)
if (rc != 0) {

View File

@@ -7,6 +7,7 @@ var build = use('build')
var fd_mod = use('fd')
var os = use('os')
var json = use('json')
var time = use('time')
var show = function(v) {
if (v == null) return "null"
@@ -39,12 +40,28 @@ var fold = use('fold')
var mcode_mod = use('mcode')
var streamline_mod = use('streamline')
// Run the front-end pipeline (read → tokenize → parse → fold → mcode →
// streamline), sampling time.number() before and after every stage.
var t0 = time.number()
var src = text(fd_mod.slurp(abs))
var t1 = time.number()
var tok = tokenize(src, abs)
var t2 = time.number()
var ast = parse_mod(tok.tokens, src, abs, tokenize)
var t3 = time.number()
var folded = fold(ast)
var t4 = time.number()
var compiled = mcode_mod(folded)
var t5 = time.number()
var optimized = streamline_mod(compiled)
var t6 = time.number()
// Report each stage's duration as the difference of adjacent samples.
// NOTE(review): the 's' suffix presumes time.number() yields seconds — confirm.
print('--- front-end timing ---')
print(' read: ' + text(t1 - t0) + 's')
print(' tokenize: ' + text(t2 - t1) + 's')
print(' parse: ' + text(t3 - t2) + 's')
print(' fold: ' + text(t4 - t3) + 's')
print(' mcode: ' + text(t5 - t4) + 's')
print(' streamline: ' + text(t6 - t5) + 's')
print(' total: ' + text(t6 - t0) + 's')
// Shared env for both paths — only non-intrinsic runtime functions.
// Intrinsics (starts_with, ends_with, logical, some, every, etc.) live on

View File

@@ -127,6 +127,8 @@ var qbe_emit = function(ir, qbe, export_name) {
emit(` storel ${sv}, %${t}`)
}
var needs_exc_ret = false
var refresh_fp = function() {
emit(` %fp =l call $cell_rt_refresh_fp_checked(l %ctx)`)
var exc = fresh()
@@ -134,9 +136,8 @@ var qbe_emit = function(ir, qbe, export_name) {
if (has_handler && !in_handler) {
emit(` jnz %${exc}, @disruption_handler, @${exc}_ok`)
} else {
emit(` jnz %${exc}, @${exc}_exc, @${exc}_ok`)
emit(`@${exc}_exc`)
emit(` ret 15`)
needs_exc_ret = true
emit(` jnz %${exc}, @_exc_ret, @${exc}_ok`)
}
emit(`@${exc}_ok`)
}
@@ -161,9 +162,9 @@ var qbe_emit = function(ir, qbe, export_name) {
}
i = i + 1
// Labels are plain strings; skip _nop_ur_ pseudo-labels from streamline
// Labels are plain strings; skip nop pseudo-labels from streamline
if (is_text(instr)) {
if (starts_with(instr, "_nop_ur_")) continue
if (starts_with(instr, "_nop_ur_") || starts_with(instr, "_nop_tc_")) continue
lbl = sanitize(instr)
if (!last_was_term) {
emit(` jmp @${lbl}`)
@@ -839,9 +840,8 @@ var qbe_emit = function(ir, qbe, export_name) {
if (has_handler) {
emit(` jnz %${chk}, @disruption_handler, @${chk}_ok`)
} else {
emit(` jnz %${chk}, @${chk}_exc, @${chk}_ok`)
emit(`@${chk}_exc`)
emit(` ret 15`)
needs_exc_ret = true
emit(` jnz %${chk}, @_exc_ret, @${chk}_ok`)
}
emit(`@${chk}_ok`)
refresh_fp()
@@ -857,9 +857,8 @@ var qbe_emit = function(ir, qbe, export_name) {
if (has_handler) {
emit(` jnz %${chk}, @disruption_handler, @${chk}_ok`)
} else {
emit(` jnz %${chk}, @${chk}_exc, @${chk}_ok`)
emit(`@${chk}_exc`)
emit(` ret 15`)
needs_exc_ret = true
emit(` jnz %${chk}, @_exc_ret, @${chk}_ok`)
}
emit(`@${chk}_ok`)
refresh_fp()
@@ -886,9 +885,8 @@ var qbe_emit = function(ir, qbe, export_name) {
refresh_fp()
emit(` ret %${p}`)
} else {
emit(` jnz %${chk}, @${chk}_exc, @${chk}_ok`)
emit(`@${chk}_exc`)
emit(` ret 15`)
needs_exc_ret = true
emit(` jnz %${chk}, @_exc_ret, @${chk}_ok`)
emit(`@${chk}_ok`)
emit(` ret %${p}`)
}
@@ -1028,6 +1026,12 @@ var qbe_emit = function(ir, qbe, export_name) {
emit(` call $cell_rt_disrupt(l %ctx)`)
emit(` ret 15`)
// Shared exception return (for functions without disruption handler)
if (needs_exc_ret) {
emit("@_exc_ret")
emit(" ret 15")
}
emit("}")
emit("")
}
@@ -1036,30 +1040,23 @@ var qbe_emit = function(ir, qbe, export_name) {
// Main: compile all functions then main
// ============================================================
var fn_bodies = []
var fi = 0
while (fi < length(ir.functions)) {
out = []
compile_fn(ir.functions[fi], fi, false)
fn_bodies[] = text(out, "\n")
fi = fi + 1
}
out = []
compile_fn(ir.main, -1, true)
fn_bodies[] = text(out, "\n")
// Assemble: data section first, then function bodies
var result = []
var di = 0
while (di < length(data_out)) {
push(result, data_out[di])
di = di + 1
return {
data: text(data_out, "\n"),
functions: fn_bodies
}
if (length(data_out) > 0) push(result, "")
di = 0
while (di < length(out)) {
push(result, out[di])
di = di + 1
}
return text(result, "\n")
}
return qbe_emit