diff --git a/build.cm b/build.cm
index 69d775b0..8ed32357 100644
--- a/build.cm
+++ b/build.cm
@@ -467,6 +467,64 @@ Build.build_static = function(packages, target, output, buildtype) {
 // Native .cm compilation (source → mcode → QBE IL → .o → .dylib)
 // ============================================================================
 
+// Batched native compilation: split functions into at most 8 batches, run QBE
+// on each (data section prepended), assemble the resulting .s files in
+// parallel, and return the array of .o paths. Disrupts if any batch fails.
+// il_parts: {data: text, functions: [text, ...]}
+// cc: C compiler path
+// tmp_prefix: prefix for temp files (e.g. /tmp/cell_native_)
+function compile_native_batched(il_parts, cc, tmp_prefix) {
+  var nfuncs = length(il_parts.functions)
+  var nbatch = 8
+  var o_paths = []
+  var s_paths = []
+  var batch_fns = null
+  var asm_text = null
+  var s_path = null
+  var o_path = null
+  var end = 0
+  var bi = 0
+  var fi = 0
+  var ai = 0
+  var rc = null
+  var spawn_cmd = ''
+  var wait_cmd = 'rc=0'
+
+  if (nfuncs < nbatch) nbatch = nfuncs
+  if (nbatch < 1) nbatch = 1
+
+  // Generate .s files: run QBE on each batch
+  while (bi < nbatch) {
+    batch_fns = []
+    end = nfuncs * (bi + 1) / nbatch
+    while (fi < end) {
+      batch_fns[] = il_parts.functions[fi]
+      fi = fi + 1
+    }
+    asm_text = os.qbe(il_parts.data + "\n\n" + text(batch_fns, "\n"))
+    s_path = tmp_prefix + '_b' + text(bi) + '.s'
+    o_path = tmp_prefix + '_b' + text(bi) + '.o'
+    fd.slurpwrite(s_path, stone(blob(asm_text)))
+    s_paths[] = s_path
+    o_paths[] = o_path
+    bi = bi + 1
+  }
+
+  // Assemble all batches in parallel. A bare `wait` always exits 0, so
+  // record each job's pid and wait on it individually to surface failures.
+  while (ai < length(s_paths)) {
+    spawn_cmd = spawn_cmd + cc + ' -c ' + s_paths[ai] + ' -o ' + o_paths[ai] + ' & p' + text(ai) + '=$!; '
+    wait_cmd = wait_cmd + '; wait $p' + text(ai) + ' || rc=1'
+    ai = ai + 1
+  }
+  rc = os.system(spawn_cmd + wait_cmd + '; exit $rc')
+  if (rc != 0) {
+    print('Parallel assembly failed'); disrupt
+  }
+
+  return o_paths
+}
+
 // Post-process QBE IL: insert dead labels after ret/jmp (QBE requirement)
 function qbe_insert_dead_labels(il_text) {
   var lines = 
array(il_text, "\n") @@ -536,10 +594,8 @@ Build.compile_native = function(src_path, target, buildtype, pkg) { if (pkg) { sym_name = shop.c_symbol_for_file(pkg, fd.basename(src_path)) } - var il = qbe_emit(optimized, qbe_macros, sym_name) - - // Step 3: Post-process (insert dead labels) - il = qbe_insert_dead_labels(il) + var il_parts = qbe_emit(optimized, qbe_macros, sym_name) + var il = il_parts.data + "\n\n" + text(il_parts.functions, "\n") // Content hash for cache key var hash = content_hash(src + '\n' + _target + '\nnative') @@ -550,22 +606,14 @@ Build.compile_native = function(src_path, target, buildtype, pkg) { if (fd.is_file(dylib_path)) return dylib_path - // Step 4: QBE compile IR to assembly (in-process) + // Compile and assemble via batched parallel pipeline var tmp = '/tmp/cell_native_' + hash - var s_path = tmp + '.s' - var o_path = tmp + '.o' var rt_o_path = '/tmp/cell_qbe_rt.o' - var asm_text = os.qbe(il) - fd.slurpwrite(s_path, stone(blob(asm_text))) + var o_paths = compile_native_batched(il_parts, cc, tmp) - // Step 5: Assemble - var rc = os.system(cc + ' -c ' + s_path + ' -o ' + o_path) - if (rc != 0) { - print('Assembly failed for: ' + src_path); disrupt - } - - // Step 7: Compile QBE runtime stubs if needed + // Compile QBE runtime stubs if needed + var rc = null if (!fd.is_file(rt_o_path)) { qbe_rt_path = shop.get_package_dir('core') + '/qbe_rt.c' rc = os.system(cc + ' -c ' + qbe_rt_path + ' -o ' + rt_o_path + ' -fPIC') @@ -574,14 +622,19 @@ Build.compile_native = function(src_path, target, buildtype, pkg) { } } - // Step 8: Link dylib + // Link dylib var link_cmd = cc + ' -shared -fPIC' if (tc.system == 'darwin') { link_cmd = link_cmd + ' -undefined dynamic_lookup' } else if (tc.system == 'linux') { link_cmd = link_cmd + ' -Wl,--allow-shlib-undefined' } - link_cmd = link_cmd + ' ' + o_path + ' ' + rt_o_path + ' -o ' + dylib_path + var oi = 0 + while (oi < length(o_paths)) { + link_cmd = link_cmd + ' ' + o_paths[oi] + oi = oi + 1 + } + 
link_cmd = link_cmd + ' ' + rt_o_path + ' -o ' + dylib_path rc = os.system(link_cmd) if (rc != 0) { @@ -625,8 +678,7 @@ Build.compile_native_ir = function(optimized, src_path, opts) { if (pkg) { sym_name = shop.c_symbol_for_file(pkg, fd.basename(src_path)) } - var il = qbe_emit(optimized, qbe_macros, sym_name) - il = qbe_insert_dead_labels(il) + var il_parts = qbe_emit(optimized, qbe_macros, sym_name) var src = text(fd.slurp(src_path)) var hash = content_hash(src + '\n' + _target + '\nnative') @@ -637,19 +689,14 @@ Build.compile_native_ir = function(optimized, src_path, opts) { if (fd.is_file(dylib_path)) return dylib_path + // Compile and assemble via batched parallel pipeline var tmp = '/tmp/cell_native_' + hash - var s_path = tmp + '.s' - var o_path = tmp + '.o' var rt_o_path = '/tmp/cell_qbe_rt.o' - var asm_text = os.qbe(il) - fd.slurpwrite(s_path, stone(blob(asm_text))) - - var rc = os.system(cc + ' -c ' + s_path + ' -o ' + o_path) - if (rc != 0) { - print('Assembly failed for: ' + src_path); disrupt - } + var o_paths = compile_native_batched(il_parts, cc, tmp) + // Compile QBE runtime stubs if needed + var rc = null if (!fd.is_file(rt_o_path)) { qbe_rt_path = shop.get_package_dir('core') + '/qbe_rt.c' rc = os.system(cc + ' -c ' + qbe_rt_path + ' -o ' + rt_o_path + ' -fPIC') @@ -658,13 +705,19 @@ Build.compile_native_ir = function(optimized, src_path, opts) { } } + // Link dylib var link_cmd = cc + ' -shared -fPIC' if (tc.system == 'darwin') { link_cmd = link_cmd + ' -undefined dynamic_lookup' } else if (tc.system == 'linux') { link_cmd = link_cmd + ' -Wl,--allow-shlib-undefined' } - link_cmd = link_cmd + ' ' + o_path + ' ' + rt_o_path + ' -o ' + dylib_path + var oi = 0 + while (oi < length(o_paths)) { + link_cmd = link_cmd + ' ' + o_paths[oi] + oi = oi + 1 + } + link_cmd = link_cmd + ' ' + rt_o_path + ' -o ' + dylib_path rc = os.system(link_cmd) if (rc != 0) { diff --git a/compare_aot.ce b/compare_aot.ce index 35b2bcbc..e0539e73 100644 --- a/compare_aot.ce 
+++ b/compare_aot.ce @@ -7,6 +7,7 @@ var build = use('build') var fd_mod = use('fd') var os = use('os') var json = use('json') +var time = use('time') var show = function(v) { if (v == null) return "null" @@ -39,12 +40,28 @@ var fold = use('fold') var mcode_mod = use('mcode') var streamline_mod = use('streamline') +var t0 = time.number() var src = text(fd_mod.slurp(abs)) +var t1 = time.number() var tok = tokenize(src, abs) +var t2 = time.number() var ast = parse_mod(tok.tokens, src, abs, tokenize) +var t3 = time.number() var folded = fold(ast) +var t4 = time.number() var compiled = mcode_mod(folded) +var t5 = time.number() var optimized = streamline_mod(compiled) +var t6 = time.number() + +print('--- front-end timing ---') +print(' read: ' + text(t1 - t0) + 's') +print(' tokenize: ' + text(t2 - t1) + 's') +print(' parse: ' + text(t3 - t2) + 's') +print(' fold: ' + text(t4 - t3) + 's') +print(' mcode: ' + text(t5 - t4) + 's') +print(' streamline: ' + text(t6 - t5) + 's') +print(' total: ' + text(t6 - t0) + 's') // Shared env for both paths — only non-intrinsic runtime functions. // Intrinsics (starts_with, ends_with, logical, some, every, etc.) 
live on diff --git a/qbe_emit.cm b/qbe_emit.cm index faa301c9..c0f7f130 100644 --- a/qbe_emit.cm +++ b/qbe_emit.cm @@ -127,6 +127,8 @@ var qbe_emit = function(ir, qbe, export_name) { emit(` storel ${sv}, %${t}`) } + var needs_exc_ret = false + var refresh_fp = function() { emit(` %fp =l call $cell_rt_refresh_fp_checked(l %ctx)`) var exc = fresh() @@ -134,9 +136,8 @@ var qbe_emit = function(ir, qbe, export_name) { if (has_handler && !in_handler) { emit(` jnz %${exc}, @disruption_handler, @${exc}_ok`) } else { - emit(` jnz %${exc}, @${exc}_exc, @${exc}_ok`) - emit(`@${exc}_exc`) - emit(` ret 15`) + needs_exc_ret = true + emit(` jnz %${exc}, @_exc_ret, @${exc}_ok`) } emit(`@${exc}_ok`) } @@ -161,9 +162,9 @@ var qbe_emit = function(ir, qbe, export_name) { } i = i + 1 - // Labels are plain strings; skip _nop_ur_ pseudo-labels from streamline + // Labels are plain strings; skip nop pseudo-labels from streamline if (is_text(instr)) { - if (starts_with(instr, "_nop_ur_")) continue + if (starts_with(instr, "_nop_ur_") || starts_with(instr, "_nop_tc_")) continue lbl = sanitize(instr) if (!last_was_term) { emit(` jmp @${lbl}`) @@ -839,9 +840,8 @@ var qbe_emit = function(ir, qbe, export_name) { if (has_handler) { emit(` jnz %${chk}, @disruption_handler, @${chk}_ok`) } else { - emit(` jnz %${chk}, @${chk}_exc, @${chk}_ok`) - emit(`@${chk}_exc`) - emit(` ret 15`) + needs_exc_ret = true + emit(` jnz %${chk}, @_exc_ret, @${chk}_ok`) } emit(`@${chk}_ok`) refresh_fp() @@ -857,9 +857,8 @@ var qbe_emit = function(ir, qbe, export_name) { if (has_handler) { emit(` jnz %${chk}, @disruption_handler, @${chk}_ok`) } else { - emit(` jnz %${chk}, @${chk}_exc, @${chk}_ok`) - emit(`@${chk}_exc`) - emit(` ret 15`) + needs_exc_ret = true + emit(` jnz %${chk}, @_exc_ret, @${chk}_ok`) } emit(`@${chk}_ok`) refresh_fp() @@ -886,9 +885,8 @@ var qbe_emit = function(ir, qbe, export_name) { refresh_fp() emit(` ret %${p}`) } else { - emit(` jnz %${chk}, @${chk}_exc, @${chk}_ok`) - emit(`@${chk}_exc`) - 
emit(` ret 15`) + needs_exc_ret = true + emit(` jnz %${chk}, @_exc_ret, @${chk}_ok`) emit(`@${chk}_ok`) emit(` ret %${p}`) } @@ -1028,6 +1026,12 @@ var qbe_emit = function(ir, qbe, export_name) { emit(` call $cell_rt_disrupt(l %ctx)`) emit(` ret 15`) + // Shared exception return (for functions without disruption handler) + if (needs_exc_ret) { + emit("@_exc_ret") + emit(" ret 15") + } + emit("}") emit("") } @@ -1036,30 +1040,23 @@ var qbe_emit = function(ir, qbe, export_name) { // Main: compile all functions then main // ============================================================ + var fn_bodies = [] var fi = 0 while (fi < length(ir.functions)) { + out = [] compile_fn(ir.functions[fi], fi, false) + fn_bodies[] = text(out, "\n") fi = fi + 1 } + out = [] compile_fn(ir.main, -1, true) + fn_bodies[] = text(out, "\n") - // Assemble: data section first, then function bodies - var result = [] - var di = 0 - while (di < length(data_out)) { - push(result, data_out[di]) - di = di + 1 + return { + data: text(data_out, "\n"), + functions: fn_bodies } - if (length(data_out) > 0) push(result, "") - - di = 0 - while (di < length(out)) { - push(result, out[di]) - di = di + 1 - } - - return text(result, "\n") } return qbe_emit