Merge branch 'fix_native_suite'

This commit is contained in:
2026-02-17 12:35:20 -06:00
163 changed files with 77441 additions and 506 deletions

128
build.cm
View File

@@ -490,6 +490,69 @@ Build.build_static = function(packages, target, output, buildtype) {
// Native .cm compilation (source → mcode → QBE IL → .o → .dylib)
// ============================================================================
// Batched native compilation: split functions into batches, run QBE on each,
// assemble in parallel, return array of .o paths.
// il_parts: {data: text, functions: [text, ...]}
// cc: C compiler path
// tmp_prefix: prefix for temp files (e.g. /tmp/cell_native_<hash>)
function compile_native_batched(il_parts, cc, tmp_prefix) {
// Split the IL functions into up to 8 batches, compile each batch to
// assembly in-process via os.qbe(), write one .s per batch, then assemble
// all .s files concurrently with the C compiler. Returns the list of .o
// paths for the caller to link.
var nfuncs = length(il_parts.functions)
var nbatch = 8
var o_paths = []
var s_paths = []
var asm_cmds = []
var batch_fns = null
var batch_il = null
var asm_text = null
var s_path = null
var o_path = null
var end = 0
var bi = 0
var fi = 0
var ai = 0
var rc = null
var parallel_cmd = null
// Optional helper-function IL, joined into one text blob (empty if none).
var helpers_il = (il_parts.helpers && length(il_parts.helpers) > 0)
? text(il_parts.helpers, "\n") : ""
var prefix = null
// Never create more batches than functions; always run at least one batch
// so the data section is still emitted when there are zero functions.
if (nfuncs < nbatch) nbatch = nfuncs
if (nbatch < 1) nbatch = 1
// Generate .s files: run QBE on each batch
while (bi < nbatch) {
batch_fns = []
// Proportional split: batch bi covers functions [fi, nfuncs*(bi+1)/nbatch).
// fi carries over between iterations, so batches partition the list.
end = nfuncs * (bi + 1) / nbatch
while (fi < end) {
batch_fns[] = il_parts.functions[fi]
fi = fi + 1
}
// Batch 0 includes helper functions; others reference them as external symbols
prefix = (bi == 0 && helpers_il != "") ? helpers_il + "\n\n" : ""
// NOTE(review): il_parts.data is prepended to EVERY batch; if it defines
// non-local symbols this would duplicate them across objects at link
// time — TODO confirm the data section only contains batch-safe entries.
batch_il = il_parts.data + "\n\n" + prefix + text(batch_fns, "\n")
asm_text = os.qbe(batch_il)
s_path = tmp_prefix + '_b' + text(bi) + '.s'
o_path = tmp_prefix + '_b' + text(bi) + '.o'
fd.slurpwrite(s_path, stone(blob(asm_text)))
s_paths[] = s_path
o_paths[] = o_path
bi = bi + 1
}
// Assemble all batches in parallel
while (ai < length(s_paths)) {
asm_cmds[] = cc + ' -c ' + s_paths[ai] + ' -o ' + o_paths[ai]
ai = ai + 1
}
// Run every assembler command as a background job in one shell, then wait.
// NOTE(review): POSIX `wait` with no arguments returns 0 regardless of the
// background jobs' exit statuses, so rc may be 0 even when an assembler
// invocation failed — the link step would then fail later with a missing
// .o. TODO: confirm and consider checking each job's status explicitly.
parallel_cmd = text(asm_cmds, ' & ') + ' & wait'
rc = os.system(parallel_cmd)
if (rc != 0) {
print('Parallel assembly failed'); disrupt
}
return o_paths
}
// Post-process QBE IL: insert dead labels after ret/jmp (QBE requirement)
function qbe_insert_dead_labels(il_text) {
var lines = array(il_text, "\n")
@@ -559,10 +622,7 @@ Build.compile_native = function(src_path, target, buildtype, pkg) {
if (pkg) {
sym_name = shop.c_symbol_for_file(pkg, fd.basename(src_path))
}
var il = qbe_emit(optimized, qbe_macros, sym_name)
// Step 3: Post-process (insert dead labels)
il = qbe_insert_dead_labels(il)
var il_parts = qbe_emit(optimized, qbe_macros, sym_name)
// Content hash for cache key
var hash = content_hash(src + '\n' + _target + '\nnative')
@@ -573,28 +633,14 @@ Build.compile_native = function(src_path, target, buildtype, pkg) {
if (fd.is_file(dylib_path))
return dylib_path
// Step 4: Write QBE IL to temp file
// Compile and assemble via batched parallel pipeline
var tmp = '/tmp/cell_native_' + hash
var ssa_path = tmp + '.ssa'
var s_path = tmp + '.s'
var o_path = tmp + '.o'
var rt_o_path = '/tmp/cell_qbe_rt.o'
fd.slurpwrite(ssa_path, stone(blob(il)))
var o_paths = compile_native_batched(il_parts, cc, tmp)
// Step 5: QBE compile to assembly
var rc = os.system('qbe -o ' + s_path + ' ' + ssa_path)
if (rc != 0) {
print('QBE compilation failed for: ' + src_path); disrupt
}
// Step 6: Assemble
rc = os.system(cc + ' -c ' + s_path + ' -o ' + o_path)
if (rc != 0) {
print('Assembly failed for: ' + src_path); disrupt
}
// Step 7: Compile QBE runtime stubs if needed
// Compile QBE runtime stubs if needed
var rc = null
if (!fd.is_file(rt_o_path)) {
qbe_rt_path = shop.get_package_dir('core') + '/qbe_rt.c'
rc = os.system(cc + ' -c ' + qbe_rt_path + ' -o ' + rt_o_path + ' -fPIC')
@@ -603,14 +649,19 @@ Build.compile_native = function(src_path, target, buildtype, pkg) {
}
}
// Step 8: Link dylib
// Link dylib
var link_cmd = cc + ' -shared -fPIC'
if (tc.system == 'darwin') {
link_cmd = link_cmd + ' -undefined dynamic_lookup'
} else if (tc.system == 'linux') {
link_cmd = link_cmd + ' -Wl,--allow-shlib-undefined'
}
link_cmd = link_cmd + ' ' + o_path + ' ' + rt_o_path + ' -o ' + dylib_path
var oi = 0
while (oi < length(o_paths)) {
link_cmd = link_cmd + ' ' + o_paths[oi]
oi = oi + 1
}
link_cmd = link_cmd + ' ' + rt_o_path + ' -o ' + dylib_path
rc = os.system(link_cmd)
if (rc != 0) {
@@ -654,8 +705,7 @@ Build.compile_native_ir = function(optimized, src_path, opts) {
if (pkg) {
sym_name = shop.c_symbol_for_file(pkg, fd.basename(src_path))
}
var il = qbe_emit(optimized, qbe_macros, sym_name)
il = qbe_insert_dead_labels(il)
var il_parts = qbe_emit(optimized, qbe_macros, sym_name)
var src = text(fd.slurp(src_path))
var hash = content_hash(src + '\n' + _target + '\nnative')
@@ -666,24 +716,14 @@ Build.compile_native_ir = function(optimized, src_path, opts) {
if (fd.is_file(dylib_path))
return dylib_path
// Compile and assemble via batched parallel pipeline
var tmp = '/tmp/cell_native_' + hash
var ssa_path = tmp + '.ssa'
var s_path = tmp + '.s'
var o_path = tmp + '.o'
var rt_o_path = '/tmp/cell_qbe_rt.o'
fd.slurpwrite(ssa_path, stone(blob(il)))
var rc = os.system('qbe -o ' + s_path + ' ' + ssa_path)
if (rc != 0) {
print('QBE compilation failed for: ' + src_path); disrupt
}
rc = os.system(cc + ' -c ' + s_path + ' -o ' + o_path)
if (rc != 0) {
print('Assembly failed for: ' + src_path); disrupt
}
var o_paths = compile_native_batched(il_parts, cc, tmp)
// Compile QBE runtime stubs if needed
var rc = null
if (!fd.is_file(rt_o_path)) {
qbe_rt_path = shop.get_package_dir('core') + '/qbe_rt.c'
rc = os.system(cc + ' -c ' + qbe_rt_path + ' -o ' + rt_o_path + ' -fPIC')
@@ -692,13 +732,19 @@ Build.compile_native_ir = function(optimized, src_path, opts) {
}
}
// Link dylib
var link_cmd = cc + ' -shared -fPIC'
if (tc.system == 'darwin') {
link_cmd = link_cmd + ' -undefined dynamic_lookup'
} else if (tc.system == 'linux') {
link_cmd = link_cmd + ' -Wl,--allow-shlib-undefined'
}
link_cmd = link_cmd + ' ' + o_path + ' ' + rt_o_path + ' -o ' + dylib_path
var oi = 0
while (oi < length(o_paths)) {
link_cmd = link_cmd + ' ' + o_paths[oi]
oi = oi + 1
}
link_cmd = link_cmd + ' ' + rt_o_path + ' -o ' + dylib_path
rc = os.system(link_cmd)
if (rc != 0) {

View File

@@ -7,6 +7,7 @@ var build = use('build')
var fd_mod = use('fd')
var os = use('os')
var json = use('json')
var time = use('time')
var show = function(v) {
if (v == null) return "null"
@@ -39,12 +40,28 @@ var fold = use('fold')
var mcode_mod = use('mcode')
var streamline_mod = use('streamline')
var t0 = time.number()
var src = text(fd_mod.slurp(abs))
var t1 = time.number()
var tok = tokenize(src, abs)
var t2 = time.number()
var ast = parse_mod(tok.tokens, src, abs, tokenize)
var t3 = time.number()
var folded = fold(ast)
var t4 = time.number()
var compiled = mcode_mod(folded)
var t5 = time.number()
var optimized = streamline_mod(compiled)
var t6 = time.number()
print('--- front-end timing ---')
print(' read: ' + text(t1 - t0) + 's')
print(' tokenize: ' + text(t2 - t1) + 's')
print(' parse: ' + text(t3 - t2) + 's')
print(' fold: ' + text(t4 - t3) + 's')
print(' mcode: ' + text(t5 - t4) + 's')
print(' streamline: ' + text(t6 - t5) + 's')
print(' total: ' + text(t6 - t0) + 's')
// Shared env for both paths — only non-intrinsic runtime functions.
// Intrinsics (starts_with, ends_with, logical, some, every, etc.) live on

View File

@@ -447,6 +447,7 @@ static JSValue js_os_dylib_close(JSContext *js, JSValue self, int argc, JSValue
Uses cell_rt_native_module_load from qbe_helpers.c */
extern JSValue cell_rt_native_module_load(JSContext *ctx, void *dl_handle, JSValue env);
extern JSValue cell_rt_native_module_load_named(JSContext *ctx, void *dl_handle, const char *sym_name, JSValue env);
extern JSValue js_os_qbe(JSContext *, JSValue, int, JSValue *);
static JSValue js_os_native_module_load(JSContext *js, JSValue self, int argc, JSValue *argv)
{
@@ -663,6 +664,7 @@ static const JSCFunctionListEntry js_os_funcs[] = {
MIST_FUNC_DEF(os, print, 1),
MIST_FUNC_DEF(os, random, 0),
MIST_FUNC_DEF(os, getenv, 1),
MIST_FUNC_DEF(os, qbe, 1),
};
JSValue js_core_os_use(JSContext *js) {

View File

@@ -60,6 +60,7 @@ src += [ # core
src += ['scheduler.c']
src += ['qbe_helpers.c']
src += ['qbe_backend.c']
scripts = [
'debug/js.c',
@@ -84,18 +85,67 @@ foreach file: scripts
endforeach
srceng = 'source'
includes = [srceng, 'internal', 'debug', 'net', 'archive']
includes = [srceng, 'internal', 'debug', 'net', 'archive', 'src/qbe']
foreach file : src
full_path = join_paths(srceng, file)
sources += files(full_path)
endforeach
# QBE compiler sources (all except main.c)
# Built as a separate static library to avoid -x objective-c on macOS
# (QBE uses 'Class' as a struct name, which conflicts with ObjC)
qbe_src = [
'src/qbe/util.c',
'src/qbe/parse.c',
'src/qbe/abi.c',
'src/qbe/cfg.c',
'src/qbe/mem.c',
'src/qbe/ssa.c',
'src/qbe/alias.c',
'src/qbe/load.c',
'src/qbe/copy.c',
'src/qbe/fold.c',
'src/qbe/gvn.c',
'src/qbe/gcm.c',
'src/qbe/simpl.c',
'src/qbe/ifopt.c',
'src/qbe/live.c',
'src/qbe/spill.c',
'src/qbe/rega.c',
'src/qbe/emit.c',
'src/qbe/amd64/targ.c',
'src/qbe/amd64/sysv.c',
'src/qbe/amd64/isel.c',
'src/qbe/amd64/emit.c',
'src/qbe/amd64/winabi.c',
'src/qbe/arm64/targ.c',
'src/qbe/arm64/abi.c',
'src/qbe/arm64/isel.c',
'src/qbe/arm64/emit.c',
'src/qbe/rv64/targ.c',
'src/qbe/rv64/abi.c',
'src/qbe/rv64/isel.c',
'src/qbe/rv64/emit.c',
]
qbe_files = []
foreach file : qbe_src
qbe_files += files(file)
endforeach
includers = []
foreach inc : includes
includers += include_directories(inc)
endforeach
qbe_c_args = ['-x', 'c']
qbe_lib = static_library('qbe',
qbe_files,
include_directories: includers,
c_args: qbe_c_args,
)
if host_machine.system() == 'windows'
exe_ext = '.exe'
link += '-Wl,--export-all-symbols'
@@ -109,6 +159,7 @@ cell_so = shared_library(
sources,
include_directories: includers,
dependencies: deps,
link_whole: qbe_lib,
install : true,
)

7
qbe.cm
View File

@@ -519,12 +519,9 @@ var ne_bool = function(p, a, b) {
`
}
// --- Type guard: is_identical ---
// --- Type guard: is_identical (chases forwarding pointers via C helper) ---
var is_identical = function(p, a, b) {
return ` %${p}.cr =w ceql ${a}, ${b}
%${p}.crext =l extuw %${p}.cr
%${p}.sh =l shl %${p}.crext, 5
%${p} =l or %${p}.sh, 3
return ` %${p} =l call $cell_rt_is_identical(l %ctx, l ${a}, l ${b})
`
}

File diff suppressed because it is too large Load Diff

172
source/qbe_backend.c Normal file
View File

@@ -0,0 +1,172 @@
/*
* QBE Backend — in-process QBE IR → assembly compilation.
*
* Wraps QBE as a library: feeds IR text via fmemopen(), captures
* assembly output via open_memstream(), returns it as a JS string.
* No subprocess, no temp files for IR, no external qbe binary needed.
*/
#include "cell.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* QBE headers */
#include "all.h"
#include "config.h"
/* QBE globals (declared extern in all.h) */
Target T;
char debug['Z'+1] = {0};
extern Target T_amd64_sysv;
extern Target T_amd64_apple;
extern Target T_amd64_win;
extern Target T_arm64;
extern Target T_arm64_apple;
extern Target T_rv64;
/* Captured output stream — set before calling parse() */
static FILE *qbe_outf;
/* Data callback handed to QBE's parse(): emits one data directive to the
 * captured output stream. When the DEnd marker arrives, the data item is
 * complete, so per-item pool memory is released with freeall(). */
static void qbe_data(Dat *d) {
emitdat(d, qbe_outf);
if (d->type == DEnd) {
fputs("/* end data */\n\n", qbe_outf);
freeall();
}
}
/* Function callback handed to QBE's parse(): runs the full middle/back-end
 * pass pipeline on one parsed function and emits its assembly to qbe_outf.
 * NOTE(review): the pass order appears to mirror QBE's own main.c driver —
 * TODO confirm it matches the vendored QBE version whenever src/qbe is
 * updated, since several passes depend on fillcfg/filluse being re-run
 * after the IR is mutated. */
static void qbe_func(Fn *fn) {
uint n;
/* ABI lowering, part 1 (target-specific argument/return rewriting). */
T.abi0(fn);
/* SSA construction: CFG, use info, promotion of stack slots, then ssa(). */
fillcfg(fn);
filluse(fn);
promote(fn);
filluse(fn);
ssa(fn);
filluse(fn);
ssacheck(fn);
/* Memory optimization: alias analysis feeding load elimination and
 * slot coalescing. */
fillalias(fn);
loadopt(fn);
filluse(fn);
fillalias(fn);
coalesce(fn);
filluse(fn);
filldom(fn);
ssacheck(fn);
/* Value numbering and code motion. */
gvn(fn);
fillcfg(fn);
simplcfg(fn);
filluse(fn);
filldom(fn);
gcm(fn);
filluse(fn);
ssacheck(fn);
/* Optional if-conversion for targets with conditional-select support. */
if (T.cansel) {
ifconvert(fn);
fillcfg(fn);
filluse(fn);
filldom(fn);
ssacheck(fn);
}
/* ABI lowering part 2, simplification, instruction selection. */
T.abi1(fn);
simpl(fn);
fillcfg(fn);
filluse(fn);
T.isel(fn);
/* Register allocation: liveness, loop depth, spill costs, spill, rega. */
fillcfg(fn);
filllive(fn);
fillloop(fn);
fillcost(fn);
spill(fn);
rega(fn);
/* Re-linearize the block list in RPO order for emission. */
fillcfg(fn);
simpljmp(fn);
fillcfg(fn);
assert(fn->rpo[0] == fn->start);
for (n = 0;; n++)
if (n == fn->nblk - 1) {
fn->rpo[n]->link = 0;
break;
} else
fn->rpo[n]->link = fn->rpo[n+1];
/* Emit assembly for this function, then free per-function pools. */
T.emitfn(fn, qbe_outf);
fprintf(qbe_outf, "/* end function %s */\n\n", fn->name);
freeall();
}
/* Debug-file callback for parse(): forwards source-file debug directives
 * to the captured output stream. */
static void qbe_dbgfile(char *fn) {
emitdbgfile(fn, qbe_outf);
}
/*
* js_os_qbe(ctx, self, argc, argv)
*
* Takes a single string argument (QBE IR text).
* Returns the compiled assembly as a string.
*/
JSValue js_os_qbe(JSContext *js, JSValue self, int argc, JSValue *argv) {
/* argv[0]: QBE IR text. Returns the generated assembly as a JS string,
 * or throws on missing/invalid argument or stream-setup failure.
 * NOTE(review): qbe_outf is a static global, so this function is not
 * reentrant or thread-safe — fine for a single-threaded JS runtime,
 * verify if workers are ever added.
 * NOTE(review): QBE's parse() reports malformed IR via its internal
 * err()/die() paths, which abort the process rather than returning —
 * TODO confirm callers only feed compiler-generated IR. */
if (argc < 1)
return JS_ThrowTypeError(js, "os.qbe requires an IR string argument");
const char *ir = JS_ToCString(js, argv[0]);
if (!ir)
return JS_EXCEPTION;
size_t ir_len = strlen(ir);
/* Select target for host platform */
#if defined(__APPLE__) && defined(__aarch64__)
T = T_arm64_apple;
#elif defined(__APPLE__) && defined(__x86_64__)
T = T_amd64_apple;
#elif defined(_WIN32) && defined(__x86_64__)
T = T_amd64_win;
#elif defined(__x86_64__)
T = T_amd64_sysv;
#elif defined(__aarch64__)
T = T_arm64;
#elif defined(__riscv) && __riscv_xlen == 64
T = T_rv64;
#else
T = Deftgt;
#endif
/* Clear QBE's per-pass debug flags so no debug dumps pollute output. */
memset(debug, 0, sizeof(debug));
/* Open IR string as input FILE */
FILE *inf = fmemopen((void *)ir, ir_len, "r");
if (!inf) {
JS_FreeCString(js, ir);
return JS_ThrowInternalError(js, "os.qbe: fmemopen failed");
}
/* Open output memory stream */
char *out_buf = NULL;
size_t out_len = 0;
qbe_outf = open_memstream(&out_buf, &out_len);
if (!qbe_outf) {
fclose(inf);
JS_FreeCString(js, ir);
return JS_ThrowInternalError(js, "os.qbe: open_memstream failed");
}
/* Run the QBE pipeline: parse() invokes qbe_data/qbe_func per item. */
parse(inf, "<ir>", qbe_dbgfile, qbe_data, qbe_func);
fclose(inf);
/* Finalize (emit assembler directives) */
T.emitfin(qbe_outf);
/* fclose() on a memstream finalizes out_buf/out_len; flush first so the
 * buffer is complete before it is read below. */
fflush(qbe_outf);
fclose(qbe_outf);
qbe_outf = NULL;
JS_FreeCString(js, ir);
/* Return assembly text */
JSValue result = JS_NewStringLen(js, out_buf, out_len);
free(out_buf);
return result;
}

View File

@@ -222,6 +222,16 @@ JSValue qbe_shift_shr(JSContext *ctx, JSValue a, JSValue b) {
/* --- Property access --- */
JSValue cell_rt_load_field(JSContext *ctx, JSValue obj, const char *name) {
if (JS_IsFunction(obj)) {
JS_ThrowTypeError(ctx, "cannot read property of function");
return JS_EXCEPTION;
}
return JS_GetPropertyStr(ctx, obj, name);
}
/* Like cell_rt_load_field but without the function guard.
Used by load_dynamic when the key happens to be a static string. */
JSValue cell_rt_load_prop_str(JSContext *ctx, JSValue obj, const char *name) {
return JS_GetPropertyStr(ctx, obj, name);
}
@@ -238,10 +248,15 @@ JSValue cell_rt_load_dynamic(JSContext *ctx, JSValue obj, JSValue key) {
void cell_rt_store_dynamic(JSContext *ctx, JSValue val, JSValue obj,
JSValue key) {
if (JS_IsInt(key))
if (JS_IsInt(key)) {
JS_SetPropertyNumber(ctx, obj, (uint32_t)JS_VALUE_GET_INT(key), val);
else
} else if (JS_IsArray(obj) && !JS_IsInt(key)) {
JS_ThrowTypeError(ctx, "array index must be a number");
} else if (JS_IsBool(key) || JS_IsNull(key) || JS_IsArray(key) || JS_IsFunction(key)) {
JS_ThrowTypeError(ctx, "object key must be text");
} else {
JS_SetProperty(ctx, obj, key, val);
}
}
JSValue cell_rt_load_index(JSContext *ctx, JSValue arr, JSValue idx) {
@@ -466,7 +481,8 @@ static JSValue cell_fn_trampoline(JSContext *ctx, JSValue this_val,
return result;
}
JSValue cell_rt_make_function(JSContext *ctx, int64_t fn_idx, void *outer_fp) {
JSValue cell_rt_make_function(JSContext *ctx, int64_t fn_idx, void *outer_fp,
int64_t nr_args) {
(void)outer_fp;
if (g_native_fn_count >= MAX_NATIVE_FN)
return JS_ThrowTypeError(ctx, "too many native functions (max %d)", MAX_NATIVE_FN);
@@ -487,7 +503,7 @@ JSValue cell_rt_make_function(JSContext *ctx, int64_t fn_idx, void *outer_fp) {
}
return JS_NewCFunction2(ctx, (JSCFunction *)cell_fn_trampoline, "native_fn",
255, JS_CFUNC_generic_magic, global_id);
(int)nr_args, JS_CFUNC_generic_magic, global_id);
}
/* --- Frame-based function calling --- */
@@ -515,15 +531,35 @@ JSValue cell_rt_invoke(JSContext *ctx, JSValue frame_val) {
JSFrameRegister *fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val);
int nr_slots = (int)objhdr_cap56(fr->header);
int c_argc = (nr_slots >= 2) ? nr_slots - 2 : 0;
JSValue fn_val = fr->function;
/* Copy args to C stack */
JSValue args[c_argc > 0 ? c_argc : 1];
for (int i = 0; i < c_argc; i++)
args[i] = fr->slots[i + 1];
if (!JS_IsFunction(fn_val)) {
JS_ThrowTypeError(ctx, "not a function");
return JS_EXCEPTION;
}
JSFunction *fn = JS_VALUE_GET_FUNCTION(fn_val);
JSValue result;
if (fn->kind == JS_FUNC_KIND_C) {
/* Match MACH_INVOKE: C functions go directly to js_call_c_function,
bypassing JS_Call's arity check. Extra args are silently available. */
result = js_call_c_function(ctx, fn_val, fr->slots[0], c_argc, &fr->slots[1]);
} else {
/* Register/bytecode functions — use JS_CallInternal (no arity gate) */
JSValue args[c_argc > 0 ? c_argc : 1];
for (int i = 0; i < c_argc; i++)
args[i] = fr->slots[i + 1];
result = JS_CallInternal(ctx, fn_val, fr->slots[0], c_argc, args, 0);
}
JSValue result = JS_Call(ctx, fr->function, fr->slots[0], c_argc, args);
if (JS_IsException(result))
return JS_EXCEPTION;
/* Clear any stale exception left by functions that returned a valid
value despite internal error (e.g., sign("text") returns null
but JS_ToFloat64 leaves an exception flag) */
if (JS_HasException(ctx))
JS_GetException(ctx);
return result;
}
@@ -549,6 +585,16 @@ JSValue cell_rt_pop(JSContext *ctx, JSValue arr) {
JSValue cell_rt_delete(JSContext *ctx, JSValue obj, JSValue key) {
int ret = JS_DeleteProperty(ctx, obj, key);
if (ret < 0)
return JS_EXCEPTION;
return JS_NewBool(ctx, ret >= 0);
}
JSValue cell_rt_delete_str(JSContext *ctx, JSValue obj, const char *name) {
JSValue key = JS_NewString(ctx, name);
int ret = JS_DeleteProperty(ctx, obj, key);
if (ret < 0)
return JS_EXCEPTION;
return JS_NewBool(ctx, ret >= 0);
}
@@ -595,12 +641,37 @@ JSValue cell_rt_ge_text(JSContext *ctx, JSValue a, JSValue b) {
return JS_NewBool(ctx, r);
}
JSValue cell_rt_eq_tol(JSContext *ctx, JSValue a, JSValue b) {
return JS_NewBool(ctx, a == b);
static int cell_rt_tol_eq_inner(JSContext *ctx, JSValue a, JSValue b,
JSValue tol) {
if (JS_IsNumber(a) && JS_IsNumber(b) && JS_IsNumber(tol)) {
double da, db, dt;
JS_ToFloat64(ctx, &da, a);
JS_ToFloat64(ctx, &db, b);
JS_ToFloat64(ctx, &dt, tol);
return fabs(da - db) <= dt;
}
if (JS_IsText(a) && JS_IsText(b) && JS_IsBool(tol) && JS_VALUE_GET_BOOL(tol)) {
return js_string_compare_value_nocase(ctx, a, b) == 0;
}
/* Fallback to standard equality */
if (a == b) return 1;
if (JS_IsText(a) && JS_IsText(b))
return js_string_compare_value(ctx, a, b, 1) == 0;
if (JS_IsNumber(a) && JS_IsNumber(b)) {
double da, db;
JS_ToFloat64(ctx, &da, a);
JS_ToFloat64(ctx, &db, b);
return da == db;
}
return 0;
}
JSValue cell_rt_ne_tol(JSContext *ctx, JSValue a, JSValue b) {
return JS_NewBool(ctx, a != b);
JSValue cell_rt_eq_tol(JSContext *ctx, JSValue a, JSValue b, JSValue tol) {
return JS_NewBool(ctx, cell_rt_tol_eq_inner(ctx, a, b, tol));
}
JSValue cell_rt_ne_tol(JSContext *ctx, JSValue a, JSValue b, JSValue tol) {
return JS_NewBool(ctx, !cell_rt_tol_eq_inner(ctx, a, b, tol));
}
/* --- Type check: is_proxy (function with arity 2) --- */
@@ -612,6 +683,14 @@ int cell_rt_is_proxy(JSContext *ctx, JSValue v) {
return fn->length == 2;
}
/* --- Identity check (chases forwarding pointers) --- */
JSValue cell_rt_is_identical(JSContext *ctx, JSValue a, JSValue b) {
if (JS_IsPtr(a)) a = JS_MKPTR(chase(a));
if (JS_IsPtr(b)) b = JS_MKPTR(chase(b));
return JS_NewBool(ctx, a == b);
}
/* --- Short-circuit and/or (non-allocating) --- */
JSValue cell_rt_and(JSContext *ctx, JSValue left, JSValue right) {

6
src/qbe/.gitignore vendored Normal file
View File

@@ -0,0 +1,6 @@
*.o
qbe
config.h
.comfile
*.out
*~

19
src/qbe/LICENSE Normal file
View File

@@ -0,0 +1,19 @@
© 2015-2026 Quentin Carbonneaux <quentin@c9x.me>
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

103
src/qbe/Makefile Normal file
View File

@@ -0,0 +1,103 @@
.POSIX:
.SUFFIXES: .o .c
PREFIX = /usr/local
BINDIR = $(PREFIX)/bin
COMMOBJ = main.o util.o parse.o abi.o cfg.o mem.o ssa.o alias.o load.o \
copy.o fold.o gvn.o gcm.o simpl.o ifopt.o live.o spill.o rega.o \
emit.o
AMD64OBJ = amd64/targ.o amd64/sysv.o amd64/isel.o amd64/emit.o amd64/winabi.o
ARM64OBJ = arm64/targ.o arm64/abi.o arm64/isel.o arm64/emit.o
RV64OBJ = rv64/targ.o rv64/abi.o rv64/isel.o rv64/emit.o
OBJ = $(COMMOBJ) $(AMD64OBJ) $(ARM64OBJ) $(RV64OBJ)
SRCALL = $(OBJ:.o=.c)
CC = cc
CFLAGS = -std=c99 -g -Wall -Wextra -Wpedantic
qbe: $(OBJ)
$(CC) $(LDFLAGS) $(OBJ) -o $@
.c.o:
$(CC) $(CFLAGS) -c $< -o $@
$(OBJ): all.h ops.h
$(AMD64OBJ): amd64/all.h
$(ARM64OBJ): arm64/all.h
$(RV64OBJ): rv64/all.h
main.o: config.h
config.h:
@case `uname` in \
*Darwin*) \
case `uname -m` in \
*arm64*) \
echo "#define Deftgt T_arm64_apple";\
;; \
*) \
echo "#define Deftgt T_amd64_apple";\
;; \
esac \
;; \
*) \
case `uname -m` in \
*aarch64*|*arm64*) \
echo "#define Deftgt T_arm64"; \
;; \
*riscv64*) \
echo "#define Deftgt T_rv64"; \
;; \
*) \
echo "#define Deftgt T_amd64_sysv";\
;; \
esac \
;; \
esac > $@
install: qbe
mkdir -p "$(DESTDIR)$(BINDIR)"
install -m755 qbe "$(DESTDIR)$(BINDIR)/qbe"
uninstall:
rm -f "$(DESTDIR)$(BINDIR)/qbe"
clean:
rm -f *.o */*.o qbe
clean-gen: clean
rm -f config.h
check: qbe
tools/test.sh all
check-x86_64: qbe
TARGET=x86_64 tools/test.sh all
check-arm64: qbe
TARGET=arm64 tools/test.sh all
check-rv64: qbe
TARGET=rv64 tools/test.sh all
check-amd64_win: qbe
TARGET=amd64_win tools/test.sh all
src:
@echo $(SRCALL)
80:
@for F in $(SRCALL); \
do \
awk "{ \
gsub(/\\t/, \" \"); \
if (length(\$$0) > $@) \
printf(\"$$F:%d: %s\\n\", NR, \$$0); \
}" < $$F; \
done
wc:
@wc -l $(SRCALL)
.PHONY: clean clean-gen check check-arm64 check-rv64 src 80 wc install uninstall

18
src/qbe/README Normal file
View File

@@ -0,0 +1,18 @@
QBE - Backend Compiler http://c9x.me/compile/
doc/ Documentation.
minic/ An example C frontend for QBE.
tools/ Miscellaneous tools (testing).
test/ Tests.
amd64/
arm64/
rv64/ Architecture-specific code.
The LICENSE file applies to all files distributed.
- Compilation and Installation
Invoke make in this directory to create the executable
file qbe. Install using 'make install', the standard
DESTDIR and PREFIX environment variables are supported.
Alternatively, you may simply copy the qbe binary.

25
src/qbe/abi.c Normal file
View File

@@ -0,0 +1,25 @@
#include "all.h"
/* eliminate sub-word abi op
* variants for targets that
* treat char/short/... as
* words with arbitrary high
* bits
*/
/* Rewrite the sub-word ABI op variants (arg{sb,ub,sh,uh} /
 * par{sb,ub,sh,uh}) into their plain Oarg/Opar forms, and sub-word
 * return jumps into Jretw, for targets that treat char/short values
 * as full words with arbitrary high bits. */
void
elimsb(Fn *fn)
{
	Blk *blk;
	Ins *ins, *end;

	for (blk = fn->start; blk; blk = blk->link) {
		end = &blk->ins[blk->nins];
		for (ins = blk->ins; ins < end; ins++) {
			if (isargbh(ins->op))
				ins->op = Oarg;
			if (isparbh(ins->op))
				ins->op = Opar;
		}
		if (isretbh(blk->jmp.type))
			blk->jmp.type = Jretw;
	}
}

222
src/qbe/alias.c Normal file
View File

@@ -0,0 +1,222 @@
#include "all.h"
/* Fill *a with the alias description of ref r: for a temporary, copy its
 * precomputed alias info (resolving stack slots to their current escape
 * class); for a constant, classify it as a symbol address or plain bits. */
void
getalias(Alias *a, Ref r, Fn *fn)
{
Con *c;
switch (rtype(r)) {
default:
die("unreachable");
case RTmp:
*a = fn->tmp[r.val].alias;
/* Stack-based aliases track escape state on the slot itself;
 * reflect the slot's current class (ALoc or AEsc) here. */
if (astack(a->type))
a->type = a->slot->type;
assert(a->type != ABot);
break;
case RCon:
c = &fn->con[r.val];
if (c->type == CAddr) {
a->type = ASym;
a->u.sym = c->sym;
} else
a->type = ACon;
a->offset = c->bits.i;
a->slot = 0;
break;
}
}
/* Decide whether the memory ranges [p+op, p+op+sp) and [q, q+sq) may
 * overlap. Returns NoAlias / MayAlias / MustAlias and stores the byte
 * distance between the two (meaningful when they MustAlias) in *delta. */
int
alias(Ref p, int op, int sp, Ref q, int sq, int *delta, Fn *fn)
{
Alias ap, aq;
int ovlap;
getalias(&ap, p, fn);
getalias(&aq, q, fn);
ap.offset += op;
/* when delta is meaningful (ovlap == 1),
* we do not overflow int because sp and
* sq are bounded by 2^28 */
*delta = ap.offset - aq.offset;
ovlap = ap.offset < aq.offset + sq && aq.offset < ap.offset + sp;
if (astack(ap.type) && astack(aq.type)) {
/* if both are offsets of the same
* stack slot, they alias iff they
* overlap */
if (ap.base == aq.base && ovlap)
return MustAlias;
return NoAlias;
}
if (ap.type == ASym && aq.type == ASym) {
/* they conservatively alias if the
* symbols are different, or they
* alias for sure if they overlap */
if (!symeq(ap.u.sym, aq.u.sym))
return MayAlias;
if (ovlap)
return MustAlias;
return NoAlias;
}
if ((ap.type == ACon && aq.type == ACon)
|| (ap.type == aq.type && ap.base == aq.base)) {
assert(ap.type == ACon || ap.type == AUnk);
/* if they have the same base, we
* can rely on the offsets only */
if (ovlap)
return MustAlias;
return NoAlias;
}
/* if one of the two is unknown
* there may be aliasing unless
* the other is provably local */
if (ap.type == AUnk && aq.type != ALoc)
return MayAlias;
if (aq.type == AUnk && ap.type != ALoc)
return MayAlias;
return NoAlias;
}
/* Report whether the value referenced by r may escape the function.
 * Anything that is not a temporary escapes conservatively; a temporary
 * only stays local while it is a stack slot not yet marked AEsc. */
int
escapes(Ref r, Fn *fn)
{
	Alias *al;

	if (rtype(r) == RTmp) {
		al = &fn->tmp[r.val].alias;
		if (astack(al->type) && al->slot->type != AEsc)
			return 0;
	}
	return 1;
}
/* Mark the stack slot backing temporary r (if any) as escaping. Non-stack
 * references are left untouched. */
static void
esc(Ref r, Fn *fn)
{
Alias *a;
/* Only refs that can come out of the parser are expected here. */
assert(rtype(r) <= RType);
if (rtype(r) == RTmp) {
a = &fn->tmp[r.val].alias;
if (astack(a->type))
a->slot->type = AEsc;
}
}
/* Record that sz bytes are stored at r: when r is offset into a known
 * stack slot, set the corresponding bits in the slot's written-bytes
 * mask (or all bits when the store is too large or out of mask range). */
static void
store(Ref r, int sz, Fn *fn)
{
Alias *a;
int64_t off;
bits m;
if (rtype(r) == RTmp) {
a = &fn->tmp[r.val].alias;
if (a->slot) {
assert(astack(a->type));
off = a->offset;
/* Stores wider than the mask, or at offsets outside it,
 * conservatively mark the whole slot as written. */
if (sz >= NBit
|| (off < 0 || off >= NBit))
m = -1;
else
m = (BIT(sz) - 1) << off;
a->slot->u.loc.m |= m;
}
}
}
/* Compute alias information for every temporary of fn: classify each def
 * as a stack local, constant offset, symbol, or unknown pointer; fold
 * copy/add chains into base+offset form; mark escaping slots; and record
 * which bytes of each slot are written. Results land in tmp[].alias. */
void
fillalias(Fn *fn)
{
uint n;
int t, sz;
int64_t x;
Blk *b;
Phi *p;
Ins *i;
Con *c;
Alias *a, a0, a1;
/* Reset all temporaries to "no info yet". */
for (t=0; t<fn->ntmp; t++)
fn->tmp[t].alias.type = ABot;
for (n=0; n<fn->nblk; ++n) {
b = fn->rpo[n];
/* Phi results are unknown pointers based at themselves. */
for (p=b->phi; p; p=p->link) {
assert(rtype(p->to) == RTmp);
a = &fn->tmp[p->to.val].alias;
assert(a->type == ABot);
a->type = AUnk;
a->base = p->to.val;
a->offset = 0;
a->slot = 0;
}
for (i=b->ins; i<&b->ins[b->nins]; ++i) {
a = 0;
if (!req(i->to, R)) {
assert(rtype(i->to) == RTmp);
a = &fn->tmp[i->to.val].alias;
assert(a->type == ABot);
/* alloc defines a fresh stack local; record its size
 * when the argument is a small constant. */
if (Oalloc <= i->op && i->op <= Oalloc1) {
a->type = ALoc;
a->slot = a;
a->u.loc.sz = -1;
if (rtype(i->arg[0]) == RCon) {
c = &fn->con[i->arg[0].val];
x = c->bits.i;
if (c->type == CBits)
if (0 <= x && x <= NBit)
a->u.loc.sz = x;
}
} else {
a->type = AUnk;
a->slot = 0;
}
a->base = i->to.val;
a->offset = 0;
}
/* copy propagates the source's alias info verbatim. */
if (i->op == Ocopy) {
assert(a);
getalias(a, i->arg[0], fn);
}
/* add with one constant operand folds into base+offset. */
if (i->op == Oadd) {
getalias(&a0, i->arg[0], fn);
getalias(&a1, i->arg[1], fn);
if (a0.type == ACon) {
*a = a1;
a->offset += a0.offset;
}
else if (a1.type == ACon) {
*a = a0;
a->offset += a1.offset;
}
}
/* Any other use of a pointer (outside load addresses, store
 * addresses, and aggregate args) lets it escape. */
if (req(i->to, R) || a->type == AUnk)
if (i->op != Oblit0) {
if (!isload(i->op))
esc(i->arg[0], fn);
if (!isstore(i->op))
if (i->op != Oargc)
esc(i->arg[1], fn);
}
/* blit is a two-instruction pair; the second carries the size. */
if (i->op == Oblit0) {
++i;
assert(i->op == Oblit1);
assert(rtype(i->arg[0]) == RInt);
sz = abs(rsval(i->arg[0]));
store((i-1)->arg[1], sz, fn);
}
if (isstore(i->op))
store(i->arg[1], storesz(i), fn);
}
/* Return values escape, except aggregate returns (Jretc). */
if (b->jmp.type != Jretc)
esc(b->jmp.arg, fn);
}
/* Phi arguments escape: the value flows out of its defining block. */
for (b=fn->start; b; b=b->link)
for (p=b->phi; p; p=p->link)
for (n=0; n<p->narg; n++)
esc(p->arg[n], fn);
}

631
src/qbe/all.h Normal file
View File

@@ -0,0 +1,631 @@
#include <assert.h>
#include <inttypes.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAKESURE(what, x) typedef char make_sure_##what[(x)?1:-1]
#define die(...) die_(__FILE__, __VA_ARGS__)
typedef unsigned char uchar;
typedef unsigned int uint;
typedef unsigned long ulong;
typedef unsigned long long bits;
typedef struct BSet BSet;
typedef struct Ref Ref;
typedef struct Op Op;
typedef struct Ins Ins;
typedef struct Phi Phi;
typedef struct Blk Blk;
typedef struct Use Use;
typedef struct Sym Sym;
typedef struct Num Num;
typedef struct Alias Alias;
typedef struct Tmp Tmp;
typedef struct Con Con;
typedef struct Addr Mem;
typedef struct Fn Fn;
typedef struct Typ Typ;
typedef struct Field Field;
typedef struct Dat Dat;
typedef struct Lnk Lnk;
typedef struct Target Target;
enum {
NString = 80,
NIns = 1 << 20,
NAlign = 3,
NField = 32,
NBit = CHAR_BIT * sizeof(bits),
};
struct Target {
char name[16];
char apple;
char windows;
int gpr0; /* first general purpose reg */
int ngpr;
int fpr0; /* first floating point reg */
int nfpr;
bits rglob; /* globally live regs (e.g., sp, fp) */
int nrglob;
int *rsave; /* caller-save */
int nrsave[2];
bits (*retregs)(Ref, int[2]);
bits (*argregs)(Ref, int[2]);
int (*memargs)(int);
void (*abi0)(Fn *);
void (*abi1)(Fn *);
void (*isel)(Fn *);
void (*emitfn)(Fn *, FILE *);
void (*emitfin)(FILE *);
char asloc[4];
char assym[4];
uint cansel:1;
};
#define BIT(n) ((bits)1 << (n))
enum {
RXX = 0,
Tmp0 = NBit, /* first non-reg temporary */
};
struct BSet {
uint nt;
bits *t;
};
struct Ref {
uint type:3;
uint val:29;
};
enum {
RTmp,
RCon,
RInt,
RType, /* last kind to come out of the parser */
RSlot,
RCall,
RMem,
};
#define R (Ref){RTmp, 0}
#define UNDEF (Ref){RCon, 0} /* represents uninitialized data */
#define CON_Z (Ref){RCon, 1}
#define TMP(x) (Ref){RTmp, x}
#define CON(x) (Ref){RCon, x}
#define SLOT(x) (Ref){RSlot, (x)&0x1fffffff}
#define TYPE(x) (Ref){RType, x}
#define CALL(x) (Ref){RCall, x}
#define MEM(x) (Ref){RMem, x}
#define INT(x) (Ref){RInt, (x)&0x1fffffff}
/* req: exact equality of two refs (same kind and same value). */
static inline int req(Ref a, Ref b)
{
return a.type == b.type && a.val == b.val;
}
/* rtype: the kind of a ref, or -1 for the null ref R. */
static inline int rtype(Ref r)
{
if (req(r, R))
return -1;
return r.type;
}
/* rsval: sign-extend a ref's 29-bit payload to a signed int. */
static inline int rsval(Ref r)
{
return ((int)r.val ^ 0x10000000) - 0x10000000;
}
enum CmpI {
Cieq,
Cine,
Cisge,
Cisgt,
Cisle,
Cislt,
Ciuge,
Ciugt,
Ciule,
Ciult,
NCmpI,
};
enum CmpF {
Cfeq,
Cfge,
Cfgt,
Cfle,
Cflt,
Cfne,
Cfo,
Cfuo,
NCmpF,
NCmp = NCmpI + NCmpF,
};
enum O {
Oxxx,
#define O(op, x, y) O##op,
#include "ops.h"
NOp,
};
enum J {
Jxxx,
#define JMPS(X) \
X(retw) X(retl) X(rets) X(retd) \
X(retsb) X(retub) X(retsh) X(retuh) \
X(retc) X(ret0) X(jmp) X(jnz) \
X(jfieq) X(jfine) X(jfisge) X(jfisgt) \
X(jfisle) X(jfislt) X(jfiuge) X(jfiugt) \
X(jfiule) X(jfiult) X(jffeq) X(jffge) \
X(jffgt) X(jffle) X(jfflt) X(jffne) \
X(jffo) X(jffuo) X(hlt)
#define X(j) J##j,
JMPS(X)
#undef X
NJmp
};
enum {
Ocmpw = Oceqw,
Ocmpw1 = Ocultw,
Ocmpl = Oceql,
Ocmpl1 = Ocultl,
Ocmps = Oceqs,
Ocmps1 = Ocuos,
Ocmpd = Oceqd,
Ocmpd1 = Ocuod,
Oalloc = Oalloc4,
Oalloc1 = Oalloc16,
Oflag = Oflagieq,
Oflag1 = Oflagfuo,
Oxsel = Oxselieq,
Oxsel1 = Oxselfuo,
NPubOp = Onop,
Jjf = Jjfieq,
Jjf1 = Jjffuo,
};
#define INRANGE(x, l, u) ((unsigned)(x) - l <= u - l) /* linear in x */
#define isstore(o) INRANGE(o, Ostoreb, Ostored)
#define isload(o) INRANGE(o, Oloadsb, Oload)
#define isalloc(o) INRANGE(o, Oalloc4, Oalloc16)
#define isext(o) INRANGE(o, Oextsb, Oextuw)
#define ispar(o) INRANGE(o, Opar, Opare)
#define isarg(o) INRANGE(o, Oarg, Oargv)
#define isret(j) INRANGE(j, Jretw, Jret0)
#define isparbh(o) INRANGE(o, Oparsb, Oparuh)
#define isargbh(o) INRANGE(o, Oargsb, Oarguh)
#define isretbh(j) INRANGE(j, Jretsb, Jretuh)
#define isxsel(o) INRANGE(o, Oxsel, Oxsel1)
enum {
Kx = -1, /* "top" class (see usecheck() and clsmerge()) */
Kw,
Kl,
Ks,
Kd
};
#define KWIDE(k) ((k)&1)
#define KBASE(k) ((k)>>1)
struct Op {
char *name;
short argcls[2][4];
uint canfold:1;
uint hasid:1; /* op identity value? */
uint idval:1; /* identity value 0/1 */
uint commutes:1; /* commutative op? */
uint assoc:1; /* associative op? */
uint idemp:1; /* idempotent op? */
uint cmpeqwl:1; /* Kl/Kw cmp eq/ne? */
uint cmplgtewl:1; /* Kl/Kw cmp lt/gt/le/ge? */
uint eqval:1; /* 1 for eq; 0 for ne */
uint pinned:1; /* GCM pinned op? */
};
struct Ins {
uint op:30;
uint cls:2;
Ref to;
Ref arg[2];
};
struct Phi {
Ref to;
Ref *arg;
Blk **blk;
uint narg;
short cls;
uint visit:1;
Phi *link;
};
/* basic block */
struct Blk {
Phi *phi; /* head of the block's phi list */
Ins *ins; /* instruction array */
uint nins; /* number of instructions in ins */
struct {
short type; /* jump kind (enum J) */
Ref arg; /* jump argument (condition / return value) */
} jmp;
Blk *s1; /* jump successors */
Blk *s2;
Blk *link; /* next block in layout order */
uint id; /* block id (label number in emitted code) */
uint visit; /* scratch field for traversals */
Blk *idom; /* immediate dominator (see filldom()) */
Blk *dom, *dlink; /* dominator-tree child list */
Blk **fron; /* dominance frontier (see fillfron()) */
uint nfron;
int depth; /* see filldepth() */
Blk **pred; /* predecessor blocks (see fillpreds()) */
uint npred;
BSet in[1], out[1], gen[1]; /* liveness sets (see filllive()) */
int nlive[2]; /* live counts, per class base (int/float) */
int loop; /* loop nesting weight (see fillloop()) */
char name[NString];
};
struct Use {
enum {
UXXX,
UPhi,
UIns,
UJmp,
} type;
uint bid;
union {
Ins *ins;
Phi *phi;
} u;
};
struct Sym {
enum {
SGlo,
SThr,
} type;
uint32_t id;
};
struct Num {
uchar n;
uchar nl, nr;
Ref l, r;
};
enum {
NoAlias,
MayAlias,
MustAlias
};
struct Alias {
enum {
ABot = 0,
ALoc = 1, /* stack local */
ACon = 2,
AEsc = 3, /* stack escaping */
ASym = 4,
AUnk = 6,
#define astack(t) ((t) & 1)
} type;
int base;
int64_t offset;
union {
Sym sym;
struct {
int sz; /* -1 if > NBit */
bits m;
} loc;
} u;
Alias *slot;
};
struct Tmp {
char name[NString];
Ins *def;
Use *use;
uint ndef, nuse;
uint bid; /* id of a defining block */
uint cost;
int slot; /* -1 for unset */
short cls;
struct {
int r; /* register or -1 */
int w; /* weight */
bits m; /* avoid these registers */
} hint;
int phi;
Alias alias;
enum {
WFull,
Wsb, /* must match Oload/Oext order */
Wub,
Wsh,
Wuh,
Wsw,
Wuw
} width;
int visit;
uint gcmbid;
};
struct Con {
enum {
CUndef,
CBits,
CAddr,
} type;
Sym sym;
union {
int64_t i;
double d;
float s;
} bits;
char flt; /* 1 to print as s, 2 to print as d */
};
typedef struct Addr Addr;
struct Addr { /* amd64 addressing */
Con offset;
Ref base;
Ref index;
int scale;
};
struct Lnk {
char export;
char thread;
char common;
char align;
char *sec;
char *secf;
};
/* a function under compilation */
struct Fn {
Blk *start; /* entry block; blocks are chained via link */
Tmp *tmp; /* temporaries, indexed by Ref value */
Con *con; /* constants */
Mem *mem; /* amd64 addressing expressions */
int ntmp;
int ncon;
int nmem;
uint nblk; /* number of blocks */
int retty; /* index in typ[], -1 if no aggregate return */
Ref retr;
Blk **rpo; /* blocks in reverse post-order */
bits reg; /* bitset of machine registers the function uses */
int slot; /* frame size in 4-byte slots (see *_framesz()) */
int salign; /* frame alignment (see *_framesz()) */
char vararg; /* variadic? (forces a register save area on sysv) */
char dynalloc; /* uses dynamic stack allocation (Osalloc)? */
char leaf; /* makes no calls? (allows frameless emission) */
char name[NString];
Lnk lnk;
};
struct Typ {
char name[NString];
char isdark;
char isunion;
int align;
uint64_t size;
uint nunion;
struct Field {
enum {
FEnd,
Fb,
Fh,
Fw,
Fl,
Fs,
Fd,
FPad,
FTyp,
} type;
uint len; /* or index in typ[] for FTyp */
} (*fields)[NField+1];
};
struct Dat {
enum {
DStart,
DEnd,
DB,
DH,
DW,
DL,
DZ
} type;
char *name;
Lnk *lnk;
union {
int64_t num;
double fltd;
float flts;
char *str;
struct {
char *name;
int64_t off;
} ref;
} u;
char isref;
char isstr;
};
/* main.c */
extern Target T;
extern char debug['Z'+1];
/* util.c */
typedef enum {
PHeap, /* free() necessary */
PFn, /* discarded after processing the function */
} Pool;
extern Typ *typ;
extern Ins insb[NIns], *curi;
uint32_t hash(char *);
void die_(char *, char *, ...) __attribute__((noreturn));
void *emalloc(size_t);
void *alloc(size_t);
void freeall(void);
void *vnew(ulong, size_t, Pool);
void vfree(void *);
void vgrow(void *, ulong);
void addins(Ins **, uint *, Ins *);
void addbins(Ins **, uint *, Blk *);
void strf(char[NString], char *, ...);
uint32_t intern(char *);
char *str(uint32_t);
int argcls(Ins *, int);
int isreg(Ref);
int iscmp(int, int *, int *);
void igroup(Blk *, Ins *, Ins **, Ins **);
void emit(int, int, Ref, Ref, Ref);
void emiti(Ins);
void idup(Blk *, Ins *, ulong);
Ins *icpy(Ins *, Ins *, ulong);
int cmpop(int);
int cmpneg(int);
int cmpwlneg(int);
int clsmerge(short *, short);
int phicls(int, Tmp *);
uint phiargn(Phi *, Blk *);
Ref phiarg(Phi *, Blk *);
Ref newtmp(char *, int, Fn *);
void chuse(Ref, int, Fn *);
int symeq(Sym, Sym);
Ref newcon(Con *, Fn *);
Ref getcon(int64_t, Fn *);
int addcon(Con *, Con *, int);
int isconbits(Fn *fn, Ref r, int64_t *v);
void salloc(Ref, Ref, Fn *);
void dumpts(BSet *, Tmp *, FILE *);
void runmatch(uchar *, Num *, Ref, Ref *);
void bsinit(BSet *, uint);
void bszero(BSet *);
uint bscount(BSet *);
void bsset(BSet *, uint);
void bsclr(BSet *, uint);
void bscopy(BSet *, BSet *);
void bsunion(BSet *, BSet *);
void bsinter(BSet *, BSet *);
void bsdiff(BSet *, BSet *);
int bsequal(BSet *, BSet *);
int bsiter(BSet *, int *);
/* test membership of elt in the bit set bs;
 * elt must lie within the set's universe */
static inline int
bshas(BSet *bs, uint elt)
{
assert(elt < bs->nt * NBit);
return (bs->t[elt/NBit] & BIT(elt%NBit)) != 0;
}
/* parse.c */
extern Op optab[NOp];
void parse(FILE *, char *, void (char *), void (Dat *), void (Fn *));
void printfn(Fn *, FILE *);
void printref(Ref, Fn *, FILE *);
void err(char *, ...) __attribute__((noreturn));
/* abi.c */
void elimsb(Fn *);
/* cfg.c */
Blk *newblk(void);
void fillpreds(Fn *);
void fillcfg(Fn *);
void filldom(Fn *);
int sdom(Blk *, Blk *);
int dom(Blk *, Blk *);
void fillfron(Fn *);
void loopiter(Fn *, void (*)(Blk *, Blk *));
void filldepth(Fn *);
Blk *lca(Blk *, Blk *);
void fillloop(Fn *);
void simpljmp(Fn *);
int reaches(Fn *, Blk *, Blk *);
int reachesnotvia(Fn *, Blk *, Blk *, Blk *);
int ifgraph(Blk *, Blk **, Blk **, Blk **);
void simplcfg(Fn *);
/* mem.c */
void promote(Fn *);
void coalesce(Fn *);
/* alias.c */
void fillalias(Fn *);
void getalias(Alias *, Ref, Fn *);
int alias(Ref, int, int, Ref, int, int *, Fn *);
int escapes(Ref, Fn *);
/* load.c */
int loadsz(Ins *);
int storesz(Ins *);
void loadopt(Fn *);
/* ssa.c */
void adduse(Tmp *, int, Blk *, ...);
void filluse(Fn *);
void ssa(Fn *);
void ssacheck(Fn *);
/* copy.c */
void narrowpars(Fn *fn);
Ref copyref(Fn *, Blk *, Ins *);
Ref phicopyref(Fn *, Blk *, Phi *);
/* fold.c */
int foldint(Con *, int, int, Con *, Con *);
Ref foldref(Fn *, Ins *);
/* gvn.c */
extern Ref con01[2]; /* 0 and 1 */
int zeroval(Fn *, Blk *, Ref, int, int *);
void gvn(Fn *);
/* gcm.c */
int pinned(Ins *);
void gcm(Fn *);
/* ifopt.c */
void ifconvert(Fn *fn);
/* simpl.c */
void simpl(Fn *);
/* live.c */
void liveon(BSet *, Blk *, Blk *);
void filllive(Fn *);
/* spill.c */
void fillcost(Fn *);
void spill(Fn *);
/* rega.c */
void rega(Fn *);
/* emit.c */
void emitfnlnk(char *, Lnk *, FILE *);
void emitdat(Dat *, FILE *);
void emitdbgfile(char *, FILE *);
void emitdbgloc(uint, uint, FILE *);
int stashbits(bits, int);
void elf_emitfnfin(char *, FILE *);
void elf_emitfin(FILE *);
void macho_emitfin(FILE *);
void pe_emitfin(FILE *);

82
src/qbe/amd64/all.h Normal file
View File

@@ -0,0 +1,82 @@
#include "../all.h"
typedef struct Amd64Op Amd64Op;
/* amd64 machine registers: general-purpose first, then sse */
enum Amd64Reg {
RAX = RXX+1, /* caller-save */
RCX, /* caller-save */
RDX, /* caller-save */
RSI, /* caller-save on sysv, callee-save on win */
RDI, /* caller-save on sysv, callee-save on win */
R8, /* caller-save */
R9, /* caller-save */
R10, /* caller-save */
R11, /* caller-save */
RBX, /* callee-save */
R12,
R13,
R14,
R15,
RBP, /* globally live */
RSP,
XMM0, /* sse */
XMM1,
XMM2,
XMM3,
XMM4,
XMM5,
XMM6,
XMM7,
XMM8,
XMM9,
XMM10,
XMM11,
XMM12,
XMM13,
XMM14,
XMM15,
NFPR = XMM14 - XMM0 + 1, /* reserve XMM15 */
NGPR = RSP - RAX + 1,
NFPS = NFPR,
NGPS_SYSV = R11 - RAX + 1,
NCLR_SYSV = R15 - RBX + 1,
NGPS_WIN = R11 - RAX + 1 - 2, /* -2 for RSI/RDI */
NCLR_WIN = R15 - RBX + 1 + 2, /* +2 for RSI/RDI */
};
MAKESURE(reg_not_tmp, XMM15 < (int)Tmp0);
struct Amd64Op {
char nmem;
char zflag;
char lflag;
};
/* targ.c */
extern Amd64Op amd64_op[];
/* sysv.c (abi) */
extern int amd64_sysv_rsave[];
extern int amd64_sysv_rclob[];
bits amd64_sysv_retregs(Ref, int[2]);
bits amd64_sysv_argregs(Ref, int[2]);
void amd64_sysv_abi(Fn *);
/* winabi.c */
extern int amd64_winabi_rsave[];
extern int amd64_winabi_rclob[];
bits amd64_winabi_retregs(Ref, int[2]);
bits amd64_winabi_argregs(Ref, int[2]);
void amd64_winabi_abi(Fn *);
/* isel.c */
void amd64_isel(Fn *);
/* emit.c */
void amd64_sysv_emitfn(Fn *, FILE *);
void amd64_winabi_emitfn(Fn *, FILE *);

844
src/qbe/amd64/emit.c Normal file
View File

@@ -0,0 +1,844 @@
#include "all.h"
typedef struct E E;
/* per-function emitter state */
struct E {
FILE *f; /* output assembly stream */
Fn *fn; /* function being emitted */
int fp; /* frame pointer register: RBP, or RSP when frameless */
uint64_t fsz; /* frame size in bytes (see *_framesz()) */
int nclob; /* number of callee-save registers pushed so far */
};
#define CMP(X) \
X(Ciule, "be", "a") \
X(Ciult, "b", "ae") \
X(Cisle, "le", "g") \
X(Cislt, "l", "ge") \
X(Cisgt, "g", "le") \
X(Cisge, "ge", "l") \
X(Ciugt, "a", "be") \
X(Ciuge, "ae", "b") \
X(Cieq, "z", "nz") \
X(Cine, "nz", "z") \
X(NCmpI+Cfle, "be", "a") \
X(NCmpI+Cflt, "b", "ae") \
X(NCmpI+Cfgt, "a", "be") \
X(NCmpI+Cfge, "ae", "b") \
X(NCmpI+Cfo, "np", "p") \
X(NCmpI+Cfuo, "p", "np")
enum {
SLong = 0,
SWord = 1,
SShort = 2,
SByte = 3,
Ki = -1, /* matches Kw and Kl */
Ka = -2, /* matches all classes */
};
/* Instruction format strings:
*
* if the format string starts with -, the instruction
* is assumed to be 3-address and is put in 2-address
* mode using an extra mov if necessary
*
* if the format string starts with +, the same as the
* above applies, but commutativity is also assumed
*
* %k is used to set the class of the instruction,
* it'll expand to "l", "q", "ss", "sd", depending
* on the instruction class
* %0 designates the first argument
* %1 designates the second argument
* %= designates the result
*
* if %k is not used, a prefix to 0, 1, or = must be
* added, it can be:
* M - memory reference
* L - long (64 bits)
* W - word (32 bits)
* H - short (16 bits)
* B - byte (8 bits)
* S - single precision float
* D - double precision float
*/
static struct {
short op;
short cls;
char *fmt;
} omap[] = {
{ Oadd, Ka, "+add%k %1, %=" },
{ Osub, Ka, "-sub%k %1, %=" },
{ Oand, Ki, "+and%k %1, %=" },
{ Oor, Ki, "+or%k %1, %=" },
{ Oxor, Ki, "+xor%k %1, %=" },
{ Osar, Ki, "-sar%k %B1, %=" },
{ Oshr, Ki, "-shr%k %B1, %=" },
{ Oshl, Ki, "-shl%k %B1, %=" },
{ Omul, Ki, "+imul%k %1, %=" },
{ Omul, Ks, "+mulss %1, %=" },
{ Omul, Kd, "+mulsd %1, %=" },
{ Odiv, Ka, "-div%k %1, %=" },
{ Ostorel, Ka, "movq %L0, %M1" },
{ Ostorew, Ka, "movl %W0, %M1" },
{ Ostoreh, Ka, "movw %H0, %M1" },
{ Ostoreb, Ka, "movb %B0, %M1" },
{ Ostores, Ka, "movss %S0, %M1" },
{ Ostored, Ka, "movsd %D0, %M1" },
{ Oload, Ka, "mov%k %M0, %=" },
{ Oloadsw, Kl, "movslq %M0, %L=" },
{ Oloadsw, Kw, "movl %M0, %W=" },
{ Oloaduw, Ki, "movl %M0, %W=" },
{ Oloadsh, Ki, "movsw%k %M0, %=" },
{ Oloaduh, Ki, "movzw%k %M0, %=" },
{ Oloadsb, Ki, "movsb%k %M0, %=" },
{ Oloadub, Ki, "movzb%k %M0, %=" },
{ Oextsw, Kl, "movslq %W0, %L=" },
{ Oextuw, Kl, "movl %W0, %W=" },
{ Oextsh, Ki, "movsw%k %H0, %=" },
{ Oextuh, Ki, "movzw%k %H0, %=" },
{ Oextsb, Ki, "movsb%k %B0, %=" },
{ Oextub, Ki, "movzb%k %B0, %=" },
{ Oexts, Kd, "cvtss2sd %0, %=" },
{ Otruncd, Ks, "cvtsd2ss %0, %=" },
{ Ostosi, Ki, "cvttss2si%k %0, %=" },
{ Odtosi, Ki, "cvttsd2si%k %0, %=" },
{ Oswtof, Ka, "cvtsi2%k %W0, %=" },
{ Osltof, Ka, "cvtsi2%k %L0, %=" },
{ Ocast, Ki, "movq %D0, %L=" },
{ Ocast, Ka, "movq %L0, %D=" },
{ Oaddr, Ki, "lea%k %M0, %=" },
{ Oswap, Ki, "xchg%k %0, %1" },
{ Osign, Kl, "cqto" },
{ Osign, Kw, "cltd" },
{ Oxdiv, Ki, "div%k %0" },
{ Oxidiv, Ki, "idiv%k %0" },
{ Oxcmp, Ks, "ucomiss %S0, %S1" },
{ Oxcmp, Kd, "ucomisd %D0, %D1" },
{ Oxcmp, Ki, "cmp%k %0, %1" },
{ Oxtest, Ki, "test%k %0, %1" },
#define X(c, s, _) \
{ Oflag+c, Ki, "set" s " %B=\n\tmovzb%k %B=, %=" },
CMP(X)
#undef X
{ Oflagfeq, Ki, "setz %B=\n\tmovzb%k %B=, %=" },
{ Oflagfne, Ki, "setnz %B=\n\tmovzb%k %B=, %=" },
{ NOp, 0, 0 }
};
static char cmov[][2][16] = {
#define X(c, s0, s1) \
[c] = { \
"cmov" s0 " %0, %=", \
"cmov" s1 " %1, %=", \
},
CMP(X)
#undef X
};
static char *rname[][4] = {
[RAX] = {"rax", "eax", "ax", "al"},
[RBX] = {"rbx", "ebx", "bx", "bl"},
[RCX] = {"rcx", "ecx", "cx", "cl"},
[RDX] = {"rdx", "edx", "dx", "dl"},
[RSI] = {"rsi", "esi", "si", "sil"},
[RDI] = {"rdi", "edi", "di", "dil"},
[RBP] = {"rbp", "ebp", "bp", "bpl"},
[RSP] = {"rsp", "esp", "sp", "spl"},
[R8 ] = {"r8" , "r8d", "r8w", "r8b"},
[R9 ] = {"r9" , "r9d", "r9w", "r9b"},
[R10] = {"r10", "r10d", "r10w", "r10b"},
[R11] = {"r11", "r11d", "r11w", "r11b"},
[R12] = {"r12", "r12d", "r12w", "r12b"},
[R13] = {"r13", "r13d", "r13w", "r13b"},
[R14] = {"r14", "r14d", "r14w", "r14b"},
[R15] = {"r15", "r15d", "r15w", "r15b"},
};
/* translate a stack-slot reference into a byte offset relative
 * to the frame pointer (e->fp); slots are 4 bytes each.
 * NOTE(review): negative slot values resolve above the frame
 * (positive rbp offsets) — presumably stack-passed arguments;
 * confirm against the abi pass */
static int
slot(Ref r, E *e)
{
int s;
s = rsval(r);
assert(s <= e->fn->slot);
/* specific to NAlign == 3 */
if (s < 0) {
if (e->fp == RSP)
return 4*-s - 8 + e->fsz + e->nclob*8;
else
return 4*-s;
}
else if (e->fp == RSP)
/* frameless: offsets from rsp must skip the pushed callee-saves */
return 4*s + e->nclob*8;
else if (e->fn->vararg) {
if (T.windows)
return -4 * (e->fn->slot - s);
else
/* locals sit below the 176-byte sysv vararg register save area */
return -176 + -4 * (e->fn->slot - s);
} else
return -4 * (e->fn->slot - s);
}
/* print a constant operand: a symbol (with optional addend)
 * or an integer literal */
static void
emitcon(Con *con, E *e)
{
char *p, *l;
switch (con->type) {
case CAddr:
l = str(con->sym.id);
/* quoted names bypass the target's symbol prefix (see fixarg()) */
p = l[0] == '"' ? "" : T.assym;
if (con->sym.type == SThr) {
/* thread-local symbol: TLVP reference on apple,
 * %fs-relative @tpoff elsewhere */
if (T.apple)
fprintf(e->f, "%s%s@TLVP", p, l);
else
fprintf(e->f, "%%fs:%s%s@tpoff", p, l);
} else
fprintf(e->f, "%s%s", p, l);
if (con->bits.i)
fprintf(e->f, "%+"PRId64, con->bits.i);
break;
case CBits:
fprintf(e->f, "%"PRId64, con->bits.i);
break;
default:
die("unreachable");
}
}
/* return the assembly name of a register; sz selects the
 * operand-size variant for general-purpose registers */
static char *
regtoa(int reg, int sz)
{
	static char xmmname[6];

	assert(reg <= XMM15);
	if (reg < XMM0)
		return rname[reg][sz];
	/* sse registers have a single name for every size */
	sprintf(xmmname, "xmm%d", reg-XMM0);
	return xmmname;
}
/* fetch the operand of i selected by format letter c:
 * '0'/'1' are the arguments, '=' is the destination */
static Ref
getarg(char c, Ins *i)
{
	if (c == '0')
		return i->arg[0];
	if (c == '1')
		return i->arg[1];
	if (c == '=')
		return i->to;
	die("invalid arg letter %c", c);
}
static void emitins(Ins, E *);
/* emit a copy instruction r1 <- r2 with class k */
static void
emitcopy(Ref r1, Ref r2, int k, E *e)
{
	Ins icp = {
		.op = Ocopy,
		.cls = k,
		.to = r1,
		.arg = {r2},
	};

	emitins(icp, e);
}
/* print one instruction according to the format string s
 * (syntax documented in the comment above omap[]); a leading
 * '+'/'-' converts the 3-address instruction to 2-address
 * form by copying arg[0] into the destination first */
static void
emitf(char *s, Ins *i, E *e)
{
static char clstoa[][3] = {"l", "q", "ss", "sd"};
char c;
int sz;
Ref ref;
Mem *m;
Con off;
switch (*s) {
case '+':
/* commutative: if the destination aliases arg[1],
 * swap the arguments so the copy below is legal */
if (req(i->arg[1], i->to)) {
ref = i->arg[0];
i->arg[0] = i->arg[1];
i->arg[1] = ref;
}
/* fall through */
case '-':
assert((!req(i->arg[1], i->to) || req(i->arg[0], i->to)) &&
"cannot convert to 2-address");
emitcopy(i->to, i->arg[0], i->cls, e);
s++;
break;
}
fputc('\t', e->f);
Next:
/* copy literal characters until the next % directive */
while ((c = *s++) != '%')
if (!c) {
fputc('\n', e->f);
return;
} else
fputc(c, e->f);
switch ((c = *s++)) {
case '%':
fputc('%', e->f);
break;
case 'k':
fputs(clstoa[i->cls], e->f);
break;
case '0':
case '1':
case '=':
/* bare operand: size follows the instruction class */
sz = KWIDE(i->cls) ? SLong : SWord;
s--;
goto Ref;
case 'D':
case 'S':
sz = SLong; /* does not matter for floats */
Ref:
c = *s++;
ref = getarg(c, i);
switch (rtype(ref)) {
case RTmp:
assert(isreg(ref));
fprintf(e->f, "%%%s", regtoa(ref.val, sz));
break;
case RSlot:
fprintf(e->f, "%d(%%%s)",
slot(ref, e),
regtoa(e->fp, SLong)
);
break;
case RMem:
Mem:
m = &e->fn->mem[ref.val];
/* rewrite slot bases into fp-relative offsets */
if (rtype(m->base) == RSlot) {
off.type = CBits;
off.bits.i = slot(m->base, e);
addcon(&m->offset, &off, 1);
m->base = TMP(e->fp);
}
if (m->offset.type != CUndef)
emitcon(&m->offset, e);
fputc('(', e->f);
if (!req(m->base, R))
fprintf(e->f, "%%%s",
regtoa(m->base.val, SLong)
);
else if (m->offset.type == CAddr)
fprintf(e->f, "%%rip");
if (!req(m->index, R))
fprintf(e->f, ", %%%s, %d",
regtoa(m->index.val, SLong),
m->scale
);
fputc(')', e->f);
break;
case RCon:
fputc('$', e->f);
emitcon(&e->fn->con[ref.val], e);
break;
default:
die("unreachable");
}
break;
case 'L':
sz = SLong;
goto Ref;
case 'W':
sz = SWord;
goto Ref;
case 'H':
sz = SShort;
goto Ref;
case 'B':
sz = SByte;
goto Ref;
case 'M':
/* memory operand: like Ref but without the $ on constants */
c = *s++;
ref = getarg(c, i);
switch (rtype(ref)) {
case RMem:
goto Mem;
case RSlot:
fprintf(e->f, "%d(%%%s)",
slot(ref, e),
regtoa(e->fp, SLong)
);
break;
case RCon:
off = e->fn->con[ref.val];
emitcon(&off, e);
if (off.type == CAddr)
if (off.sym.type != SThr || T.apple)
fprintf(e->f, "(%%rip)");
break;
case RTmp:
assert(isreg(ref));
fprintf(e->f, "(%%%s)", regtoa(ref.val, SLong));
break;
default:
die("unreachable");
}
break;
default:
die("invalid format specifier %%%c", c);
}
goto Next;
}
/* sign-bit masks used to negate floats by xor (see Oneg in emitins()) */
static bits negmask[4] = {
[Ks] = 0x80000000,
[Kd] = 0x8000000000000000,
};
/* emit a single instruction; most ops are looked up in
 * omap[], the cases below need special treatment */
static void
emitins(Ins i, E *e)
{
Ref r;
int64_t val;
int o, t0;
Ins ineg;
Con *con;
char *sym;
switch (i.op) {
default:
if (isxsel(i.op))
goto case_Oxsel;
Table:
/* most instructions are just pulled out of
 * the table omap[], some special cases are
 * detailed below */
for (o=0;; o++) {
/* this linear search should really be a binary
 * search */
if (omap[o].op == NOp)
die("no match for %s(%c)",
optab[i.op].name, "wlsd"[i.cls]);
if (omap[o].op == i.op)
if (omap[o].cls == i.cls
|| (omap[o].cls == Ki && KBASE(i.cls) == 0)
|| (omap[o].cls == Ka))
break;
}
emitf(omap[o].fmt, &i, e);
break;
case Onop:
/* just do nothing for nops, they are inserted
 * by some passes */
break;
case Omul:
/* here, we try to use the 3-addresss form
 * of multiplication when possible */
if (rtype(i.arg[1]) == RCon) {
r = i.arg[0];
i.arg[0] = i.arg[1];
i.arg[1] = r;
}
if (KBASE(i.cls) == 0 /* only available for ints */
&& rtype(i.arg[0]) == RCon
&& rtype(i.arg[1]) == RTmp) {
emitf("imul%k %0, %1, %=", &i, e);
break;
}
goto Table;
case Osub:
/* we have to use the negation trick to handle
 * some 3-address subtractions */
if (req(i.to, i.arg[1]) && !req(i.arg[0], i.to)) {
ineg = (Ins){Oneg, i.cls, i.to, {i.to}};
emitins(ineg, e);
emitf("add%k %0, %=", &i, e);
break;
}
goto Table;
case Oneg:
if (!req(i.to, i.arg[0]))
emitf("mov%k %0, %=", &i, e);
if (KBASE(i.cls) == 0)
emitf("neg%k %=", &i, e);
else
/* no float negate instruction: xor the
 * sign bit using a stashed negmask */
fprintf(e->f,
"\txorp%c %sfp%d(%%rip), %%%s\n",
"xxsd"[i.cls],
T.asloc,
stashbits(negmask[i.cls], 16),
regtoa(i.to.val, SLong)
);
break;
case Odiv:
/* use xmm15 to adjust the instruction when the
 * conversion to 2-address in emitf() would fail */
if (req(i.to, i.arg[1])) {
i.arg[1] = TMP(XMM0+15);
emitf("mov%k %=, %1", &i, e);
emitf("mov%k %0, %=", &i, e);
i.arg[0] = i.to;
}
goto Table;
case Ocopy:
/* copies are used for many things; see my note
 * to understand how to load big constants:
 * https://c9x.me/notes/2015-09-19.html */
assert(rtype(i.to) != RMem);
if (req(i.to, R) || req(i.arg[0], R))
break;
if (req(i.to, i.arg[0]))
break;
t0 = rtype(i.arg[0]);
if (i.cls == Kl
&& t0 == RCon
&& e->fn->con[i.arg[0].val].type == CBits) {
val = e->fn->con[i.arg[0].val].bits.i;
if (isreg(i.to))
/* fits in unsigned 32 bits: a movl suffices */
if (val >= 0 && val <= UINT32_MAX) {
emitf("movl %W0, %W=", &i, e);
break;
}
if (rtype(i.to) == RSlot)
/* 64-bit immediate stores don't exist:
 * write the constant in two 32-bit halves */
if (val < INT32_MIN || val > INT32_MAX) {
emitf("movl %0, %=", &i, e);
emitf("movl %0>>32, 4+%=", &i, e);
break;
}
}
if (isreg(i.to)
&& t0 == RCon
&& e->fn->con[i.arg[0].val].type == CAddr) {
emitf("lea%k %M0, %=", &i, e);
break;
}
if (rtype(i.to) == RSlot
&& (t0 == RSlot || t0 == RMem)) {
/* memory-to-memory copy: bounce through xmm15 */
i.cls = KWIDE(i.cls) ? Kd : Ks;
i.arg[1] = TMP(XMM0+15);
emitf("mov%k %0, %1", &i, e);
emitf("mov%k %1, %=", &i, e);
break;
}
/* conveniently, the assembler knows if it
 * should use movabsq when reading movq */
emitf("mov%k %0, %=", &i, e);
break;
case Oaddr:
if (!T.apple
&& rtype(i.arg[0]) == RCon
&& e->fn->con[i.arg[0].val].sym.type == SThr) {
/* derive the symbol address from the TCB
 * address at offset 0 of %fs */
assert(isreg(i.to));
con = &e->fn->con[i.arg[0].val];
sym = str(con->sym.id);
emitf("movq %%fs:0, %L=", &i, e);
fprintf(e->f, "\tleaq %s%s@tpoff",
sym[0] == '"' ? "" : T.assym, sym);
if (con->bits.i)
fprintf(e->f, "%+"PRId64,
con->bits.i);
fprintf(e->f, "(%%%s), %%%s\n",
regtoa(i.to.val, SLong),
regtoa(i.to.val, SLong));
break;
}
goto Table;
case Ocall:
/* calls simply have a weird syntax in AT&T
 * assembly... */
switch (rtype(i.arg[0])) {
case RCon:
fprintf(e->f, "\tcallq ");
emitcon(&e->fn->con[i.arg[0].val], e);
fprintf(e->f, "\n");
break;
case RTmp:
emitf("callq *%L0", &i, e);
break;
default:
die("invalid call argument");
}
break;
case Osalloc:
/* there is no good reason why this is here
 * maybe we should split Osalloc in 2 different
 * instructions depending on the result
 */
assert(e->fp == RBP);
emitf("subq %L0, %%rsp", &i, e);
if (!req(i.to, R))
emitcopy(i.to, TMP(RSP), Kl, e);
break;
case Oswap:
if (KBASE(i.cls) == 0)
goto Table;
/* for floats, there is no swap instruction
 * so we use xmm15 as a temporary
 */
emitcopy(TMP(XMM0+15), i.arg[0], i.cls, e);
emitcopy(i.arg[0], i.arg[1], i.cls, e);
emitcopy(i.arg[1], TMP(XMM0+15), i.cls, e);
break;
case Odbgloc:
emitdbgloc(i.arg[0].val, i.arg[1].val, e->f);
break;
case_Oxsel:
/* conditional move: pick the cmov variant that
 * reads the operand not aliased to the result */
if (req(i.to, i.arg[1]))
emitf(cmov[i.op-Oxsel][0], &i, e);
else {
if (!req(i.to, i.arg[0]))
emitf("mov %0, %=", &i, e);
emitf(cmov[i.op-Oxsel][1], &i, e);
}
break;
}
}
/* compute the frame size in bytes (e->fsz) for the sysv ABI */
static void
sysv_framesz(E *e)
{
uint64_t i, o, f;
/* specific to NAlign == 3 */
o = 0;
if (!e->fn->leaf) {
/* o = parity of the number of callee-save registers
 * pushed; an odd count misaligns the stack by 8 */
for (i=0, o=0; i<NCLR_SYSV; i++)
o ^= e->fn->reg >> amd64_sysv_rclob[i];
o &= 1;
}
f = e->fn->slot;
f = (f + 3) & -4; /* round slot count up to a multiple of 4 (16 bytes) */
if (f > 0
&& e->fp == RSP
&& e->fn->salign == 4)
f += 2;
/* 176 extra bytes hold the vararg register save area */
e->fsz = 4*f + 8*o + 176*e->fn->vararg;
}
/* emit a complete function for the sysv ABI: prologue (frame
 * setup, vararg register save area, callee-save pushes), then
 * each block with branch selection, epilogue at every Jret0 */
void
amd64_sysv_emitfn(Fn *fn, FILE *f)
{
static char *ctoa[] = {
#define X(c, s, _) [c] = s,
CMP(X)
#undef X
};
/* id0 offsets block labels to keep them unique across functions */
static int id0;
Blk *b, *s;
Ins *i, itmp;
int *r, c, o, n, lbl;
uint p;
E *e;
e = &(E){.f = f, .fn = fn};
emitfnlnk(fn->name, &fn->lnk, f);
fputs("\tendbr64\n", f);
/* functions with calls, varargs, or dynamic allocation
 * get a full frame; leaves address slots off %rsp */
if (!fn->leaf || fn->vararg || fn->dynalloc) {
e->fp = RBP;
fputs("\tpushq %rbp\n\tmovq %rsp, %rbp\n", f);
} else
e->fp = RSP;
sysv_framesz(e);
if (e->fsz)
fprintf(f, "\tsubq $%"PRIu64", %%rsp\n", e->fsz);
if (fn->vararg) {
/* spill the 6 integer and 8 sse argument registers
 * into the 176-byte register save area */
o = -176;
for (r=amd64_sysv_rsave; r<&amd64_sysv_rsave[6]; r++, o+=8)
fprintf(f, "\tmovq %%%s, %d(%%rbp)\n", rname[*r][0], o);
for (n=0; n<8; ++n, o+=16)
fprintf(f, "\tmovaps %%xmm%d, %d(%%rbp)\n", n, o);
}
for (r=amd64_sysv_rclob; r<&amd64_sysv_rclob[NCLR_SYSV]; r++)
if (fn->reg & BIT(*r)) {
itmp.arg[0] = TMP(*r);
emitf("pushq %L0", &itmp, e);
e->nclob++;
}
for (lbl=0, b=fn->start; b; b=b->link) {
if (lbl || b->npred > 1) {
/* align blocks that are branched to from later
 * blocks (e.g. loop headers) */
for (p=0; p<b->npred; p++)
if (b->pred[p]->id >= b->id)
break;
if (p != b->npred)
fprintf(f, ".p2align 4\n");
fprintf(f, "%sbb%d:\n", T.asloc, id0+b->id);
}
for (i=b->ins; i!=&b->ins[b->nins]; i++)
emitins(*i, e);
lbl = 1;
switch (b->jmp.type) {
case Jhlt:
fprintf(f, "\tud2\n");
break;
case Jret0:
if (fn->dynalloc)
fprintf(f,
"\tmovq %%rbp, %%rsp\n"
"\tsubq $%"PRIu64", %%rsp\n",
e->fsz + e->nclob * 8);
/* pop callee-saves in reverse push order */
for (r=&amd64_sysv_rclob[NCLR_SYSV]; r>amd64_sysv_rclob;)
if (fn->reg & BIT(*--r)) {
itmp.arg[0] = TMP(*r);
emitf("popq %L0", &itmp, e);
}
if (e->fp == RBP)
fputs("\tleave\n", f);
else if (e->fsz)
fprintf(f,
"\taddq $%"PRIu64", %%rsp\n",
e->fsz);
fputs("\tret\n", f);
break;
case Jjmp:
Jmp:
/* fallthrough needs no jmp (and no label either) */
if (b->s1 != b->link)
fprintf(f, "\tjmp %sbb%d\n",
T.asloc, id0+b->s1->id);
else
lbl = 0;
break;
default:
c = b->jmp.type - Jjf;
if (0 <= c && c <= NCmp) {
/* branch to s2, fall through (or jmp) to s1;
 * invert the condition unless s2 follows */
if (b->link == b->s2) {
s = b->s1;
b->s1 = b->s2;
b->s2 = s;
} else
c = cmpneg(c);
fprintf(f, "\tj%s %sbb%d\n", ctoa[c],
T.asloc, id0+b->s2->id);
goto Jmp;
}
die("unhandled jump %d", b->jmp.type);
}
}
id0 += fn->nblk;
if (!T.apple)
elf_emitfnfin(fn->name, f);
}
/* compute the frame size in bytes (e->fsz) for the win64 ABI;
 * like sysv_framesz() but without a vararg save area */
static void
winabi_framesz(E *e)
{
uint64_t i, o, f;
/* specific to NAlign == 3 */
o = 0;
if (!e->fn->leaf) {
/* o = parity of the number of callee-save registers
 * pushed; an odd count misaligns the stack by 8 */
for (i=0, o=0; i<NCLR_WIN; i++)
o ^= e->fn->reg >> amd64_winabi_rclob[i];
o &= 1;
}
f = e->fn->slot;
f = (f + 3) & -4; /* round slot count up to a multiple of 4 (16 bytes) */
if (f > 0
&& e->fp == RSP
&& e->fn->salign == 4)
f += 2;
e->fsz = 4*f + 8*o;
}
/* emit a complete function for the win64 ABI; mirrors
 * amd64_sysv_emitfn() with the windows register sets and
 * the shadow-space vararg spill */
void
amd64_winabi_emitfn(Fn *fn, FILE *f)
{
static char *ctoa[] = {
#define X(c, s, _) [c] = s,
CMP(X)
#undef X
};
/* id0 offsets block labels to keep them unique across functions */
static int id0;
Blk *b, *s;
Ins *i, itmp;
int *r, c, lbl;
E *e;
e = &(E){.f = f, .fn = fn};
emitfnlnk(fn->name, &fn->lnk, f);
fputs("\tendbr64\n", f);
if (fn->vararg) {
/* spill the register arguments to their home slots
 * in the caller-provided shadow space */
fprintf(f, "\tmovq %%rcx, 0x8(%%rsp)\n");
fprintf(f, "\tmovq %%rdx, 0x10(%%rsp)\n");
fprintf(f, "\tmovq %%r8, 0x18(%%rsp)\n");
fprintf(f, "\tmovq %%r9, 0x20(%%rsp)\n");
}
if (!fn->leaf || fn->vararg || fn->dynalloc) {
e->fp = RBP;
fputs("\tpushq %rbp\n\tmovq %rsp, %rbp\n", f);
} else
e->fp = RSP;
winabi_framesz(e);
if (e->fsz)
fprintf(f, "\tsubq $%"PRIu64", %%rsp\n", e->fsz);
for (r=amd64_winabi_rclob; r<&amd64_winabi_rclob[NCLR_WIN]; r++)
if (fn->reg & BIT(*r)) {
itmp.arg[0] = TMP(*r);
emitf("pushq %L0", &itmp, e);
e->nclob++;
}
for (lbl=0, b=fn->start; b; b=b->link) {
if (lbl || b->npred > 1)
fprintf(f, "%sbb%d:\n", T.asloc, id0+b->id);
for (i=b->ins; i!=&b->ins[b->nins]; i++)
emitins(*i, e);
lbl = 1;
switch (b->jmp.type) {
case Jhlt:
fprintf(f, "\tud2\n");
break;
case Jret0:
if (fn->dynalloc)
fprintf(f,
"\tmovq %%rbp, %%rsp\n"
"\tsubq $%"PRIu64", %%rsp\n",
e->fsz + e->nclob * 8);
/* pop callee-saves in reverse push order */
for (r=&amd64_winabi_rclob[NCLR_WIN]; r>amd64_winabi_rclob;)
if (fn->reg & BIT(*--r)) {
itmp.arg[0] = TMP(*r);
emitf("popq %L0", &itmp, e);
}
if (e->fp == RBP)
fputs("\tleave\n", f);
else if (e->fsz)
fprintf(f,
"\taddq $%"PRIu64", %%rsp\n",
e->fsz);
fputs("\tret\n", f);
break;
case Jjmp:
Jmp:
/* fallthrough needs no jmp (and no label either) */
if (b->s1 != b->link)
fprintf(f, "\tjmp %sbb%d\n",
T.asloc, id0+b->s1->id);
else
lbl = 0;
break;
default:
c = b->jmp.type - Jjf;
if (0 <= c && c <= NCmp) {
/* branch to s2, fall through (or jmp) to s1;
 * invert the condition unless s2 follows */
if (b->link == b->s2) {
s = b->s1;
b->s1 = b->s2;
b->s2 = s;
} else
c = cmpneg(c);
fprintf(f, "\tj%s %sbb%d\n", ctoa[c],
T.asloc, id0+b->s2->id);
goto Jmp;
}
die("unhandled jump %d", b->jmp.type);
}
}
id0 += fn->nblk;
}

942
src/qbe/amd64/isel.c Normal file
View File

@@ -0,0 +1,942 @@
#include "all.h"
#include <limits.h>
/* For x86_64, do the following:
*
* - check that constants are used only in
* places allowed
* - ensure immediates always fit in 32b
* - expose machine register contraints
* on instructions like division.
* - implement fast locals (the streak of
* constant allocX in the first basic block)
* - recognize complex addressing modes
*
* Invariant: the use counts that are used
* in sel() must be sound. This
* is not so trivial, maybe the
* dce should be moved out...
*/
static int amatch(Addr *, Num *, Ref, Fn *);
/* check whether the constant r cannot be encoded as a
 * 32-bit signed immediate (and so needs a register) */
static int
noimm(Ref r, Fn *fn)
{
	Con *c;

	if (rtype(r) != RCon)
		return 0;
	c = &fn->con[r.val];
	if (c->type == CAddr)
		/* we only support the 'small'
		 * code model of the ABI, this
		 * means that we can always
		 * address data with 32bits
		 */
		return 0;
	if (c->type == CBits)
		return c->bits.i < INT32_MIN || c->bits.i > INT32_MAX;
	die("invalid constant");
}
/* return the stack slot of temporary r, or -1 when r is
 * not a temporary (or the temporary has no slot) */
static int
rslot(Ref r, Fn *fn)
{
	return rtype(r) == RTmp ? fn->tmp[r.val].slot : -1;
}
/* if r carries a constant (directly, or as the offset of a
 * memory operand), store its address in *pc and return 1 */
static int
hascon(Ref r, Con **pc, Fn *fn)
{
	int t;

	t = rtype(r);
	if (t == RCon) {
		*pc = &fn->con[r.val];
		return 1;
	}
	if (t == RMem) {
		*pc = &fn->mem[r.val].offset;
		return 1;
	}
	return 0;
}
/* legalize the operand *r of instruction i (expected class k):
 * rewrite operands that x86 cannot encode directly (float
 * immediates, wide integer immediates, fast-local addresses,
 * thread-locals, rip-relative symbols) into temporaries or
 * memory references; i may be null for a plain copy context */
static void
fixarg(Ref *r, int k, Ins *i, Fn *fn)
{
char buf[32];
Addr a, *m;
Con cc, *c;
Ref r0, r1, r2, r3;
int s, n, op;
r1 = r0 = *r;
s = rslot(r0, fn);
op = i ? i->op : Ocopy;
if (KBASE(k) == 1 && rtype(r0) == RCon) {
/* load floating points from memory
 * slots, they can't be used as
 * immediates
 */
r1 = MEM(fn->nmem);
vgrow(&fn->mem, ++fn->nmem);
memset(&a, 0, sizeof a);
a.offset.type = CAddr;
n = stashbits(fn->con[r0.val].bits.i, KWIDE(k) ? 8 : 4);
/* quote the name so that we do not
 * add symbol prefixes on the apple
 * target variant
 */
sprintf(buf, "\"%sfp%d\"", T.asloc, n);
a.offset.sym.id = intern(buf);
fn->mem[fn->nmem-1] = a;
}
else if (op == Ocall && r == &i->arg[0]
&& rtype(r0) == RCon && fn->con[r0.val].type != CAddr) {
/* use a temporary register so that we
 * produce an indirect call
 */
r1 = newtmp("isel", Kl, fn);
emit(Ocopy, Kl, r1, r0, R);
}
else if (op != Ocopy && k == Kl && noimm(r0, fn)) {
/* load constants that do not fit in
 * a 32bit signed integer into a
 * long temporary
 */
r1 = newtmp("isel", Kl, fn);
emit(Ocopy, Kl, r1, r0, R);
}
else if (s != -1) {
/* load fast locals' addresses into
 * temporaries right before the
 * instruction
 */
r1 = newtmp("isel", Kl, fn);
emit(Oaddr, Kl, r1, SLOT(s), R);
}
else if (T.apple && hascon(r0, &c, fn)
&& c->type == CAddr && c->sym.type == SThr) {
/* apple thread-locals: call the TLV getter and
 * add the constant offset, leaving the address
 * in r1 (instructions are emitted bottom-up) */
r1 = newtmp("isel", Kl, fn);
if (c->bits.i) {
r2 = newtmp("isel", Kl, fn);
cc = (Con){.type = CBits};
cc.bits.i = c->bits.i;
r3 = newcon(&cc, fn);
emit(Oadd, Kl, r1, r2, r3);
} else
r2 = r1;
emit(Ocopy, Kl, r2, TMP(RAX), R);
r2 = newtmp("isel", Kl, fn);
r3 = newtmp("isel", Kl, fn);
emit(Ocall, 0, R, r3, CALL(17));
emit(Ocopy, Kl, TMP(RDI), r2, R);
emit(Oload, Kl, r3, r2, R);
cc = *c;
cc.bits.i = 0;
r3 = newcon(&cc, fn);
emit(Oload, Kl, r2, r3, R);
if (rtype(r0) == RMem) {
m = &fn->mem[r0.val];
m->offset.type = CUndef;
m->base = r1;
r1 = r0;
}
}
else if (!(isstore(op) && r == &i->arg[1])
&& !isload(op) && op != Ocall && rtype(r0) == RCon
&& fn->con[r0.val].type == CAddr) {
/* apple as does not support 32-bit
 * absolute addressing, use a rip-
 * relative leaq instead
 */
r1 = newtmp("isel", Kl, fn);
emit(Oaddr, Kl, r1, r0, R);
}
else if (rtype(r0) == RMem) {
/* eliminate memory operands of
 * the form $foo(%rip, ...)
 */
m = &fn->mem[r0.val];
if (req(m->base, R))
if (m->offset.type == CAddr) {
r0 = newtmp("isel", Kl, fn);
emit(Oaddr, Kl, r0, newcon(&m->offset, fn), R);
m->offset.type = CUndef;
m->base = r0;
}
}
else if (isxsel(op) && rtype(*r) == RCon) {
/* conditional moves can't take an immediate */
r1 = newtmp("isel", i->cls, fn);
emit(Ocopy, i->cls, r1, *r, R);
}
*r = r1;
}
/* try to fold the address computation feeding *r into an
 * amd64 addressing mode (RMem); *r is left untouched when
 * no match is found or the mode cannot be encoded */
static void
seladdr(Ref *r, Num *tn, Fn *fn)
{
Addr a;
Ref r0;
r0 = *r;
if (rtype(r0) == RTmp) {
memset(&a, 0, sizeof a);
if (!amatch(&a, tn, r0, fn))
return;
if (!req(a.base, R))
if (a.offset.type == CAddr) {
/* apple as does not support
 * $foo(%r0, %r1, M); try to
 * rewrite it or bail out if
 * impossible
 */
if (!req(a.index, R) || rtype(a.base) != RTmp)
return;
else {
a.index = a.base;
a.scale = 1;
a.base = R;
}
}
/* the folded temporary loses one use; the
 * address components each gain one */
chuse(r0, -1, fn);
vgrow(&fn->mem, ++fn->nmem);
fn->mem[fn->nmem-1] = a;
chuse(a.base, +1, fn);
chuse(a.index, +1, fn);
*r = MEM(fn->nmem-1);
}
}
/* decide whether the arguments of a comparison should be
 * swapped before emitting the x86 compare */
static int
cmpswap(Ref arg[2], int op)
{
	/* float < and <= are selected as swapped > and >= */
	if (op == NCmpI+Cflt || op == NCmpI+Cfle)
		return 1;
	if (op == NCmpI+Cfgt || op == NCmpI+Cfge)
		return 0;
	/* otherwise, swap to move a constant out of arg[0] */
	return rtype(arg[0]) == RCon;
}
/* emit the Oxcmp for a comparison and legalize its operands;
 * swap exchanges the two arguments first (see cmpswap()) */
static void
selcmp(Ref arg[2], int k, int swap, Fn *fn)
{
Ref r;
Ins *icmp;
if (swap) {
r = arg[1];
arg[1] = arg[0];
arg[0] = r;
}
/* note the argument reversal: arg[0] becomes the second
 * operand of the emitted xcmp */
emit(Oxcmp, k, R, arg[1], arg[0]);
icmp = curi;
if (rtype(arg[0]) == RCon) {
/* a remaining constant must be loaded into a
 * temporary of the comparison's class */
assert(k != Kw);
icmp->arg[1] = newtmp("isel", k, fn);
emit(Ocopy, k, icmp->arg[1], arg[0], R);
fixarg(&curi->arg[0], k, curi, fn);
}
fixarg(&icmp->arg[0], k, icmp, fn);
fixarg(&icmp->arg[1], k, icmp, fn);
}
/* Select x86-64 instructions for one IR instruction i, emitting
 * bottom-up (via emit()) into the shared instruction buffer.
 * Special lowering is needed for division (fixed RAX/RDX regs),
 * variable shifts (RCX), unsigned int<->float conversions, stores
 * and loads (address-mode folding via seladdr), and comparisons;
 * everything else falls through to Emit, which copies the
 * instruction and legalizes its arguments with fixarg(). */
static void
sel(Ins i, Num *tn, Fn *fn)
{
	Ref r0, r1, tmp[7];
	int x, j, k, kc, sh, swap;
	Ins *i0, *i1;

	/* dead-code elimination: drop a pure instruction whose
	 * result is never used, releasing its argument use counts */
	if (rtype(i.to) == RTmp)
	if (!isreg(i.to) && !isreg(i.arg[0]) && !isreg(i.arg[1]))
	if (fn->tmp[i.to.val].nuse == 0) {
		chuse(i.arg[0], -1, fn);
		chuse(i.arg[1], -1, fn);
		return;
	}
	i0 = curi;
	k = i.cls;
	switch (i.op) {
	case Odiv:
	case Orem:
	case Oudiv:
	case Ourem:
		if (KBASE(k) == 1)
			goto Emit;
		/* integer division: quotient lands in RAX,
		 * remainder in RDX */
		if (i.op == Odiv || i.op == Oudiv)
			r0 = TMP(RAX), r1 = TMP(RDX);
		else
			r0 = TMP(RDX), r1 = TMP(RAX);
		emit(Ocopy, k, i.to, r0, R);
		emit(Ocopy, k, R, r1, R);
		if (rtype(i.arg[1]) == RCon) {
			/* immediates not allowed for
			 * divisions in x86
			 */
			r0 = newtmp("isel", k, fn);
		} else
			r0 = i.arg[1];
		if (fn->tmp[r0.val].slot != -1)
			err("unlikely argument %%%s in %s",
				fn->tmp[r0.val].name, optab[i.op].name);
		if (i.op == Odiv || i.op == Orem) {
			emit(Oxidiv, k, R, r0, R);
			emit(Osign, k, TMP(RDX), TMP(RAX), R);
		} else {
			emit(Oxdiv, k, R, r0, R);
			emit(Ocopy, k, TMP(RDX), CON_Z, R);
		}
		emit(Ocopy, k, TMP(RAX), i.arg[0], R);
		fixarg(&curi->arg[0], k, curi, fn);
		if (rtype(i.arg[1]) == RCon)
			emit(Ocopy, k, r0, i.arg[1], R);
		break;
	case Osar:
	case Oshr:
	case Oshl:
		/* variable shift amounts must live in CL */
		r0 = i.arg[1];
		if (rtype(r0) == RCon)
			goto Emit;
		if (fn->tmp[r0.val].slot != -1)
			err("unlikely argument %%%s in %s",
				fn->tmp[r0.val].name, optab[i.op].name);
		i.arg[1] = TMP(RCX);
		emit(Ocopy, Kw, R, TMP(RCX), R);
		emiti(i);
		i1 = curi;
		emit(Ocopy, Kw, TMP(RCX), r0, R);
		fixarg(&i1->arg[0], argcls(&i, 0), i1, fn);
		break;
	case Ouwtof:
		/* uw -> float: zero-extend to 64 bits, then do a
		 * signed conversion, which is always exact */
		r0 = newtmp("utof", Kl, fn);
		emit(Osltof, k, i.to, r0, R);
		emit(Oextuw, Kl, r0, i.arg[0], R);
		fixarg(&curi->arg[0], k, curi, fn);
		break;
	case Oultof:
		/* %mask =l and %arg.0, 1
		 * %isbig =l shr %arg.0, 63
		 * %divided =l shr %arg.0, %isbig
		 * %or =l or %mask, %divided
		 * %float =d sltof %or
		 * %cast =l cast %float
		 * %addend =l shl %isbig, 52
		 * %sum =l add %cast, %addend
		 * %result =d cast %sum
		 */
		r0 = newtmp("utof", k, fn);
		if (k == Ks)
			kc = Kw, sh = 23;
		else
			kc = Kl, sh = 52;
		for (j=0; j<4; j++)
			tmp[j] = newtmp("utof", Kl, fn);
		for (; j<7; j++)
			tmp[j] = newtmp("utof", kc, fn);
		emit(Ocast, k, i.to, tmp[6], R);
		emit(Oadd, kc, tmp[6], tmp[4], tmp[5]);
		emit(Oshl, kc, tmp[5], tmp[1], getcon(sh, fn));
		emit(Ocast, kc, tmp[4], r0, R);
		emit(Osltof, k, r0, tmp[3], R);
		emit(Oor, Kl, tmp[3], tmp[0], tmp[2]);
		emit(Oshr, Kl, tmp[2], i.arg[0], tmp[1]);
		sel(*curi++, 0, fn);
		emit(Oshr, Kl, tmp[1], i.arg[0], getcon(63, fn));
		fixarg(&curi->arg[0], Kl, curi, fn);
		emit(Oand, Kl, tmp[0], i.arg[0], getcon(1, fn));
		fixarg(&curi->arg[0], Kl, curi, fn);
		break;
	case Ostoui:
		i.op = Ostosi;
		kc = Ks;
		tmp[4] = getcon(0xdf000000, fn);
		goto Oftoui;
	case Odtoui:
		i.op = Odtosi;
		kc = Kd;
		tmp[4] = getcon(0xc3e0000000000000, fn);
	Oftoui:
		if (k == Kw) {
			/* 32-bit result: do the 64-bit signed
			 * conversion and truncate */
			r0 = newtmp("ftou", Kl, fn);
			emit(Ocopy, Kw, i.to, r0, R);
			i.cls = Kl;
			i.to = r0;
			goto Emit;
		}
		/* %try0 =l {s,d}tosi %fp
		 * %mask =l sar %try0, 63
		 *
		 * mask is all ones if the first
		 * try was oob, all zeroes o.w.
		 *
		 * %fps ={s,d} sub %fp, (1<<63)
		 * %try1 =l {s,d}tosi %fps
		 *
		 * %tmp =l and %mask, %try1
		 * %res =l or %tmp, %try0
		 */
		r0 = newtmp("ftou", kc, fn);
		for (j=0; j<4; j++)
			tmp[j] = newtmp("ftou", Kl, fn);
		emit(Oor, Kl, i.to, tmp[0], tmp[3]);
		emit(Oand, Kl, tmp[3], tmp[2], tmp[1]);
		emit(i.op, Kl, tmp[2], r0, R);
		emit(Oadd, kc, r0, tmp[4], i.arg[0]);
		i1 = curi; /* fixarg() can change curi */
		fixarg(&i1->arg[0], kc, i1, fn);
		fixarg(&i1->arg[1], kc, i1, fn);
		emit(Osar, Kl, tmp[1], tmp[0], getcon(63, fn));
		emit(i.op, Kl, tmp[0], i.arg[0], R);
		fixarg(&curi->arg[0], Kl, curi, fn);
		break;
	case Onop:
		break;
	case Ostored:
	case Ostores:
	case Ostorel:
	case Ostorew:
	case Ostoreh:
	case Ostoreb:
		if (rtype(i.arg[0]) == RCon) {
			/* store a float constant via the
			 * equivalently-sized integer store */
			if (i.op == Ostored)
				i.op = Ostorel;
			if (i.op == Ostores)
				i.op = Ostorew;
		}
		seladdr(&i.arg[1], tn, fn);
		goto Emit;
	case_Oload:
		seladdr(&i.arg[0], tn, fn);
		goto Emit;
	case Odbgloc:
	case Ocall:
	case Osalloc:
	case Ocopy:
	case Oadd:
	case Osub:
	case Oneg:
	case Omul:
	case Oand:
	case Oor:
	case Oxor:
	case Oxtest:
	case Ostosi:
	case Odtosi:
	case Oswtof:
	case Osltof:
	case Oexts:
	case Otruncd:
	case Ocast:
	case_Oxsel:
	case_Oext:
	Emit:
		emiti(i);
		i1 = curi; /* fixarg() can change curi */
		fixarg(&i1->arg[0], argcls(&i, 0), i1, fn);
		fixarg(&i1->arg[1], argcls(&i, 1), i1, fn);
		break;
	case Oalloc4:
	case Oalloc8:
	case Oalloc16:
		salloc(i.to, i.arg[0], fn);
		break;
	default:
		if (isext(i.op))
			goto case_Oext;
		if (isxsel(i.op))
			goto case_Oxsel;
		if (isload(i.op))
			goto case_Oload;
		if (iscmp(i.op, &kc, &x)) {
			switch (x) {
			case NCmpI+Cfeq:
				/* zf is set when operands are
				 * unordered, so we may have to
				 * check pf
				 */
				r0 = newtmp("isel", Kw, fn);
				r1 = newtmp("isel", Kw, fn);
				emit(Oand, Kw, i.to, r0, r1);
				emit(Oflagfo, k, r1, R, R);
				i.to = r0;
				break;
			case NCmpI+Cfne:
				r0 = newtmp("isel", Kw, fn);
				r1 = newtmp("isel", Kw, fn);
				emit(Oor, Kw, i.to, r0, r1);
				emit(Oflagfuo, k, r1, R, R);
				i.to = r0;
				break;
			}
			swap = cmpswap(i.arg, x);
			if (swap)
				x = cmpop(x);
			emit(Oflag+x, k, i.to, R, R);
			selcmp(i.arg, kc, swap, fn);
			break;
		}
		die("unknown instruction %s", optab[i.op].name);
	}

	/* sanity check: no emitted argument may still refer to a
	 * stack-slot temporary */
	while (i0>curi && --i0) {
		assert(rslot(i0->arg[0], fn) == -1);
		assert(rslot(i0->arg[1], fn) == -1);
	}
}
/* Scan backwards from i (exclusive) down to i0 for the most recent
 * instruction that sets the zero flag.  The scan aborts (returns 0)
 * as soon as an instruction that clobbers flags without setting zf
 * is crossed, or when the range is exhausted. */
static Ins *
flagi(Ins *i0, Ins *i)
{
	Ins *p;

	for (p=i; p>i0;) {
		p--;
		if (amd64_op[p->op].zflag)
			return p;
		if (!amd64_op[p->op].lflag)
			return 0;
	}
	return 0;
}
/* Lower a run of Osel1 instructions (terminated below by their
 * Osel0) into conditional moves.  Tries to reuse the flags of a
 * preceding flag-setting instruction; otherwise emits an explicit
 * compare against zero.  Returns a pointer to the (now nop'd)
 * Osel0 so the caller can resume scanning before it. */
static Ins*
selsel(Fn *fn, Blk *b, Ins *i, Num *tn)
{
	Ref r, cr[2];
	int c, k, swap, gencmp, gencpy;
	Ins *isel0, *isel1, *fi;
	Tmp *t;

	assert(i->op == Osel1);
	/* find the Osel0 heading this group of Osel1s */
	for (isel0=i; b->ins<isel0; isel0--) {
		if (isel0->op == Osel0)
			break;
		assert(isel0->op == Osel1);
	}
	assert(isel0->op == Osel0);
	r = isel0->arg[0];
	assert(rtype(r) == RTmp);
	t = &fn->tmp[r.val];
	fi = flagi(b->ins, isel0);
	cr[0] = cr[1] = R;
	gencmp = gencpy = swap = 0;
	k = Kw;
	c = Cine;
	if (!fi || !req(fi->to, r)) {
		/* no usable flags: compare the condition
		 * temporary against zero */
		gencmp = 1;
		cr[0] = r;
		cr[1] = CON_Z;
	}
	else if (iscmp(fi->op, &k, &c)) {
		if (c == NCmpI+Cfeq
		|| c == NCmpI+Cfne) {
			/* these are selected as 'and'
			 * or 'or', so we check their
			 * result with Cine
			 */
			c = Cine;
			goto Other;
		}
		swap = cmpswap(fi->arg, c);
		if (swap)
			c = cmpop(c);
		if (t->nuse == 1) {
			/* the compare result is only used here:
			 * fold it into the cmov condition */
			gencmp = 1;
			cr[0] = fi->arg[0];
			cr[1] = fi->arg[1];
			*fi = (Ins){.op = Onop};
		}
	}
	else if (fi->op == Oand && t->nuse == 1
	&& (rtype(fi->arg[0]) == RTmp ||
	    rtype(fi->arg[1]) == RTmp)) {
		/* and-with-single-use condition becomes test */
		fi->op = Oxtest;
		fi->to = R;
		if (rtype(fi->arg[1]) == RCon) {
			r = fi->arg[1];
			fi->arg[1] = fi->arg[0];
			fi->arg[0] = r;
		}
	}
	else {
	Other:
		/* since flags are not tracked in liveness,
		 * the result of the flag-setting instruction
		 * has to be marked as live
		 */
		if (t->nuse == 1)
			gencpy = 1;
	}
	/* generate conditional moves */
	for (isel1=i; isel0<isel1; --isel1) {
		isel1->op = Oxsel+c;
		sel(*isel1, tn, fn);
	}
	assert(!gencmp || !gencpy);
	if (gencmp)
		selcmp(cr, k, swap, fn);
	if (gencpy)
		emit(Ocopy, Kw, R, r, R);
	*isel0 = (Ins){.op = Onop};
	return isel0;
}
/* Lower the block terminator.  Jjnz becomes a conditional jump on
 * x86 flags: reuse a preceding flag-setting compare when possible,
 * turn a single-use 'and' into 'test', or fall back to an explicit
 * compare of the condition against zero. */
static void
seljmp(Blk *b, Fn *fn)
{
	Ref r;
	int c, k, swap;
	Ins *fi;
	Tmp *t;

	if (b->jmp.type == Jret0
	|| b->jmp.type == Jjmp
	|| b->jmp.type == Jhlt)
		return;
	assert(b->jmp.type == Jjnz);
	r = b->jmp.arg;
	t = &fn->tmp[r.val];
	b->jmp.arg = R;
	assert(rtype(r) == RTmp);
	if (b->s1 == b->s2) {
		/* both targets identical: degenerate to jmp */
		chuse(r, -1, fn);
		b->jmp.type = Jjmp;
		b->s2 = 0;
		return;
	}
	fi = flagi(b->ins, &b->ins[b->nins]);
	if (!fi || !req(fi->to, r)) {
		/* no usable flags: explicit test against zero */
		selcmp((Ref[2]){r, CON_Z}, Kw, 0, fn);
		b->jmp.type = Jjf + Cine;
	}
	else if (iscmp(fi->op, &k, &c)
	&& c != NCmpI+Cfeq /* see sel(), selsel() */
	&& c != NCmpI+Cfne) {
		swap = cmpswap(fi->arg, c);
		if (swap)
			c = cmpop(c);
		if (t->nuse == 1) {
			/* compare result used only here: jump
			 * directly on its flags */
			selcmp(fi->arg, k, swap, fn);
			*fi = (Ins){.op = Onop};
		}
		b->jmp.type = Jjf + c;
	}
	else if (fi->op == Oand && t->nuse == 1
	&& (rtype(fi->arg[0]) == RTmp ||
	    rtype(fi->arg[1]) == RTmp)) {
		fi->op = Oxtest;
		fi->to = R;
		b->jmp.type = Jjf + Cine;
		if (rtype(fi->arg[1]) == RCon) {
			r = fi->arg[1];
			fi->arg[1] = fi->arg[0];
			fi->arg[0] = r;
		}
	}
	else {
		/* since flags are not tracked in liveness,
		 * the result of the flag-setting instruction
		 * has to be marked as live
		 */
		if (t->nuse == 1)
			emit(Ocopy, Kw, R, r, R);
		b->jmp.type = Jjf + Cine;
	}
}
/* Addressing-mode patterns recognized by the mgen-generated matcher
 * below; the letters follow the with-vars comment: o = constant
 * offset, b = base, i = index, s = scale (1/2/4/8). */
enum {
	Pob,	/* o + b */
	Pbis,	/* b + i*s */
	Pois,	/* o + i*s */
	Pobis,	/* o + b + i*s */
	Pbi1,	/* b + i */
	Pobi1,	/* o + b + i */
};
/* mgen generated code
*
* (with-vars (o b i s)
* (patterns
* (ob (add (con o) (tmp b)))
* (bis (add (tmp b) (mul (tmp i) (con s 1 2 4 8))))
* (ois (add (con o) (mul (tmp i) (con s 1 2 4 8))))
* (obis (add (con o) (tmp b) (mul (tmp i) (con s 1 2 4 8))))
* (bi1 (add (tmp b) (tmp i)))
* (obi1 (add (con o) (tmp b) (tmp i)))
* ))
*/
/* Combine the matcher numbers of an instruction's two operands into
 * the number of its result.  Oaddtbl is a generated, symmetric
 * lower-triangular table (hence the normalization l >= r and the
 * (l + l*l)/2 + r indexing); do not edit it by hand. */
static int
opn(int op, int l, int r)
{
	static uchar Oaddtbl[91] = {
		2,
		2,2,
		4,4,5,
		6,6,8,8,
		4,4,9,10,9,
		7,7,5,8,9,5,
		4,4,12,10,12,12,12,
		4,4,9,10,9,9,12,9,
		11,11,5,8,9,5,12,9,5,
		7,7,5,8,9,5,12,9,5,5,
		11,11,5,8,9,5,12,9,5,5,5,
		4,4,9,10,9,9,12,9,9,9,9,9,
		7,7,5,8,9,5,12,9,5,5,5,9,5,
	};
	int t;

	/* normalize so l is the larger operand number */
	if (l < r)
		t = l, l = r, r = t;
	switch (op) {
	case Omul:
		if (2 <= l)
		if (r == 0) {
			return 3;
		}
		return 2;
	case Oadd:
		return Oaddtbl[(l + l*l)/2 + r];
	default:
		return 2;
	}
}
/* Matcher number for a reference: temporaries get their cached
 * number (defaulting to 2), constants get 0 when they are a valid
 * x86 scale (1/2/4/8) and 1 otherwise; anything else cannot take
 * part in address matching. */
static int
refn(Ref r, Num *tn, Con *con)
{
	int64_t v;

	if (rtype(r) == RTmp) {
		if (tn[r.val].n == 0)
			tn[r.val].n = 2;
		return tn[r.val].n;
	}
	if (rtype(r) == RCon) {
		if (con[r.val].type != CBits)
			return 1;
		v = con[r.val].bits.i;
		return (v == 1 || v == 2 || v == 4 || v == 8) ? 0 : 1;
	}
	return INT_MIN;
}
/* For each matcher number, the set of addressing patterns that the
 * corresponding expression tree can produce (generated table). */
static bits match[13] = {
	[4] = BIT(Pob),
	[5] = BIT(Pbi1),
	[6] = BIT(Pob) | BIT(Pois),
	[7] = BIT(Pob) | BIT(Pobi1),
	[8] = BIT(Pbi1) | BIT(Pbis),
	[9] = BIT(Pbi1) | BIT(Pobi1),
	[10] = BIT(Pbi1) | BIT(Pbis) | BIT(Pobi1) | BIT(Pobis),
	[11] = BIT(Pob) | BIT(Pobi1) | BIT(Pobis),
	[12] = BIT(Pbi1) | BIT(Pobi1) | BIT(Pobis),
};
/* Byte-coded matcher programs interpreted by runmatch(), one per
 * pattern; generated by mgen — do not edit by hand. */
static uchar *matcher[] = {
	[Pbi1] = (uchar[]){
		1,3,1,3,2,0
	},
	[Pbis] = (uchar[]){
		5,1,8,5,27,1,5,1,2,5,13,3,1,1,3,3,3,2,0,1,
		3,3,3,2,3,1,0,1,29
	},
	[Pob] = (uchar[]){
		1,3,0,3,1,0
	},
	[Pobi1] = (uchar[]){
		5,3,9,9,10,33,12,35,45,1,5,3,11,9,7,9,4,9,
		17,1,3,0,3,1,3,2,0,3,1,1,3,0,34,1,37,1,5,2,
		5,7,2,7,8,37,29,1,3,0,1,32
	},
	[Pobis] = (uchar[]){
		5,2,10,7,11,19,49,1,1,3,3,3,2,1,3,0,3,1,0,
		1,3,0,5,1,8,5,25,1,5,1,2,5,13,3,1,1,3,3,3,
		2,0,1,3,3,3,2,26,1,51,1,5,1,6,5,9,1,3,0,51,
		3,1,1,3,0,45
	},
	[Pois] = (uchar[]){
		1,3,0,1,3,3,3,2,0
	},
};
/* end of generated code */
/* Number every temporary defined in block b for the address
 * matcher: record each definition's operands and their matcher
 * numbers, then derive the result's number with opn(). */
static void
anumber(Num *tn, Blk *b, Con *con)
{
	Num *num;
	Ins *ins, *end;

	end = &b->ins[b->nins];
	for (ins=b->ins; ins<end; ins++)
		if (rtype(ins->to) == RTmp) {
			num = &tn[ins->to.val];
			num->l = ins->arg[0];
			num->r = ins->arg[1];
			num->nl = refn(num->l, tn, con);
			num->nr = refn(num->r, tn, con);
			num->n = opn(ins->op, num->nl, num->nr);
		}
}
/* Repeatedly peel constant displacements off r (chains of
 * offset+base adds matched by Pob), accumulating them into *c
 * scaled by s, and return the remaining base reference. */
static Ref
adisp(Con *c, Num *tn, Ref r, Fn *fn, int s)
{
	Ref v[2];
	int n;

	while (!req(r, R)) {
		assert(rtype(r) == RTmp);
		n = refn(r, tn, fn->con);
		if (!(match[n] & BIT(Pob)))
			break;
		runmatch(matcher[Pob], tn, r, v);
		assert(rtype(v[0]) == RCon);
		addcon(c, &fn->con[v[0].val], s);
		r = v[1];
	}
	return r;
}
/* Attempt to match the expression tree rooted at r against an x86
 * addressing mode (offset + base + index*scale).  On success fills
 * *a and returns 1; returns 0 when no legal mode exists.  Patterns
 * are tried most- to least-specific (pat[]). */
static int
amatch(Addr *a, Num *tn, Ref r, Fn *fn)
{
	static int pat[] = {Pobis, Pobi1, Pbis, Pois, Pbi1, -1};
	Ref ro, rb, ri, rs, v[4];
	Con *c, co;
	int s, n, *p;

	if (rtype(r) != RTmp)
		return 0;
	n = refn(r, tn, fn->con);
	memset(v, 0, sizeof v);
	for (p=pat; *p>=0; p++)
		if (match[n] & BIT(*p)) {
			runmatch(matcher[*p], tn, r, v);
			break;
		}
	if (*p < 0)
		v[1] = r;	/* no pattern: r itself is the base */
	memset(&co, 0, sizeof co);
	ro = v[0];
	rb = adisp(&co, tn, v[1], fn, 1);
	ri = v[2];
	rs = v[3];
	s = 1;
	/* the base was a pure displacement chain: retry the match
	 * on what remains and merge the constant in */
	if (*p < 0 && co.type != CUndef)
	if (amatch(a, tn, rb, fn))
		return addcon(&a->offset, &co, 1);
	if (!req(ro, R)) {
		assert(rtype(ro) == RCon);
		c = &fn->con[ro.val];
		if (!addcon(&co, c, 1))
			return 0;
	}
	if (!req(rs, R)) {
		assert(rtype(rs) == RCon);
		c = &fn->con[rs.val];
		assert(c->type == CBits);
		s = c->bits.i;
	}
	ri = adisp(&co, tn, ri, fn, s);
	*a = (Addr){co, rb, ri, s};
	/* stack-slot temporaries may only appear as the base */
	if (rtype(ri) == RTmp)
	if (fn->tmp[ri.val].slot != -1) {
		if (a->scale != 1
		|| fn->tmp[rb.val].slot != -1)
			return 0;
		a->base = ri;
		a->index = rb;
	}
	if (!req(a->base, R)) {
		assert(rtype(a->base) == RTmp);
		s = fn->tmp[a->base.val].slot;
		if (s != -1)
			a->base = SLOT(s);
	}
	return 1;
}
/* instruction selection
 * requires use counts (as given by parsing)
 *
 * Entry point: assigns stack slots to the constant-sized allocs of
 * the start block, then lowers every block bottom-up — fixing phi
 * arguments of successors, numbering temporaries for the address
 * matcher, selecting the terminator, then each instruction.
 */
void
amd64_isel(Fn *fn)
{
	Blk *b, **sb;
	Ins *i;
	Phi *p;
	uint a;
	int n, al;
	int64_t sz;
	Num *num;

	/* assign slots to fast allocs */
	b = fn->start;
	/* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
	for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2)
		for (i=b->ins; i<&b->ins[b->nins]; i++)
			if (i->op == al) {
				if (rtype(i->arg[0]) != RCon)
					break;
				sz = fn->con[i->arg[0].val].bits.i;
				if (sz < 0 || sz >= INT_MAX-15)
					err("invalid alloc size %"PRId64, sz);
				sz = (sz + n-1) & -n;
				sz /= 4;
				if (sz > INT_MAX - fn->slot)
					die("alloc too large");
				fn->tmp[i->to.val].slot = fn->slot;
				fn->slot += sz;
				fn->salign = 2 + al - Oalloc;
				*i = (Ins){.op = Onop};
			}

	/* process basic blocks */
	n = fn->ntmp;
	num = emalloc(n * sizeof num[0]);
	for (b=fn->start; b; b=b->link) {
		curi = &insb[NIns];
		/* legalize phi arguments flowing out of b */
		for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
			for (p=(*sb)->phi; p; p=p->link) {
				for (a=0; p->blk[a] != b; a++)
					assert(a+1 < p->narg);
				fixarg(&p->arg[a], p->cls, 0, fn);
			}
		memset(num, 0, n * sizeof num[0]);
		anumber(num, b, fn->con);
		seljmp(b, fn);
		/* select instructions bottom-up */
		for (i=&b->ins[b->nins]; i!=b->ins;) {
			--i;
			assert(i->op != Osel0);
			if (i->op == Osel1)
				i = selsel(fn, b, i, num);
			else
				sel(*i, num, fn);
		}
		idup(b, curi, &insb[NIns]-curi);
	}
	free(num);
	if (debug['I']) {
		fprintf(stderr, "\n> After instruction selection:\n");
		printfn(fn, stderr);
	}
}

721
src/qbe/amd64/sysv.c Normal file
View File

@@ -0,0 +1,721 @@
#include "all.h"
typedef struct AClass AClass;
typedef struct RAlloc RAlloc;

/* classification of one call argument / parameter per the SysV ABI */
struct AClass {
	Typ *type;	/* aggregate type, when Oargc/Oparc */
	int inmem;	/* 0: registers, 1: memory (aggregate), 2: stack (scalar) */
	int align;	/* log2 alignment */
	uint size;	/* rounded-up size in bytes */
	int cls[2];	/* register class of each eightbyte */
	Ref ref[2];	/* temporaries holding each eightbyte */
};

/* deferred stack allocation for a call's aggregate return pad */
struct RAlloc {
	Ins i;
	RAlloc *link;
};
/* Walk the fields of aggregate type t starting at byte offset s and
 * assign a register class (Kl or Kd) to each eightbyte of a: any
 * integer field forces Kl, a pure-float eightbyte stays Kd. */
static void
classify(AClass *a, Typ *t, uint s)
{
	Field *f;
	int *cls;
	uint n, s1;

	/* all union variants start at the same offset */
	for (n=0, s1=s; n<t->nunion; n++, s=s1)
		for (f=t->fields[n]; f->type!=FEnd; f++) {
			assert(s <= 16);
			cls = &a->cls[s/8];
			switch (f->type) {
			case FEnd:
				die("unreachable");
			case FPad:
				/* don't change anything */
				s += f->len;
				break;
			case Fs:
			case Fd:
				if (*cls == Kx)
					*cls = Kd;
				s += f->len;
				break;
			case Fb:
			case Fh:
			case Fw:
			case Fl:
				*cls = Kl;
				s += f->len;
				break;
			case FTyp:
				classify(a, &typ[f->len], s);
				s += typ[f->len].size;
				break;
			}
		}
}
/* Fill in the AClass for aggregate type t: round the size up to a
 * multiple of 8, decide memory vs. register passing, and classify
 * the eightbytes of register-passed aggregates. */
static void
typclass(AClass *a, Typ *t)
{
	uint sz, al;

	sz = t->size;
	al = 1u << t->align;

	/* the ABI requires sizes to be rounded
	 * up to the nearest multiple of 8, moreover
	 * it makes it easy load and store structures
	 * in registers
	 */
	if (al < 8)
		al = 8;
	sz = (sz + al-1) & -al;

	a->type = t;
	a->size = sz;
	a->align = t->align;

	if (t->isdark || sz > 16 || sz == 0) {
		/* large or unaligned structures are
		 * required to be passed in memory
		 */
		a->inmem = 1;
		return;
	}

	a->cls[0] = Kx;
	a->cls[1] = Kx;
	a->inmem = 0;
	classify(a, t, 0);
}
/* Pick the SysV return registers (RAX/RDX for integers, XMM0/XMM1
 * for floats) for each eightbyte of a register-returned aggregate,
 * and accumulate the RCall return-count encoding. */
static int
retr(Ref reg[2], AClass *aret)
{
	static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}};
	int n, k, ca, nr[2];

	nr[0] = nr[1] = 0;
	ca = 0;
	for (n=0; (uint)n*8<aret->size; n++) {
		k = KBASE(aret->cls[n]);
		reg[n] = TMP(retreg[k][nr[k]++]);
		ca += 1 << (2 * k);
	}
	return ca;
}
/* Lower a value-returning block terminator to Jret0 plus the copies
 * or stores that place the return value per the SysV ABI; the RCall
 * register-usage encoding is left in b->jmp.arg. */
static void
selret(Blk *b, Fn *fn)
{
	int j, k, ca;
	Ref r, r0, reg[2];
	AClass aret;

	j = b->jmp.type;
	if (!isret(j) || j == Jret0)
		return;
	r0 = b->jmp.arg;
	b->jmp.type = Jret0;

	if (j == Jretc) {
		typclass(&aret, &typ[fn->retty]);
		if (aret.inmem) {
			/* memory return: blit the value to the
			 * caller-provided buffer, return its
			 * address in RAX */
			assert(rtype(fn->retr) == RTmp);
			emit(Ocopy, Kl, TMP(RAX), fn->retr, R);
			emit(Oblit1, 0, R, INT(aret.type->size), R);
			emit(Oblit0, 0, R, r0, fn->retr);
			ca = 1;
		} else {
			ca = retr(reg, &aret);
			if (aret.size > 8) {
				r = newtmp("abi", Kl, fn);
				emit(Oload, Kl, reg[1], r, R);
				emit(Oadd, Kl, r, r0, getcon(8, fn));
			}
			emit(Oload, Kl, reg[0], r0, R);
		}
	} else {
		k = j - Jretw;
		if (KBASE(k) == 0) {
			emit(Ocopy, k, TMP(RAX), r0, R);
			ca = 1;
		} else {
			emit(Ocopy, k, TMP(XMM0), r0, R);
			ca = 1 << 2;
		}
	}
	b->jmp.arg = CALL(ca);
}
/* Classify a run of argument (or parameter, op == Opar)
 * instructions [i0, i1) into ac[] per the SysV ABI, consuming the 6
 * integer and 8 SSE argument registers in order.  Returns the
 * packed RCall encoding of the register usage; *env receives the
 * environment reference if an Oarge/Opare is present. */
static int
argsclass(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret, Ref *env)
{
	int varc, envc, nint, ni, nsse, ns, n, *pn;
	AClass *a;
	Ins *i;

	if (aret && aret->inmem)
		nint = 5; /* hidden argument */
	else
		nint = 6;
	nsse = 8;
	varc = 0;
	envc = 0;
	for (i=i0, a=ac; i<i1; i++, a++)
		switch (i->op - op + Oarg) {
		case Oarg:
			if (KBASE(i->cls) == 0)
				pn = &nint;
			else
				pn = &nsse;
			if (*pn > 0) {
				--*pn;
				a->inmem = 0;
			} else
				a->inmem = 2;
			a->align = 3;
			a->size = 8;
			a->cls[0] = i->cls;
			break;
		case Oargc:
			n = i->arg[0].val;
			typclass(a, &typ[n]);
			if (a->inmem)
				continue;
			/* an aggregate goes in registers only if all
			 * of its eightbytes fit at once */
			ni = ns = 0;
			for (n=0; (uint)n*8<a->size; n++)
				if (KBASE(a->cls[n]) == 0)
					ni++;
				else
					ns++;
			if (nint >= ni && nsse >= ns) {
				nint -= ni;
				nsse -= ns;
			} else
				a->inmem = 1;
			break;
		case Oarge:
			envc = 1;
			if (op == Opar)
				*env = i->to;
			else
				*env = i->arg[0];
			break;
		case Oargv:
			varc = 1;
			break;
		default:
			die("unreachable");
		}

	if (varc && envc)
		err("sysv abi does not support variadic env calls");

	return ((varc|envc) << 12) | ((6-nint) << 4) | ((8-nsse) << 8);
}
/* caller-saved registers; the leading entries double as the SysV
 * argument registers, in argument order (see rarg()) */
int amd64_sysv_rsave[] = {
	RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX,
	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
	XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, -1
};
/* callee-saved registers */
int amd64_sysv_rclob[] = {RBX, R12, R13, R14, R15, -1};

MAKESURE(sysv_arrays_ok,
	sizeof amd64_sysv_rsave == (NGPS_SYSV+NFPS+1) * sizeof(int) &&
	sizeof amd64_sysv_rclob == (NCLR_SYSV+1) * sizeof(int)
);
/* layout of call's second argument (RCall)
*
* 29 12 8 4 3 0
* |0...00|x|xxxx|xxxx|xx|xx| range
* | | | | ` gp regs returned (0..2)
* | | | ` sse regs returned (0..2)
* | | ` gp regs passed (0..6)
* | ` sse regs passed (0..8)
* ` 1 if rax is used to pass data (0..1)
*/
/* Decode the return-register portion of an RCall value (see the
 * layout comment above) into a register bitset; optionally report
 * the per-class counts through p. */
bits
amd64_sysv_retregs(Ref r, int p[2])
{
	bits b;
	int ni, nf;

	assert(rtype(r) == RCall);
	ni = r.val & 3;
	nf = (r.val >> 2) & 3;
	b = 0;
	b |= ni >= 1 ? BIT(RAX) : 0;
	b |= ni >= 2 ? BIT(RDX) : 0;
	b |= nf >= 1 ? BIT(XMM0) : 0;
	b |= nf >= 2 ? BIT(XMM1) : 0;
	if (p) {
		p[0] = ni;
		p[1] = nf;
	}
	return b;
}
/* Decode the argument-register portion of an RCall value into a
 * register bitset; RAX is added when it carries data (env calls and
 * the vararg SSE count).  p, if given, receives the gp/sse counts. */
bits
amd64_sysv_argregs(Ref r, int p[2])
{
	bits b;
	int j, ni, nf, ra;

	assert(rtype(r) == RCall);
	ni = (r.val >> 4) & 15;
	nf = (r.val >> 8) & 15;
	ra = (r.val >> 12) & 1;
	b = ra ? BIT(RAX) : 0;
	for (j=ni; j-- > 0;)
		b |= BIT(amd64_sysv_rsave[j]);
	for (j=nf; j-- > 0;)
		b |= BIT(XMM0+j);
	if (p) {
		p[0] = ni + ra;
		p[1] = nf;
	}
	return b;
}
/* Hand out the next SysV argument register for class ty, advancing
 * the integer (*ni) or SSE (*ns) counter accordingly. */
static Ref
rarg(int ty, int *ni, int *ns)
{
	int reg;

	if (KBASE(ty) != 0) {
		reg = XMM0 + *ns;
		*ns = *ns + 1;
	} else {
		reg = amd64_sysv_rsave[*ni];
		*ni = *ni + 1;
	}
	return TMP(reg);
}
/* Lower one call: classify arguments, reserve aligned stack space
 * for memory-passed ones, place register arguments, and wire up the
 * return value (including the hidden pointer for memory-returned
 * aggregates, whose pad allocation is deferred via *rap).  Emission
 * is bottom-up, so the code reads in reverse execution order. */
static void
selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap)
{
	Ins *i;
	AClass *ac, *a, aret;
	int ca, ni, ns, al;
	uint stk, off;
	Ref r, r1, r2, reg[2], env;
	RAlloc *ra;

	env = R;
	ac = alloc((i1-i0) * sizeof ac[0]);

	if (!req(i1->arg[1], R)) {
		assert(rtype(i1->arg[1]) == RType);
		typclass(&aret, &typ[i1->arg[1].val]);
		ca = argsclass(i0, i1, ac, Oarg, &aret, &env);
	} else
		ca = argsclass(i0, i1, ac, Oarg, 0, &env);

	/* compute the stack space used by memory arguments,
	 * keeping 16-byte alignment where required */
	for (stk=0, a=&ac[i1-i0]; a>ac;)
		if ((--a)->inmem) {
			if (a->align > 4)
				err("sysv abi requires alignments of 16 or less");
			stk += a->size;
			if (a->align == 4)
				stk += stk & 15;
		}
	stk += stk & 15;
	if (stk) {
		r = getcon(-(int64_t)stk, fn);
		emit(Osalloc, Kl, R, r, R);
	}

	if (!req(i1->arg[1], R)) {
		if (aret.inmem) {
			/* get the return location from eax
			 * it saves one callee-save reg */
			r1 = newtmp("abi", Kl, fn);
			emit(Ocopy, Kl, i1->to, TMP(RAX), R);
			ca += 1;
		} else {
			/* todo, may read out of bounds.
			 * gcc did this up until 5.2, but
			 * this should still be fixed.
			 */
			if (aret.size > 8) {
				r = newtmp("abi", Kl, fn);
				aret.ref[1] = newtmp("abi", aret.cls[1], fn);
				emit(Ostorel, 0, R, aret.ref[1], r);
				emit(Oadd, Kl, r, i1->to, getcon(8, fn));
			}
			aret.ref[0] = newtmp("abi", aret.cls[0], fn);
			emit(Ostorel, 0, R, aret.ref[0], i1->to);
			ca += retr(reg, &aret);
			if (aret.size > 8)
				emit(Ocopy, aret.cls[1], aret.ref[1], reg[1], R);
			emit(Ocopy, aret.cls[0], aret.ref[0], reg[0], R);
			r1 = i1->to;
		}
		/* allocate return pad */
		ra = alloc(sizeof *ra);
		/* specific to NAlign == 3 */
		al = aret.align >= 2 ? aret.align - 2 : 0;
		ra->i = (Ins){Oalloc+al, Kl, r1, {getcon(aret.size, fn)}};
		ra->link = (*rap);
		*rap = ra;
	} else {
		ra = 0;
		if (KBASE(i1->cls) == 0) {
			emit(Ocopy, i1->cls, i1->to, TMP(RAX), R);
			ca += 1;
		} else {
			emit(Ocopy, i1->cls, i1->to, TMP(XMM0), R);
			ca += 1 << 2;
		}
	}

	emit(Ocall, i1->cls, R, i1->arg[0], CALL(ca));

	if (!req(R, env))
		emit(Ocopy, Kl, TMP(RAX), env, R);
	else if ((ca >> 12) & 1) /* vararg call */
		emit(Ocopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R);

	ni = ns = 0;
	if (ra && aret.inmem)
		emit(Ocopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */
	/* register-passed arguments */
	for (i=i0, a=ac; i<i1; i++, a++) {
		if (i->op >= Oarge || a->inmem)
			continue;
		r1 = rarg(a->cls[0], &ni, &ns);
		if (i->op == Oargc) {
			if (a->size > 8) {
				r2 = rarg(a->cls[1], &ni, &ns);
				r = newtmp("abi", Kl, fn);
				emit(Oload, a->cls[1], r2, r, R);
				emit(Oadd, Kl, r, i->arg[1], getcon(8, fn));
			}
			emit(Oload, a->cls[0], r1, i->arg[1], R);
		} else
			emit(Ocopy, i->cls, r1, i->arg[0], R);
	}

	if (!stk)
		return;

	/* memory-passed arguments: store them into the stack
	 * area reserved above, at increasing offsets */
	r = newtmp("abi", Kl, fn);
	for (i=i0, a=ac, off=0; i<i1; i++, a++) {
		if (i->op >= Oarge || !a->inmem)
			continue;
		r1 = newtmp("abi", Kl, fn);
		if (i->op == Oargc) {
			if (a->align == 4)
				off += off & 15;
			emit(Oblit1, 0, R, INT(a->type->size), R);
			emit(Oblit0, 0, R, i->arg[1], r1);
		} else
			emit(Ostorel, 0, R, i->arg[0], r1);
		emit(Oadd, Kl, r1, r, getcon(off, fn));
		off += a->size;
	}
	emit(Osalloc, Kl, r, getcon(stk, fn), R);
}
/* Lower the parameter instructions [i0, i1) of the current
 * function: copy register parameters out of their ABI registers,
 * spill register-passed aggregates to fresh allocs, and map
 * stack-passed parameters to negative stack slots.  Returns the
 * RCall encoding augmented with the incoming stack size (used by
 * va_start). */
static int
selpar(Fn *fn, Ins *i0, Ins *i1)
{
	AClass *ac, *a, aret;
	Ins *i;
	int ni, ns, s, al, fa;
	Ref r, env;

	env = R;
	ac = alloc((i1-i0) * sizeof ac[0]);
	curi = &insb[NIns];
	ni = ns = 0;

	if (fn->retty >= 0) {
		typclass(&aret, &typ[fn->retty]);
		fa = argsclass(i0, i1, ac, Opar, &aret, &env);
	} else
		fa = argsclass(i0, i1, ac, Opar, 0, &env);
	fn->reg = amd64_sysv_argregs(CALL(fa), 0);

	/* register-passed aggregates get a local alloc that the
	 * incoming eightbytes are stored into */
	for (i=i0, a=ac; i<i1; i++, a++) {
		if (i->op != Oparc || a->inmem)
			continue;
		if (a->size > 8) {
			r = newtmp("abi", Kl, fn);
			a->ref[1] = newtmp("abi", Kl, fn);
			emit(Ostorel, 0, R, a->ref[1], r);
			emit(Oadd, Kl, r, i->to, getcon(8, fn));
		}
		a->ref[0] = newtmp("abi", Kl, fn);
		emit(Ostorel, 0, R, a->ref[0], i->to);
		/* specific to NAlign == 3 */
		al = a->align >= 2 ? a->align - 2 : 0;
		emit(Oalloc+al, Kl, i->to, getcon(a->size, fn), R);
	}

	/* memory-returned aggregate: save the hidden pointer */
	if (fn->retty >= 0 && aret.inmem) {
		r = newtmp("abi", Kl, fn);
		emit(Ocopy, Kl, r, rarg(Kl, &ni, &ns), R);
		fn->retr = r;
	}

	/* s counts 4-byte units above the frame (starts at 4 to
	 * skip the return address and saved rbp) */
	for (i=i0, a=ac, s=4; i<i1; i++, a++) {
		switch (a->inmem) {
		case 1:	/* aggregate passed on the stack */
			if (a->align > 4)
				err("sysv abi requires alignments of 16 or less");
			if (a->align == 4)
				s = (s+3) & -4;
			fn->tmp[i->to.val].slot = -s;
			s += a->size / 4;
			continue;
		case 2:	/* scalar passed on the stack */
			emit(Oload, i->cls, i->to, SLOT(-s), R);
			s += 2;
			continue;
		}
		if (i->op == Opare)
			continue;
		r = rarg(a->cls[0], &ni, &ns);
		if (i->op == Oparc) {
			emit(Ocopy, a->cls[0], a->ref[0], r, R);
			if (a->size > 8) {
				r = rarg(a->cls[1], &ni, &ns);
				emit(Ocopy, a->cls[1], a->ref[1], r, R);
			}
		} else
			emit(Ocopy, i->cls, i->to, r, R);
	}

	if (!req(R, env))
		emit(Ocopy, Kl, env, TMP(RAX), R);

	return fa | (s*4)<<12;
}
/* Split block b: move the instructions emitted so far (in the
 * shared buffer since curi) into a new block linked right after b,
 * and reset the emission cursor.  Used by the va_arg lowering. */
static Blk *
split(Fn *fn, Blk *b)
{
	Blk *bn;

	++fn->nblk;
	bn = newblk();
	idup(bn, curi, &insb[NIns]-curi);
	curi = &insb[NIns];
	bn->visit = ++b->visit;
	strf(bn->name, "%s.%d", b->name, b->visit);
	bn->loop = b->loop;
	bn->link = b->link;
	b->link = bn;
	return bn;
}
/* Retarget every phi of b that references predecessor bp so it
 * references bp1 instead (used after block splitting). */
static void
chpred(Blk *b, Blk *bp, Blk *bp1)
{
	Phi *phi;
	uint n;

	for (phi=b->phi; phi; phi=phi->link) {
		n = 0;
		while (phi->blk[n] != bp) {
			assert(n+1 < phi->narg);
			n++;
		}
		phi->blk[n] = bp1;
	}
}
/* Lower Ovaarg by splitting block b into the diamond sketched in
 * the comment below: test the register-save count in the va_list,
 * fetch either from the register save area (breg) or the overflow
 * stack area (bstk), and join with a phi in b0. */
static void
selvaarg(Fn *fn, Blk *b, Ins *i)
{
	Ref loc, lreg, lstk, nr, r0, r1, c4, c8, c16, c, ap;
	Blk *b0, *bstk, *breg;
	int isint;

	c4 = getcon(4, fn);
	c8 = getcon(8, fn);
	c16 = getcon(16, fn);
	ap = i->arg[0];
	isint = KBASE(i->cls) == 0;

	/* @b [...]
	       r0 =l add ap, (0 or 4)
	       nr =l loadsw r0
	       r1 =w cultw nr, (48 or 176)
	       jnz r1, @breg, @bstk
	   @breg
	       r0 =l add ap, 16
	       r1 =l loadl r0
	       lreg =l add r1, nr
	       r0 =w add nr, (8 or 16)
	       r1 =l add ap, (0 or 4)
	       storew r0, r1
	   @bstk
	       r0 =l add ap, 8
	       lstk =l loadl r0
	       r1 =l add lstk, 8
	       storel r1, r0
	   @b0
	       %loc =l phi @breg %lreg, @bstk %lstk
	       i->to =(i->cls) load %loc
	*/

	loc = newtmp("abi", Kl, fn);
	emit(Oload, i->cls, i->to, loc, R);
	b0 = split(fn, b);
	b0->jmp = b->jmp;
	b0->s1 = b->s1;
	b0->s2 = b->s2;
	if (b->s1)
		chpred(b->s1, b, b0);
	if (b->s2 && b->s2 != b->s1)
		chpred(b->s2, b, b0);

	/* register-save-area branch */
	lreg = newtmp("abi", Kl, fn);
	nr = newtmp("abi", Kl, fn);
	r0 = newtmp("abi", Kw, fn);
	r1 = newtmp("abi", Kl, fn);
	emit(Ostorew, Kw, R, r0, r1);
	emit(Oadd, Kl, r1, ap, isint ? CON_Z : c4);
	emit(Oadd, Kw, r0, nr, isint ? c8 : c16);
	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kl, fn);
	emit(Oadd, Kl, lreg, r1, nr);
	emit(Oload, Kl, r1, r0, R);
	emit(Oadd, Kl, r0, ap, c16);
	breg = split(fn, b);
	breg->jmp.type = Jjmp;
	breg->s1 = b0;

	/* overflow-stack branch */
	lstk = newtmp("abi", Kl, fn);
	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kl, fn);
	emit(Ostorel, Kw, R, r1, r0);
	emit(Oadd, Kl, r1, lstk, c8);
	emit(Oload, Kl, lstk, r0, R);
	emit(Oadd, Kl, r0, ap, c8);
	bstk = split(fn, b);
	bstk->jmp.type = Jjmp;
	bstk->s1 = b0;

	b0->phi = alloc(sizeof *b0->phi);
	*b0->phi = (Phi){
		.cls = Kl, .to = loc,
		.narg = 2,
		.blk = vnew(2, sizeof b0->phi->blk[0], PFn),
		.arg = vnew(2, sizeof b0->phi->arg[0], PFn),
	};
	b0->phi->blk[0] = bstk;
	b0->phi->blk[1] = breg;
	b0->phi->arg[0] = lstk;
	b0->phi->arg[1] = lreg;
	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kw, fn);
	b->jmp.type = Jjnz;
	b->jmp.arg = r1;
	b->s1 = breg;
	b->s2 = bstk;
	c = getcon(isint ? 48 : 176, fn);
	emit(Ocmpw+Ciult, Kw, r1, nr, c);
	emit(Oloadsw, Kl, nr, r0, R);
	emit(Oadd, Kl, r0, ap, isint ? CON_Z : c4);
}
/* Lower Ovastart: initialize the va_list at ap with the gp/fp
 * register-save offsets already consumed, the overflow stack
 * address, and the register save area (at rbp-176).  fa is the
 * packed value produced by selpar(). */
static void
selvastart(Fn *fn, int fa, Ref ap)
{
	Ref r0, r1;
	int gp, fp, sp;

	gp = ((fa >> 4) & 15) * 8;
	fp = 48 + ((fa >> 8) & 15) * 16;
	sp = fa >> 12;
	/* va_list.reg_save_area = rbp - 176 */
	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kl, fn);
	emit(Ostorel, Kw, R, r1, r0);
	emit(Oadd, Kl, r1, TMP(RBP), getcon(-176, fn));
	emit(Oadd, Kl, r0, ap, getcon(16, fn));
	/* va_list.overflow_arg_area = rbp + sp */
	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kl, fn);
	emit(Ostorel, Kw, R, r1, r0);
	emit(Oadd, Kl, r1, TMP(RBP), getcon(sp, fn));
	emit(Oadd, Kl, r0, ap, getcon(8, fn));
	/* va_list.fp_offset / va_list.gp_offset */
	r0 = newtmp("abi", Kl, fn);
	emit(Ostorew, Kw, R, getcon(fp, fn), r0);
	emit(Oadd, Kl, r0, ap, getcon(4, fn));
	emit(Ostorew, Kw, R, getcon(gp, fn), ap);
}
/* SysV ABI lowering entry point: lower the parameter list of the
 * start block, then calls, returns, and vararg instructions in
 * every block.  Deferred return-pad allocations collected in ral
 * are emitted at the top of the start block, which is processed
 * last for that reason. */
void
amd64_sysv_abi(Fn *fn)
{
	Blk *b;
	Ins *i, *i0;
	RAlloc *ral;
	int n0, n1, ioff, fa;

	for (b=fn->start; b; b=b->link)
		b->visit = 0;

	/* lower parameters */
	for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++)
		if (!ispar(i->op))
			break;
	fa = selpar(fn, b->ins, i);
	/* splice the lowered prelude in front of the remaining
	 * instructions of the start block */
	n0 = &insb[NIns] - curi;
	ioff = i - b->ins;
	n1 = b->nins - ioff;
	vgrow(&b->ins, n0+n1);
	icpy(b->ins+n0, b->ins+ioff, n1);
	icpy(b->ins, curi, n0);
	b->nins = n0+n1;

	/* lower calls, returns, and vararg instructions */
	ral = 0;
	b = fn->start;
	do {
		if (!(b = b->link))
			b = fn->start; /* do it last */
		if (b->visit)
			continue;	/* skip blocks created by selvaarg() */
		curi = &insb[NIns];
		selret(b, fn);
		for (i=&b->ins[b->nins]; i!=b->ins;)
			switch ((--i)->op) {
			default:
				emiti(*i);
				break;
			case Ocall:
				/* back up to the first argument
				 * instruction of this call */
				for (i0=i; i0>b->ins; i0--)
					if (!isarg((i0-1)->op))
						break;
				selcall(fn, i0, i, &ral);
				i = i0;
				break;
			case Ovastart:
				selvastart(fn, fa, i->arg[0]);
				break;
			case Ovaarg:
				selvaarg(fn, b, i);
				break;
			case Oarg:
			case Oargc:
				die("unreachable");
			}
		if (b == fn->start)
			for (; ral; ral=ral->link)
				emiti(ral->i);
		idup(b, curi, &insb[NIns]-curi);
	} while (b != fn->start);

	if (debug['A']) {
		fprintf(stderr, "\n> After ABI lowering:\n");
		printfn(fn, stderr);
	}
}

67
src/qbe/amd64/targ.c Normal file
View File

@@ -0,0 +1,67 @@
#include "all.h"
/* per-op x86 properties, expanded from the X() annotations in the
 * shared op table */
Amd64Op amd64_op[NOp] = {
#define O(op, t, x) [O##op] =
#define X(nm, zf, lf) { nm, zf, lf, },
	#include "../ops.h"
};
/* Target hook: number of memory operands op may take (from the
 * generated amd64_op table). */
static int
amd64_memargs(int op)
{
	return amd64_op[op].nmem;
}
/* fields shared by all three amd64 Target descriptors below */
#define AMD64_COMMON \
	.gpr0 = RAX, \
	.ngpr = NGPR, \
	.fpr0 = XMM0, \
	.nfpr = NFPR, \
	.rglob = BIT(RBP) | BIT(RSP), \
	.nrglob = 2, \
	.memargs = amd64_memargs, \
	.abi0 = elimsb, \
	.isel = amd64_isel, \
	.cansel = 1,
/* amd64 with the System V ABI (Linux, BSDs), ELF output */
Target T_amd64_sysv = {
	.name = "amd64_sysv",
	.emitfin = elf_emitfin,
	.asloc = ".L",
	.abi1 = amd64_sysv_abi,
	.rsave = amd64_sysv_rsave,
	.nrsave = {NGPS_SYSV, NFPS},
	.retregs = amd64_sysv_retregs,
	.argregs = amd64_sysv_argregs,
	.emitfn = amd64_sysv_emitfn,
	AMD64_COMMON
};

/* amd64 macOS: SysV ABI, Mach-O output, underscore symbol prefix */
Target T_amd64_apple = {
	.name = "amd64_apple",
	.apple = 1,
	.emitfin = macho_emitfin,
	.asloc = "L",
	.assym = "_",
	.abi1 = amd64_sysv_abi,
	.rsave = amd64_sysv_rsave,
	.nrsave = {NGPS_SYSV, NFPS},
	.retregs = amd64_sysv_retregs,
	.argregs = amd64_sysv_argregs,
	.emitfn = amd64_sysv_emitfn,
	AMD64_COMMON
};

/* amd64 Windows: Microsoft x64 ABI, PE output */
Target T_amd64_win = {
	.name = "amd64_win",
	.windows = 1,
	.emitfin = pe_emitfin,
	.asloc = "L",
	.abi1 = amd64_winabi_abi,
	.rsave = amd64_winabi_rsave,
	.nrsave = {NGPS_WIN, NFPS},
	.retregs = amd64_winabi_retregs,
	.argregs = amd64_winabi_argregs,
	.emitfn = amd64_winabi_emitfn,
	AMD64_COMMON
};

763
src/qbe/amd64/winabi.c Executable file
View File

@@ -0,0 +1,763 @@
#include "all.h"
#include <stdbool.h>
// How one argument travels to the callee under the Microsoft x64
// calling convention.
typedef enum ArgPassStyle {
  APS_Invalid = 0,
  APS_Register,                    // scalar (or small struct) in a register
  APS_InlineOnStack,               // scalar spilled to the stack
  APS_CopyAndPointerInRegister,    // caller-made copy, pointer in a register
  APS_CopyAndPointerOnStack,       // caller-made copy, pointer on the stack
  APS_VarargsTag,                  // marks the start of variadic arguments
  APS_EnvTag,                      // environment argument (passed in RAX)
} ArgPassStyle;

// Classification of one call argument / function parameter.
typedef struct ArgClass {
  Typ* type;
  ArgPassStyle style;
  int align;   // log2 alignment
  uint size;   // size in bytes
  int cls;     // register class (Kl/Kw/Ks/Kd)
  Ref ref;
} ArgClass;

// Deferred stack allocation (e.g. argument copies, return pads).
typedef struct ExtraAlloc ExtraAlloc;
struct ExtraAlloc {
  Ins instr;
  ExtraAlloc* link;
};

#define ALIGN_DOWN(n, a) ((n) & ~((a)-1))
#define ALIGN_UP(n, a) ALIGN_DOWN((n) + (a)-1, (a))

// Number of stack bytes required be reserved for the callee.
#define SHADOW_SPACE_SIZE 32
// Caller-saved registers; the leading RCX, RDX, R8, R9 (and
// XMM0-XMM3) double as the win64 argument registers.
int amd64_winabi_rsave[] = {RCX, RDX, R8, R9, R10, R11, RAX, XMM0,
                            XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8,
                            XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, -1};
// Callee-saved registers (note: RSI/RDI are callee-saved on win64,
// unlike SysV).
int amd64_winabi_rclob[] = {RBX, R12, R13, R14, R15, RSI, RDI, -1};

MAKESURE(winabi_arrays_ok,
         sizeof amd64_winabi_rsave == (NGPS_WIN + NFPS + 1) * sizeof(int) &&
         sizeof amd64_winabi_rclob == (NCLR_WIN + 1) * sizeof(int));
// layout of call's second argument (RCall)
//
// bit 0: rax returned
// bit 1: xmm0 returned
// bits 23: 0
// bits 4567: rcx, rdx, r8, r9 passed
// bits 89ab: xmm0,1,2,3 passed
// bit c: env call (rax passed)
// bits d..1f: 0
// Decode the return-register bits of an RCall value (bit 0: rax
// returned, bit 1: xmm0 returned — see the layout comment above)
// into a register bitset; optionally report the per-class return
// counts through p.
//
// The two bits are independent flags, so they must be tested
// separately: the previous code treated them as an either/or and
// marked XMM0 live whenever RAX was not returned (wrong for void
// calls), and reported p[1] as 2 instead of 1 because it kept the
// raw `& 2` mask instead of shifting it down to a count.
bits amd64_winabi_retregs(Ref r, int p[2]) {
  assert(rtype(r) == RCall);
  bits b = 0;
  int num_int_returns = r.val & 1;
  int num_float_returns = (r.val >> 1) & 1;
  if (num_int_returns) {
    b |= BIT(RAX);
  }
  if (num_float_returns) {
    b |= BIT(XMM0);
  }
  if (p) {
    p[0] = num_int_returns;
    p[1] = num_float_returns;
  }
  return b;
}
// Population count: number of set bits in b (Kernighan's method —
// each iteration clears the lowest set bit).
static uint popcnt(bits b) {
  uint n = 0;
  while (b) {
    b &= b - 1;
    ++n;
  }
  return n;
}
// Decode the argument-register bits of an RCall value into a
// register bitset.  Unlike SysV these fields are per-register
// *masks*, not counts: win64 shares one positional counter across
// int and float args, so used registers need not be contiguous.
bits amd64_winabi_argregs(Ref r, int p[2]) {
  static const int gp[4] = {RCX, RDX, R8, R9};
  assert(rtype(r) == RCall);
  int int_passed = (r.val >> 4) & 15;
  int float_passed = (r.val >> 8) & 15;
  bool env_param = (r.val >> 12) & 1;
  bits b = env_param ? BIT(RAX) : 0;
  for (int j = 0; j < 4; ++j) {
    if (int_passed & (1 << j))
      b |= BIT(gp[j]);
    if (float_passed & (1 << j))
      b |= BIT(XMM0 + j);
  }
  if (p) {
    // TODO: The only place this is used is live.c. I'm not sure what should be
    // returned here wrt to using the same counter for int/float regs on win.
    // For now, try the number of registers in use even though they're not
    // contiguous.
    p[0] = popcnt(int_passed);
    p[1] = popcnt(float_passed);
  }
  return b;
}
// Tracks which argument/return registers a call (or function
// definition) uses while its arguments are being classified.
typedef struct RegisterUsage {
  // Counter for both int/float as they're counted together. Only if the bool's
  // set in regs_passed is the given register *actually* needed for a value
  // (i.e. needs to be saved, etc.).
  int num_regs_passed;
  // Indexed first by 0=int, 1=float, use KBASE(cls).
  // Indexed second by register index in calling convention, so for integer,
  // 0=RCX, 1=RDX, 2=R8, 3=R9, and for float XMM0, XMM1, XMM2, XMM3.
  bool regs_passed[2][4];
  bool rax_returned;
  bool xmm0_returned;
  // This is also used as where the va_start will start for varargs functions
  // (there's no 'Oparv', so we need to keep track of a count here.)
  int num_named_args_passed;
  // This is set when classifying the arguments for a call (but not when
  // classifying the parameters of a function definition).
  bool is_varargs_call;
  bool has_env;
} RegisterUsage;
// Pack a RegisterUsage into the RCall bit layout documented above:
// bit 0 rax returned, bit 1 xmm0 returned, bits 4-7 int arg regs,
// bits 8-11 float arg regs, bit 12 env call.
static int register_usage_to_call_arg_value(RegisterUsage reg_usage) {
  int value = 0;
  if (reg_usage.rax_returned)
    value |= 1 << 0;
  if (reg_usage.xmm0_returned)
    value |= 1 << 1;
  for (int k = 0; k < 2; ++k) {
    for (int j = 0; j < 4; ++j) {
      if (reg_usage.regs_passed[k][j])
        value |= 1 << (4 + 4 * k + j);
    }
  }
  if (reg_usage.has_env)
    value |= 1 << 12;
  return value;
}
// Assigns the argument to the next free register slot if one remains (win
// x64 has only four, shared between int and float), updating regs_passed.
// Otherwise the value is marked as needing stack space. Either way the
// named-argument count is bumped; varargs lowering later relies on it.
static void assign_register_or_stack(RegisterUsage* reg_usage,
                                     ArgClass* arg,
                                     bool is_float,
                                     bool by_copy) {
  ++reg_usage->num_named_args_passed;
  int slot = reg_usage->num_regs_passed;
  if (slot < 4) {
    reg_usage->regs_passed[is_float][slot] = true;
    reg_usage->num_regs_passed = slot + 1;
    arg->style = by_copy ? APS_CopyAndPointerInRegister : APS_Register;
  } else {
    // Out of registers: pass on the stack (directly, or as a pointer to a
    // stack copy for large aggregates).
    arg->style = by_copy ? APS_CopyAndPointerOnStack : APS_InlineOnStack;
  }
}
static bool type_is_by_copy(Typ* type) {
  // Opaque ("dark") types are always passed by copy-and-pointer. Otherwise
  // only 1/2/4/8 byte aggregates travel inline; note that e.g. a 5 byte
  // struct would "fit" in a register but is still passed by copy-and-pointer.
  if (type->isdark) {
    return true;
  }
  switch (type->size) {
    case 1:
    case 2:
    case 4:
    case 8:
      return false;
    default:
      return true;
  }
}
// This function is used for both arguments and parameters.
// begin_instr should either point at the first Oarg or Opar, and end_instr
// should point past the last one (so to the Ocall for arguments, or to the
// first 'real' instruction of the function for parameters).
static void classify_arguments(RegisterUsage* reg_usage,
                               Ins* begin_instr,
                               Ins* end_instr,
                               ArgClass* arg_classes,
                               Ref* env) {
  ArgClass* arg = arg_classes;
  // For each argument, determine how it will be passed (int, float, stack)
  // and update the `reg_usage` counts. Additionally, fill out arg_classes for
  // each argument.
  for (Ins* instr = begin_instr; instr < end_instr; ++instr, ++arg) {
    switch (instr->op) {
      case Oarg:
      case Opar:
        // Basic scalar; KBASE(cls) is 0 for int classes, 1 for float, which
        // selects the register file it consumes.
        assign_register_or_stack(reg_usage, arg, KBASE(instr->cls),
                                 /*by_copy=*/false);
        arg->cls = instr->cls;
        arg->align = 3;
        arg->size = 8;
        break;
      case Oargc:
      case Oparc: {
        // Aggregate; small power-of-two sizes are passed inline, everything
        // else by pointer-to-copy (see type_is_by_copy).
        int typ_index = instr->arg[0].val;
        Typ* type = &typ[typ_index];
        bool by_copy = type_is_by_copy(type);
        assign_register_or_stack(reg_usage, arg, /*is_float=*/false, by_copy);
        arg->cls = Kl;
        if (!by_copy && type->size <= 4) {
          arg->cls = Kw;
        }
        arg->align = 3;
        arg->size = type->size;
        break;
      }
      case Oarge:
        // Env argument of a call: remembered, not assigned a normal slot
        // (it is passed in RAX at the call site).
        *env = instr->arg[0];
        arg->style = APS_EnvTag;
        reg_usage->has_env = true;
        break;
      case Opare:
        // Env parameter of a definition: received in RAX.
        *env = instr->to;
        arg->style = APS_EnvTag;
        reg_usage->has_env = true;
        break;
      case Oargv:
        reg_usage->is_varargs_call = true;
        arg->style = APS_VarargsTag;
        break;
    }
  }
  if (reg_usage->has_env && reg_usage->is_varargs_call) {
    die("can't use env with varargs");
  }
  // During a varargs call, float arguments have to be duplicated to their
  // associated integer register, so mark them as in-use too.
  if (reg_usage->is_varargs_call) {
    for (int i = 0; i < 4; ++i) {
      if (reg_usage->regs_passed[/*float*/ 1][i]) {
        reg_usage->regs_passed[/*int*/ 0][i] = true;
      }
    }
  }
}
static bool is_integer_type(int ty) {
  assert(ty >= 0 && ty < 4 && "expecting Kw Kl Ks Kd");
  // KBASE is 0 for the integer classes (Kw, Kl) and 1 for float (Ks, Kd).
  bool is_float = KBASE(ty) != 0;
  return !is_float;
}
// Map argument slot `counter` to its concrete register for class `cls`:
// integer arguments use the rsave order (RCX, RDX, R8, R9), floats XMM0-3.
static Ref register_for_arg(int cls, int counter) {
  assert(counter < 4);
  return is_integer_type(cls) ? TMP(amd64_winabi_rsave[counter])
                              : TMP(XMM0 + counter);
}
// Lowers one Ocall plus the Oarg* instructions preceding it into explicit
// register copies, stack stores, and struct copies for win x64. NOTE:
// instructions are emitted in REVERSE order, so reading this function
// top-to-bottom corresponds to the generated code running bottom-to-top.
// Returns the instruction before the earliest argument so the caller can
// continue its backwards walk from there.
static Ins* lower_call(Fn* func,
                       Blk* block,
                       Ins* call_instr,
                       ExtraAlloc** pextra_alloc) {
  // Call arguments are instructions. Walk through them to find the end of the
  // call+args that we need to process (and return the instruction past the body
  // of the instruction for continuing processing).
  Ins* instr_past_args = call_instr - 1;
  for (; instr_past_args >= block->ins; --instr_past_args) {
    if (!isarg(instr_past_args->op)) {
      break;
    }
  }
  Ins* earliest_arg_instr = instr_past_args + 1;
  // Don't need an ArgClass for the call itself, so one less than the total
  // number of instructions we're dealing with.
  uint num_args = call_instr - earliest_arg_instr;
  ArgClass* arg_classes = alloc(num_args * sizeof(ArgClass));
  RegisterUsage reg_usage = {0};
  ArgClass ret_arg_class = {0};
  // Ocall's two arguments are the function to be called in 0, and, if the
  // function returns a non-basic type, then arg[1] is a reference to the
  // type of the return. req checks if Refs are equal; `R` is 0.
  bool il_has_struct_return = !req(call_instr->arg[1], R);
  bool is_struct_return = false;
  if (il_has_struct_return) {
    Typ* ret_type = &typ[call_instr->arg[1].val];
    is_struct_return = type_is_by_copy(ret_type);
    if (is_struct_return) {
      // The hidden return pointer consumes the first argument slot.
      assign_register_or_stack(&reg_usage, &ret_arg_class, /*is_float=*/false,
                               /*by_copy=*/true);
    }
    ret_arg_class.size = ret_type->size;
  }
  Ref env = R;
  classify_arguments(&reg_usage, earliest_arg_instr, call_instr, arg_classes,
                     &env);
  // We now know which arguments are on the stack and which are in registers, so
  // we can allocate the correct amount of space to stash the stack-located ones
  // into.
  uint stack_usage = 0;
  for (uint i = 0; i < num_args; ++i) {
    ArgClass* arg = &arg_classes[i];
    // stack_usage only accounts for pushes that are for values that don't have
    // enough registers. Large struct copies are alloca'd separately, and then
    // only have (potentially) 8 bytes to add to stack_usage here.
    if (arg->style == APS_InlineOnStack) {
      if (arg->align > 4) {
        err("win abi cannot pass alignments > 16");
      }
      stack_usage += arg->size;
    } else if (arg->style == APS_CopyAndPointerOnStack) {
      stack_usage += 8;
    }
  }
  stack_usage = ALIGN_UP(stack_usage, 16);
  // Note that here we're logically 'after' the call (due to emitting
  // instructions in reverse order), so we're doing a negative stack
  // allocation to clean up after the call.
  Ref stack_size_ref =
      getcon(-(int64_t)(stack_usage + SHADOW_SPACE_SIZE), func);
  emit(Osalloc, Kl, R, stack_size_ref, R);
  ExtraAlloc* return_pad = NULL;
  if (is_struct_return) {
    // Large return: alloca a pad in the entry block, pass its address as the
    // hidden first argument, and the callee hands the pad back in RAX.
    return_pad = alloc(sizeof(ExtraAlloc));
    Ref ret_pad_ref = newtmp("abi.ret_pad", Kl, func);
    return_pad->instr =
        (Ins){Oalloc8, Kl, ret_pad_ref, {getcon(ret_arg_class.size, func)}};
    return_pad->link = (*pextra_alloc);
    *pextra_alloc = return_pad;
    reg_usage.rax_returned = true;
    emit(Ocopy, call_instr->cls, call_instr->to, TMP(RAX), R);
  } else {
    if (il_has_struct_return) {
      // In the case that at the IL level, a struct return was specified, but as
      // far as the calling convention is concerned it's not actually by
      // pointer, we need to store the return value into an alloca because
      // subsequent IL will still be treating the function return as a pointer.
      ExtraAlloc* return_copy = alloc(sizeof(ExtraAlloc));
      return_copy->instr =
          (Ins){Oalloc8, Kl, call_instr->to, {getcon(8, func)}};
      return_copy->link = (*pextra_alloc);
      *pextra_alloc = return_copy;
      Ref copy = newtmp("abi.copy", Kl, func);
      emit(Ostorel, 0, R, copy, call_instr->to);
      emit(Ocopy, Kl, copy, TMP(RAX), R);
      reg_usage.rax_returned = true;
    } else if (is_integer_type(call_instr->cls)) {
      // Only a basic type returned from the call, integer.
      emit(Ocopy, call_instr->cls, call_instr->to, TMP(RAX), R);
      reg_usage.rax_returned = true;
    } else {
      // Basic type, floating point.
      emit(Ocopy, call_instr->cls, call_instr->to, TMP(XMM0), R);
      reg_usage.xmm0_returned = true;
    }
  }
  // Emit the actual call instruction. There's no 'to' value by this point
  // because we've lowered it into register manipulation (that's the `R`),
  // arg[0] of the call is the function, and arg[1] is register usage is
  // documented as above (copied from SysV).
  emit(Ocall, call_instr->cls, R, call_instr->arg[0],
       CALL(register_usage_to_call_arg_value(reg_usage)));
  if (!req(R, env)) {
    // If there's an env arg to be passed, it gets stashed in RAX.
    emit(Ocopy, Kl, TMP(RAX), env, R);
  }
  if (reg_usage.is_varargs_call) {
    // Any float arguments need to be duplicated to integer registers. This is
    // required by the calling convention so that dumping to shadow space can be
    // done without a prototype and for varargs.
#define DUP_IF_USED(index, floatreg, intreg)           \
  if (reg_usage.regs_passed[/*float*/ 1][index]) {     \
    emit(Ocast, Kl, TMP(intreg), TMP(floatreg), R);    \
  }
    DUP_IF_USED(0, XMM0, RCX);
    DUP_IF_USED(1, XMM1, RDX);
    DUP_IF_USED(2, XMM2, R8);
    DUP_IF_USED(3, XMM3, R9);
#undef DUP_IF_USED
  }
  int reg_counter = 0;
  if (is_struct_return) {
    // The hidden return-pad pointer occupies the first register slot.
    Ref first_reg = register_for_arg(Kl, reg_counter++);
    emit(Ocopy, Kl, first_reg, return_pad->instr.to, R);
  }
  // This is where we actually do the load of values into registers or into
  // stack slots.
  Ref arg_stack_slots = newtmp("abi.args", Kl, func);
  uint slot_offset = SHADOW_SPACE_SIZE;
  ArgClass* arg = arg_classes;
  for (Ins* instr = earliest_arg_instr; instr != call_instr; ++instr, ++arg) {
    switch (arg->style) {
      case APS_Register: {
        Ref into = register_for_arg(arg->cls, reg_counter++);
        if (instr->op == Oargc) {
          // If this is a small struct being passed by value. The value in the
          // instruction in this case is a pointer, but it needs to be loaded
          // into the register.
          emit(Oload, arg->cls, into, instr->arg[1], R);
        } else {
          // Otherwise, a normal value passed in a register.
          emit(Ocopy, instr->cls, into, instr->arg[0], R);
        }
        break;
      }
      case APS_InlineOnStack: {
        Ref slot = newtmp("abi.off", Kl, func);
        if (instr->op == Oargc) {
          // This is a small struct, so it's not passed by copy, but the
          // instruction is a pointer. So we need to copy it into the stack
          // slot. (And, remember that these are emitted backwards, so store,
          // then load.)
          Ref smalltmp = newtmp("abi.smalltmp", arg->cls, func);
          emit(Ostorel, 0, R, smalltmp, slot);
          emit(Oload, arg->cls, smalltmp, instr->arg[1], R);
        } else {
          // Stash the value into the stack slot.
          emit(Ostorel, 0, R, instr->arg[0], slot);
        }
        emit(Oadd, Kl, slot, arg_stack_slots, getcon(slot_offset, func));
        slot_offset += arg->size;
        break;
      }
      case APS_CopyAndPointerInRegister:
      case APS_CopyAndPointerOnStack: {
        // Alloca a space to copy into, and blit the value from the instr to the
        // copied location.
        ExtraAlloc* arg_copy = alloc(sizeof(ExtraAlloc));
        Ref copy_ref = newtmp("abi.copy", Kl, func);
        arg_copy->instr =
            (Ins){Oalloc8, Kl, copy_ref, {getcon(arg->size, func)}};
        arg_copy->link = (*pextra_alloc);
        *pextra_alloc = arg_copy;
        emit(Oblit1, 0, R, INT(arg->size), R);
        emit(Oblit0, 0, R, instr->arg[1], copy_ref);
        // Now load the pointer into the correct register or stack slot.
        if (arg->style == APS_CopyAndPointerInRegister) {
          Ref into = register_for_arg(arg->cls, reg_counter++);
          emit(Ocopy, Kl, into, copy_ref, R);
        } else {
          assert(arg->style == APS_CopyAndPointerOnStack);
          Ref slot = newtmp("abi.off", Kl, func);
          emit(Ostorel, 0, R, copy_ref, slot);
          emit(Oadd, Kl, slot, arg_stack_slots, getcon(slot_offset, func));
          slot_offset += 8;
        }
        break;
      }
      case APS_EnvTag:
      case APS_VarargsTag:
        // Nothing to do here, see right before the call for reg dupe.
        break;
      case APS_Invalid:
        die("unreachable");
    }
  }
  if (stack_usage) {
    // The last (first in call order) thing we do is allocate the stack
    // space we're going to fill with temporaries.
    emit(Osalloc, Kl, arg_stack_slots,
         getcon(stack_usage + SHADOW_SPACE_SIZE, func), R);
  } else {
    // When there's no usage for temporaries, we can add this into the other
    // alloca, but otherwise emit it separately (not storing into a reference)
    // so that it doesn't get removed later for being useless.
    emit(Osalloc, Kl, R, getcon(SHADOW_SPACE_SIZE, func), R);
  }
  return instr_past_args;
}
// Lowers a block-terminating return into explicit RAX/XMM0 moves (or a blit
// into the caller-provided return pad for large aggregates), and encodes the
// used return register into the jump's RCall argument.
static void lower_block_return(Fn* func, Blk* block) {
  int jmp_type = block->jmp.type;
  if (!isret(jmp_type) || jmp_type == Jret0) {
    return;
  }
  // Save the argument, and set the block to be a void return because once it's
  // lowered it's handled by the register/stack manipulation.
  Ref ret_arg = block->jmp.arg;
  block->jmp.type = Jret0;
  RegisterUsage reg_usage = {0};
  if (jmp_type == Jretc) {
    Typ* type = &typ[func->retty];
    if (type_is_by_copy(type)) {
      // Large aggregate: blit into the pad whose address was saved in
      // func->retr at parameter-lowering time, then return that pointer.
      assert(rtype(func->retr) == RTmp);
      emit(Ocopy, Kl, TMP(RAX), func->retr, R);
      emit(Oblit1, 0, R, INT(type->size), R);
      emit(Oblit0, 0, R, ret_arg, func->retr);
    } else {
      // Small aggregate: its bytes travel directly in RAX.
      emit(Oload, Kl, TMP(RAX), ret_arg, R);
    }
    reg_usage.rax_returned = true;
  } else {
    int k = jmp_type - Jretw;
    if (is_integer_type(k)) {
      emit(Ocopy, k, TMP(RAX), ret_arg, R);
      reg_usage.rax_returned = true;
    } else {
      emit(Ocopy, k, TMP(XMM0), ret_arg, R);
      reg_usage.xmm0_returned = true;
    }
  }
  block->jmp.arg = CALL(register_usage_to_call_arg_value(reg_usage));
}
// Lowers Ovastart: stores the address of the first variadic stack argument
// into the user's va_list.
static void lower_vastart(Fn* func,
                          RegisterUsage* param_reg_usage,
                          Ref valist) {
  assert(func->vararg);
  // In varargs functions:
  // 1. the int registers are already dumped to the shadow stack space;
  // 2. any parameters passed in floating point registers have
  //    been duplicated to the integer registers
  // 3. we ensure (later) that for varargs functions we're always using an rbp
  //    frame pointer.
  // So, the ... argument is just indexed past rbp by the number of named values
  // that were actually passed.
  Ref offset = newtmp("abi.vastart", Kl, func);
  emit(Ostorel, 0, R, offset, valist);
  // *8 for sizeof(u64), +16 because the return address and rbp have been pushed
  // by the time we get to the body of the function.
  emit(Oadd, Kl, offset, TMP(RBP),
       getcon(param_reg_usage->num_named_args_passed * 8 + 16, func));
}
// Lowers Ovaarg. A va_list is simply a void** on win x64, so in forward
// execution order the generated code is:
//   cursor = *ap; result = *cursor; *ap = cursor + 8;
// As everywhere in this pass, the emits below are in reverse order.
static void lower_vaarg(Fn* func, Ins* vaarg_instr) {
  Ref advanced = newtmp("abi.vaarg.inc", Kl, func);
  Ref cursor = newtmp("abi.vaarg.ptr", Kl, func);
  emit(Ostorel, 0, R, advanced, vaarg_instr->arg[0]);
  emit(Oadd, Kl, advanced, cursor, getcon(8, func));
  emit(Oload, vaarg_instr->cls, vaarg_instr->to, cursor, R);
  emit(Oload, Kl, cursor, vaarg_instr->arg[0], R);
}
// Rewrites one block's calls, varargs ops, and return into their win x64
// lowered forms, passing all other instructions through unchanged. Uses the
// global `insb`/`curi` emit buffer (filled back-to-front), then copies the
// result back into the block.
static void lower_args_for_block(Fn* func,
                                 Blk* block,
                                 RegisterUsage* param_reg_usage,
                                 ExtraAlloc** pextra_alloc) {
  // global temporary buffer used by emit. Reset to the end, and predecremented
  // when adding to it.
  curi = &insb[NIns];
  lower_block_return(func, block);
  if (block->nins) {
    // Work backwards through the instructions, either copying them unchanged,
    // or modifying as necessary.
    for (Ins* instr = &block->ins[block->nins - 1]; instr >= block->ins;) {
      switch (instr->op) {
        case Ocall:
          // lower_call consumes the preceding Oarg* run and returns the
          // instruction before it.
          instr = lower_call(func, block, instr, pextra_alloc);
          break;
        case Ovastart:
          lower_vastart(func, param_reg_usage, instr->arg[0]);
          --instr;
          break;
        case Ovaarg:
          lower_vaarg(func, instr);
          --instr;
          break;
        case Oarg:
        case Oargc:
          // Argument instructions are only reachable via their Ocall above.
          die("unreachable");
        default:
          emiti(*instr);
          --instr;
          break;
      }
    }
  }
  // This is the start block, which is processed last. Add any allocas that
  // other blocks needed.
  bool is_start_block = block == func->start;
  if (is_start_block) {
    for (ExtraAlloc* ea = *pextra_alloc; ea; ea = ea->link) {
      emiti(ea->instr);
    }
  }
  // emit/emiti add instructions from the end to the beginning of the temporary
  // global buffer. dup the final version into the final block storage.
  block->nins = &insb[NIns] - curi;
  idup(block, curi, block->nins);
}
// Parameters always form a prefix of the entry block; return a pointer to
// the first non-Opar* instruction (or one past the end if all are params).
static Ins* find_end_of_func_parameters(Blk* start_block) {
  Ins* end = &start_block->ins[start_block->nins];
  for (Ins* instr = start_block->ins; instr != end; ++instr) {
    if (!ispar(instr->op)) {
      return instr;
    }
  }
  return end;
}
// Copy from registers/stack into values. Lowers the Opar* prefix of the
// entry block into explicit copies/loads, rebuilds the entry block's
// instruction array, and returns the resulting RegisterUsage (needed later
// by va_start lowering).
static RegisterUsage lower_func_parameters(Fn* func) {
  // This is half-open, so end points after the last Opar.
  Blk* start_block = func->start;
  Ins* start_of_params = start_block->ins;
  Ins* end_of_params = find_end_of_func_parameters(start_block);
  size_t num_params = end_of_params - start_of_params;
  ArgClass* arg_classes = alloc(num_params * sizeof(ArgClass));
  ArgClass arg_ret = {0};
  // global temporary buffer used by emit. Reset to the end, and predecremented
  // when adding to it.
  curi = &insb[NIns];
  int reg_counter = 0;
  RegisterUsage reg_usage = {0};
  if (func->retty >= 0) {
    bool by_copy = type_is_by_copy(&typ[func->retty]);
    if (by_copy) {
      // Large aggregate return: the caller passes the return pad's address
      // in RCX; save it in func->retr for lower_block_return.
      assign_register_or_stack(&reg_usage, &arg_ret, /*is_float=*/false,
                               by_copy);
      Ref ret_ref = newtmp("abi.ret", Kl, func);
      emit(Ocopy, Kl, ret_ref, TMP(RCX), R);
      func->retr = ret_ref;
      ++reg_counter;
    }
  }
  Ref env = R;
  classify_arguments(&reg_usage, start_of_params, end_of_params, arg_classes,
                     &env);
  func->reg = amd64_winabi_argregs(
      CALL(register_usage_to_call_arg_value(reg_usage)), NULL);
  // Copy from the registers or stack slots into the named parameters. Depending
  // on how they're passed, they either need to be copied or loaded.
  ArgClass* arg = arg_classes;
  uint slot_offset = SHADOW_SPACE_SIZE / 4 + 4;
  for (Ins* instr = start_of_params; instr < end_of_params; ++instr, ++arg) {
    switch (arg->style) {
      case APS_Register: {
        Ref from = register_for_arg(arg->cls, reg_counter++);
        // If it's a struct at the IL level, we need to copy the register into
        // an alloca so we have something to point at (same for InlineOnStack).
        if (instr->op == Oparc) {
          arg->ref = newtmp("abi", Kl, func);
          emit(Ostorel, 0, R, arg->ref, instr->to);
          emit(Ocopy, instr->cls, arg->ref, from, R);
          emit(Oalloc8, Kl, instr->to, getcon(arg->size, func), R);
        } else {
          emit(Ocopy, instr->cls, instr->to, from, R);
        }
        break;
      }
      case APS_InlineOnStack:
        if (instr->op == Oparc) {
          arg->ref = newtmp("abi", Kl, func);
          emit(Ostorel, 0, R, arg->ref, instr->to);
          emit(Ocopy, instr->cls, arg->ref, SLOT(-slot_offset), R);
          emit(Oalloc8, Kl, instr->to, getcon(arg->size, func), R);
        } else {
          emit(Ocopy, Kl, instr->to, SLOT(-slot_offset), R);
        }
        // NOTE(review): slot offsets appear to be in 4-byte units here
        // (+= 2 per 8-byte slot) — confirm against SLOT()'s encoding.
        slot_offset += 2;
        break;
      case APS_CopyAndPointerOnStack:
        // The caller made the copy; we just load the pointer to it.
        emit(Oload, Kl, instr->to, SLOT(-slot_offset), R);
        slot_offset += 2;
        break;
      case APS_CopyAndPointerInRegister: {
        // Because this has to be a copy (that we own), it is sufficient to just
        // copy the register to the target.
        Ref from = register_for_arg(Kl, reg_counter++);
        emit(Ocopy, Kl, instr->to, from, R);
        break;
      }
      case APS_EnvTag:
        break;
      case APS_VarargsTag:
      case APS_Invalid:
        die("unreachable");
    }
  }
  // If there was an `env`, it was passed in RAX, so copy it into the env ref.
  if (!req(R, env)) {
    emit(Ocopy, Kl, env, TMP(RAX), R);
  }
  // Splice the freshly emitted prologue in front of the remaining (non-Opar)
  // instructions of the entry block.
  int num_created_instrs = &insb[NIns] - curi;
  int num_other_after_instrs = (int)(start_block->nins - num_params);
  int new_total_instrs = num_other_after_instrs + num_created_instrs;
  Ins* new_instrs = vnew(new_total_instrs, sizeof(Ins), PFn);
  Ins* instr_p = icpy(new_instrs, curi, num_created_instrs);
  icpy(instr_p, end_of_params, num_other_after_instrs);
  start_block->nins = new_total_instrs;
  start_block->ins = new_instrs;
  return reg_usage;
}
// The main job of this function is to lower generic instructions into the
// specific details of how arguments are passed, and parameters are
// interpreted for win x64. A useful reference is
// https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention .
//
// Some of the major differences from SysV if you're comparing the code
// (non-exhaustive):
// - only 4 int and 4 float regs are used
// - when an int register is assigned a value, its associated float register is
// left unused (and vice versa). i.e. there's only one counter as you assign
// arguments to registers.
// - any structs that aren't 1/2/4/8 bytes in size are passed by pointer, not
// by copying them into the stack. So e.g. if you pass something like
// `struct { void*, int64_t }` by value, it first needs to be copied to
// another alloca (in order to maintain value semantics at the language
// level), then the pointer to that copy is treated as a regular integer
// argument (which then itself may *also* be copied to the stack in the case
// there's no integer register remaining.)
// - when calling a varargs functions, floating point values must be duplicated
// integer registers. Along with the above restrictions, this makes varargs
// handling simpler for the callee than SysV.
void amd64_winabi_abi(Fn* func) {
  // Lower the incoming parameters first; this records which registers the
  // named parameters occupy (va_start lowering needs that count later).
  RegisterUsage param_reg_usage = lower_func_parameters(func);
  // Then rewrite returns, calls, and varargs handling in every block into
  // their win x64 specific versions; everything else passes through via
  // `emiti`. The entry block is deliberately processed last: lowering the
  // other blocks may queue up extra allocas (struct copies, return pads)
  // that must be materialized in the entry block.
  ExtraAlloc* extra_alloc = NULL;
  Blk* block = func->start->link;
  while (block != NULL) {
    lower_args_for_block(func, block, &param_reg_usage, &extra_alloc);
    block = block->link;
  }
  lower_args_for_block(func, func->start, &param_reg_usage, &extra_alloc);
  if (debug['A']) {
    fprintf(stderr, "\n> After ABI lowering:\n");
    printfn(func, stderr);
  }
}

852
src/qbe/arm64/abi.c Normal file
View File

@@ -0,0 +1,852 @@
#include "all.h"
typedef struct Abi Abi;
typedef struct Class Class;
typedef struct Insl Insl;
typedef struct Params Params;
/* How an argument is passed when it can't go in registers directly. */
enum {
	Cstk = 1, /* pass on the stack */
	Cptr = 2, /* replaced by a pointer */
};
/* Classification of one argument, parameter, or return value. */
struct Class {
	char class;   /* bitmask of Cstk/Cptr; 0 = in registers */
	char ishfa;   /* homogeneous floating-point aggregate? */
	struct {
		char base;  /* element class of the HFA (Ks or Kd) */
		uchar size; /* number of HFA elements */
	} hfa;
	uint size;    /* size in bytes (rounded up to 8 for aggregates) */
	uint align;
	Typ *t;
	uchar nreg;   /* number of registers consumed */
	uchar ngp;    /* ...of which general-purpose */
	uchar nfp;    /* ...of which floating-point */
	int reg[4];   /* the concrete registers */
	int cls[4];   /* the class (Kw/Kl/Ks/Kd) of each register */
};
/* Linked list of allocs to prepend to the entry block (see stkblob). */
struct Insl {
	Ins i;
	Insl *link;
};
/* Summary of a function's parameter classification (see selpar). */
struct Params {
	uint ngp; /* gp registers consumed by named params */
	uint nfp; /* fp registers consumed by named params */
	uint stk; /* bytes of stack consumed by named params */
};
/* NOTE(review): arrays are sized 12 with only 8 arg registers initialized —
 * presumably so the gp/fp cursors can sit past the last register; confirm. */
static int gpreg[12] = {R0, R1, R2, R3, R4, R5, R6, R7};
static int fpreg[12] = {V0, V1, V2, V3, V4, V5, V6, V7};
/* Store opcode for each value class. */
static int store[] = {
	[Kw] = Ostorew, [Kl] = Ostorel,
	[Ks] = Ostores, [Kd] = Ostored
};
/* layout of call's second argument (RCall)
 *
 *         13
 *  29   14 | 9    5   2  0
 *  |0.00|x|x|xxxx|xxxx|xxx|xx|  range
 *        | |    |    |   |  ` gp regs returned (0..2)
 *        | |    |    |   ` fp regs returned (0..4)
 *        | |    |    ` gp regs passed (0..8)
 *        | |    ` fp regs passed (0..8)
 *        | ` indirect result register x8 used (0..1)
 *        ` env pointer passed in x9 (0..1)
 */
/* Return 1 if every scalar field of t (recursively, across all union
 * variants) has one single floating-point class, recording that class
 * (Ks or Kd) in *cls; return 0 on any mix or non-float field. Used to
 * detect homogeneous floating-point aggregate (HFA) candidates. */
static int
isfloatv(Typ *t, char *cls)
{
	Field *f;
	uint n;
	for (n=0; n<t->nunion; n++)
		for (f=t->fields[n]; f->type != FEnd; f++)
			switch (f->type) {
			case Fs:
				if (*cls == Kd)
					return 0; /* mixing singles and doubles */
				*cls = Ks;
				break;
			case Fd:
				if (*cls == Ks)
					return 0;
				*cls = Kd;
				break;
			case FTyp:
				if (isfloatv(&typ[f->len], cls))
					break;
				/* fall through */
			default:
				return 0;
			}
	return 1;
}
/* Classify aggregate type t for argument passing, filling c. gp/fp point
 * at the next available registers in the calling convention; they are
 * read (not advanced) here — the caller adjusts its cursors. */
static void
typclass(Class *c, Typ *t, int *gp, int *fp)
{
	uint64_t sz, hfasz;
	uint n;
	sz = (t->size + 7) & -8; /* round size up to 8 bytes */
	c->t = t;
	c->class = 0;
	c->ngp = 0;
	c->nfp = 0;
	c->align = 8;
	if (t->align > 3)
		err("alignments larger than 8 are not supported");
	c->size = sz;
	/* HFA check: up to 4 elements, all the same float class */
	c->hfa.base = Kx;
	c->ishfa = isfloatv(t, &c->hfa.base);
	hfasz = t->size/(KWIDE(c->hfa.base) ? 8 : 4);
	c->ishfa &= !t->isdark && hfasz <= 4;
	c->hfa.size = hfasz;
	if (c->ishfa) {
		/* one fp register per HFA element */
		for (n=0; n<hfasz; n++, c->nfp++) {
			c->reg[n] = *fp++;
			c->cls[n] = c->hfa.base;
		}
		c->nreg = n;
	}
	else if (t->isdark || sz > 16 || sz == 0) {
		/* large structs are replaced by a
		 * pointer to some caller-allocated
		 * memory */
		c->class |= Cptr;
		c->size = 8;
		c->ngp = 1;
		*c->reg = *gp;
		*c->cls = Kl;
	}
	else {
		/* <= 16 bytes: one gp register per 8-byte chunk */
		for (n=0; n<sz/8; n++, c->ngp++) {
			c->reg[n] = *gp++;
			c->cls[n] = Kl;
		}
		c->nreg = n;
	}
}
/* Create nreg fresh temporaries (returned in tmp[]) and emit stores of them
 * into consecutive slots of mem. Emitted in reverse order, like everything
 * in this pass; the actual register copies into tmp[] are emitted later. */
static void
sttmps(Ref tmp[], int cls[], uint nreg, Ref mem, Fn *fn)
{
	uint n;
	uint64_t off;
	Ref r;
	assert(nreg <= 4);
	off = 0;
	for (n=0; n<nreg; n++) {
		tmp[n] = newtmp("abi", cls[n], fn);
		r = newtmp("abi", Kl, fn);
		emit(store[cls[n]], 0, R, tmp[n], r);
		emit(Oadd, Kl, r, mem, getcon(off, fn));
		off += KWIDE(cls[n]) ? 8 : 4;
	}
}
/* todo, may read out of bounds */
/* Emit loads of n registers from consecutive offsets of mem (8 bytes per
 * wide class, 4 otherwise). Emitted in reverse order. */
static void
ldregs(int reg[], int cls[], int n, Ref mem, Fn *fn)
{
	int i;
	uint64_t off;
	Ref r;
	off = 0;
	for (i=0; i<n; i++) {
		r = newtmp("abi", Kl, fn);
		emit(Oload, cls[i], TMP(reg[i]), r, R);
		emit(Oadd, Kl, r, mem, getcon(off, fn));
		off += KWIDE(cls[i]) ? 8 : 4;
	}
}
/* Lower a block's return into explicit register loads/copies (or a blit into
 * the caller-provided x8 buffer for Cptr aggregates), and encode the used
 * return registers into the jump argument as an RCall. */
static void
selret(Blk *b, Fn *fn)
{
	int j, k, cty;
	Ref r;
	Class cr;
	j = b->jmp.type;
	if (!isret(j) || j == Jret0)
		return;
	r = b->jmp.arg;
	b->jmp.type = Jret0;
	if (j == Jretc) {
		typclass(&cr, &typ[fn->retty], gpreg, fpreg);
		if (cr.class & Cptr) {
			/* blit into the indirect-result buffer saved
			 * in fn->retr by selpar */
			assert(rtype(fn->retr) == RTmp);
			emit(Oblit1, 0, R, INT(cr.t->size), R);
			emit(Oblit0, 0, R, r, fn->retr);
			cty = 0;
		} else {
			/* small aggregate: returned in r0../v0.. */
			ldregs(cr.reg, cr.cls, cr.nreg, r, fn);
			cty = (cr.nfp << 2) | cr.ngp;
		}
	} else {
		/* scalar return in r0 or v0 */
		k = j - Jretw;
		if (KBASE(k) == 0) {
			emit(Ocopy, k, TMP(R0), r, R);
			cty = 1;
		} else {
			emit(Ocopy, k, TMP(V0), r, R);
			cty = 1 << 2;
		}
	}
	b->jmp.arg = CALL(cty);
}
/* Classify a run of Oarg*/Opar* instructions [i0, i1) into carg, assigning
 * registers in order. Returns the RCall-encoded passed-register counts
 * (env flag, gp count, fp count) — see the layout comment above. */
static int
argsclass(Ins *i0, Ins *i1, Class *carg)
{
	int va, envc, ngp, nfp, *gp, *fp;
	Class *c;
	Ins *i;
	va = 0;
	envc = 0;
	gp = gpreg;
	fp = fpreg;
	ngp = 8;
	nfp = 8;
	for (i=i0, c=carg; i<i1; i++, c++)
		switch (i->op) {
		case Oargsb:
		case Oargub:
		case Oparsb:
		case Oparub:
			c->size = 1;
			goto Scalar;
		case Oargsh:
		case Oarguh:
		case Oparsh:
		case Oparuh:
			c->size = 2;
			goto Scalar;
		case Opar:
		case Oarg:
			c->size = 8;
			if (T.apple && !KWIDE(i->cls))
				c->size = 4;
		Scalar:
			c->align = c->size;
			*c->cls = i->cls;
			if (va) {
				/* apple ABI: variadic args always on stack */
				c->class |= Cstk;
				c->size = 8;
				c->align = 8;
				break;
			}
			if (KBASE(i->cls) == 0 && ngp > 0) {
				ngp--;
				*c->reg = *gp++;
				break;
			}
			if (KBASE(i->cls) == 1 && nfp > 0) {
				nfp--;
				*c->reg = *fp++;
				break;
			}
			c->class |= Cstk;
			break;
		case Oparc:
		case Oargc:
			typclass(c, &typ[i->arg[0].val], gp, fp);
			/* an aggregate goes in registers only if ALL the
			 * registers it needs are still available */
			if (c->ngp <= ngp) {
				if (c->nfp <= nfp) {
					ngp -= c->ngp;
					nfp -= c->nfp;
					gp += c->ngp;
					fp += c->nfp;
					break;
				} else
					nfp = 0;
			} else
				ngp = 0;
			c->class |= Cstk;
			break;
		case Opare:
		case Oarge:
			/* env pointer travels in x9 (see R9 here) */
			*c->reg = R9;
			*c->cls = Kl;
			envc = 1;
			break;
		case Oargv:
			va = T.apple != 0;
			break;
		default:
			die("unreachable");
		}
	return envc << 14 | (gp-gpreg) << 5 | (fp-fpreg) << 9;
}
/* Decode the return-register counts from an RCall Ref into a register
 * bitmask; optionally report the gp/fp counts through p. */
bits
arm64_retregs(Ref r, int p[2])
{
	bits b;
	int ngp, nfp, n;
	assert(rtype(r) == RCall);
	ngp = r.val & 3;
	nfp = (r.val >> 2) & 7;
	if (p) {
		p[0] = ngp;
		p[1] = nfp;
	}
	b = 0;
	for (n=0; n<ngp; n++)
		b |= BIT(R0+n);
	for (n=0; n<nfp; n++)
		b |= BIT(V0+n);
	return b;
}
/* Decode the argument-register usage from an RCall Ref into a register
 * bitmask, including x8 (indirect result) and x9 (env) when flagged;
 * optionally report the gp/fp counts through p. */
bits
arm64_argregs(Ref r, int p[2])
{
	bits b;
	int ngp, nfp, x8, x9, n;
	assert(rtype(r) == RCall);
	ngp = (r.val >> 5) & 15;
	nfp = (r.val >> 9) & 15;
	x8 = (r.val >> 13) & 1;
	x9 = (r.val >> 14) & 1;
	if (p) {
		p[0] = ngp + x8 + x9;
		p[1] = nfp;
	}
	b = 0;
	for (n=0; n<ngp; n++)
		b |= BIT(R0+n);
	for (n=0; n<nfp; n++)
		b |= BIT(V0+n);
	return b | ((bits)x8 << R8) | ((bits)x9 << R9);
}
/* Queue (through *ilp) an alloc of stack storage for the aggregate described
 * by c, targeting r; the queued instructions are emitted later at the top of
 * the entry block. */
static void
stkblob(Ref r, Class *c, Fn *fn, Insl **ilp)
{
	Insl *il;
	uint64_t sz;
	int al;
	al = c->t->align - 2; /* NAlign == 3 */
	if (al < 0)
		al = 0;
	/* Cptr aggregates live behind a pointer, so reserve the full type
	 * size rather than the (pointer-sized) c->size. */
	sz = c->class & Cptr ? c->t->size : c->size;
	il = alloc(sizeof *il);
	il->i = (Ins){Oalloc+al, Kl, r, {getcon(sz, fn)}};
	il->link = *ilp;
	*ilp = il;
}
/* Round x up to the next multiple of al. (-al == ~(al-1), so this matches
 * the usual round-up mask for power-of-two al.) */
static uint
align(uint x, uint al)
{
	uint mask;
	mask = al - 1;
	return (x + mask) & ~mask;
}
/* Lower one call: i0..i1 are the Oarg* instructions, i1 the Ocall itself.
 * Classifies arguments, moves them into registers / stack slots, emits the
 * call with its RCall usage word, and unpacks the return value. All emitted
 * in reverse order. ilp collects allocs for the entry block. */
static void
selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
{
	Ins *i;
	Class *ca, *c, cr;
	int op, cty;
	uint n, stk, off;;
	Ref r, rstk, tmp[4];
	ca = alloc((i1-i0) * sizeof ca[0]);
	cty = argsclass(i0, i1, ca);
	/* measure the stack area needed for stack-passed args */
	stk = 0;
	for (i=i0, c=ca; i<i1; i++, c++) {
		if (c->class & Cptr) {
			/* by-pointer aggregate: the argument becomes the
			 * address of a fresh stack copy */
			i->arg[0] = newtmp("abi", Kl, fn);
			stkblob(i->arg[0], c, fn, ilp);
			i->op = Oarg;
		}
		if (c->class & Cstk) {
			stk = align(stk, c->align);
			stk += c->size;
		}
	}
	stk = align(stk, 16);
	rstk = getcon(stk, fn);
	if (stk)
		/* remember: reversed — this pops the arg area after the call */
		emit(Oadd, Kl, TMP(SP), TMP(SP), rstk);
	if (!req(i1->arg[1], R)) {
		/* aggregate return */
		typclass(&cr, &typ[i1->arg[1].val], gpreg, fpreg);
		stkblob(i1->to, &cr, fn, ilp);
		cty |= (cr.nfp << 2) | cr.ngp;
		if (cr.class & Cptr) {
			/* spill & rega expect calls to be
			 * followed by copies from regs,
			 * so we emit a dummy
			 */
			cty |= 1 << 13 | 1;
			emit(Ocopy, Kw, R, TMP(R0), R);
		} else {
			/* store the returned registers into the result */
			sttmps(tmp, cr.cls, cr.nreg, i1->to, fn);
			for (n=0; n<cr.nreg; n++) {
				r = TMP(cr.reg[n]);
				emit(Ocopy, cr.cls[n], tmp[n], r, R);
			}
		}
	} else {
		/* scalar return in r0 or v0 */
		if (KBASE(i1->cls) == 0) {
			emit(Ocopy, i1->cls, i1->to, TMP(R0), R);
			cty |= 1;
		} else {
			emit(Ocopy, i1->cls, i1->to, TMP(V0), R);
			cty |= 1 << 2;
		}
	}
	emit(Ocall, 0, R, i1->arg[0], CALL(cty));
	if (cty & (1 << 13))
		/* struct return argument */
		emit(Ocopy, Kl, TMP(R8), i1->to, R);
	/* load register-passed arguments */
	for (i=i0, c=ca; i<i1; i++, c++) {
		if ((c->class & Cstk) != 0)
			continue;
		if (i->op == Oarg || i->op == Oarge || isargbh(i->op))
			emit(Ocopy, *c->cls, TMP(*c->reg), i->arg[0], R);
		if (i->op == Oargc)
			ldregs(c->reg, c->cls, c->nreg, i->arg[1], fn);
	}
	/* populate the stack */
	off = 0;
	for (i=i0, c=ca; i<i1; i++, c++) {
		if ((c->class & Cstk) == 0)
			continue;
		off = align(off, c->align);
		r = newtmp("abi", Kl, fn);
		if (i->op == Oarg || isargbh(i->op)) {
			switch (c->size) {
			case 1: op = Ostoreb; break;
			case 2: op = Ostoreh; break;
			case 4:
			case 8: op = store[*c->cls]; break;
			default: die("unreachable");
			}
			emit(op, 0, R, i->arg[0], r);
		} else {
			assert(i->op == Oargc);
			emit(Oblit1, 0, R, INT(c->size), R);
			emit(Oblit0, 0, R, i->arg[1], r);
		}
		emit(Oadd, Kl, r, TMP(SP), getcon(off, fn));
		off += c->size;
	}
	if (stk)
		/* reversed: this reserves the arg area before the stores */
		emit(Osub, Kl, TMP(SP), TMP(SP), rstk);
	/* finally (first in program order), blit by-pointer aggregates into
	 * their stack copies */
	for (i=i0, c=ca; i<i1; i++, c++)
		if (c->class & Cptr) {
			emit(Oblit1, 0, R, INT(c->t->size), R);
			emit(Oblit0, 0, R, i->arg[1], i->arg[0]);
		}
}
/* Lower the Opar* prefix [i0, i1) of the entry block: classify each
 * parameter, copy it out of its registers or stack slot, and record the
 * function's used argument registers. Returns the counts/stack size that
 * va_start lowering needs later. */
static Params
selpar(Fn *fn, Ins *i0, Ins *i1)
{
	Class *ca, *c, cr;
	Insl *il;
	Ins *i;
	int op, n, cty;
	uint off;
	Ref r, tmp[16], *t;
	ca = alloc((i1-i0) * sizeof ca[0]);
	curi = &insb[NIns];
	cty = argsclass(i0, i1, ca);
	fn->reg = arm64_argregs(CALL(cty), 0);
	il = 0;
	t = tmp;
	/* register-passed aggregates need stack storage to be addressable;
	 * reserve it and queue the stores of their registers */
	for (i=i0, c=ca; i<i1; i++, c++) {
		if (i->op != Oparc || (c->class & (Cptr|Cstk)))
			continue;
		sttmps(t, c->cls, c->nreg, i->to, fn);
		stkblob(i->to, c, fn, &il);
		t += c->nreg;
	}
	for (; il; il=il->link)
		emiti(il->i);
	if (fn->retty >= 0) {
		typclass(&cr, &typ[fn->retty], gpreg, fpreg);
		if (cr.class & Cptr) {
			/* indirect result: save the buffer address from x8 */
			fn->retr = newtmp("abi", Kl, fn);
			emit(Ocopy, Kl, fn->retr, TMP(R8), R);
			fn->reg |= BIT(R8);
		}
	}
	t = tmp;
	off = 0;
	for (i=i0, c=ca; i<i1; i++, c++)
		if (i->op == Oparc && !(c->class & Cptr)) {
			if (c->class & Cstk) {
				/* stack aggregate: just record its slot */
				off = align(off, c->align);
				fn->tmp[i->to.val].slot = -(off+2);
				off += c->size;
			} else
				/* copy the registers into the temporaries
				 * that sttmps stores to memory */
				for (n=0; n<c->nreg; n++) {
					r = TMP(c->reg[n]);
					emit(Ocopy, c->cls[n], *t++, r, R);
				}
		} else if (c->class & Cstk) {
			/* stack scalar: load (sign/zero-extending bytes
			 * and halves as required) */
			off = align(off, c->align);
			if (isparbh(i->op))
				op = Oloadsb + (i->op - Oparsb);
			else
				op = Oload;
			emit(op, *c->cls, i->to, SLOT(-(off+2)), R);
			off += c->size;
		} else {
			emit(Ocopy, *c->cls, i->to, TMP(*c->reg), R);
		}
	return (Params){
		.stk = align(off, 8),
		.ngp = (cty >> 5) & 15,
		.nfp = (cty >> 9) & 15
	};
}
/* Flush the instructions buffered in insb/curi into a fresh block linked
 * right after b, and reset the buffer. Used by vaarg lowering to carve one
 * block into a small diamond. */
static Blk *
split(Fn *fn, Blk *b)
{
	Blk *bn;
	++fn->nblk;
	bn = newblk();
	idup(bn, curi, &insb[NIns]-curi);
	curi = &insb[NIns];
	/* derive a unique name from the parent block */
	bn->visit = ++b->visit;
	strf(bn->name, "%s.%d", b->name, b->visit);
	bn->loop = b->loop;
	bn->link = b->link;
	b->link = bn;
	return bn;
}
/* After splitting, phis in b that named bp as a predecessor must be
 * rewired to name bp1 instead. */
static void
chpred(Blk *b, Blk *bp, Blk *bp1)
{
	Phi *p;
	uint a;
	for (p=b->phi; p; p=p->link) {
		a = 0;
		while (p->blk[a] != bp) {
			assert(a+1 < p->narg);
			a++;
		}
		p->blk[a] = bp1;
	}
}
/* Lower Ovaarg for the Apple ABI, where a va_list is a simple stack pointer:
 * load the cursor, load the argument through it, then advance it by 8.
 * Emitted in reverse order. */
static void
apple_selvaarg(Fn *fn, Blk *b, Ins *i)
{
	Ref ap, stk, stk8, c8;
	(void)b;
	c8 = getcon(8, fn);
	ap = i->arg[0];
	stk8 = newtmp("abi", Kl, fn);
	stk = newtmp("abi", Kl, fn);
	emit(Ostorel, 0, R, stk8, ap);
	emit(Oadd, Kl, stk8, stk, c8);
	emit(Oload, i->cls, i->to, stk, R);
	emit(Oload, Kl, stk, ap, R);
}
/* Lower Ovaarg for the standard AAPCS64 va_list: test the (negative while
 * registers remain) gr_offs/vr_offs field, then fetch the argument from the
 * register-save area or from the stack. Splits b into a diamond (breg/bstk
 * joining at b0) whose shape is sketched in the comment below. */
static void
arm64_selvaarg(Fn *fn, Blk *b, Ins *i)
{
	Ref loc, lreg, lstk, nr, r0, r1, c8, c16, c24, c28, ap;
	Blk *b0, *bstk, *breg;
	int isgp;
	c8 = getcon(8, fn);
	c16 = getcon(16, fn);
	c24 = getcon(24, fn);
	c28 = getcon(28, fn);
	ap = i->arg[0];
	isgp = KBASE(i->cls) == 0;
	/* @b [...]
	       r0 =l add ap, (24 or 28)
	       nr =l loadsw r0
	       r1 =w csltw nr, 0
	       jnz r1, @breg, @bstk
	   @breg
	       r0 =l add ap, (8 or 16)
	       r1 =l loadl r0
	       lreg =l add r1, nr
	       r0 =w add nr, (8 or 16)
	       r1 =l add ap, (24 or 28)
	       storew r0, r1
	   @bstk
	       lstk =l loadl ap
	       r0 =l add lstk, 8
	       storel r0, ap
	   @b0
	       %loc =l phi @breg %lreg, @bstk %lstk
	       i->to =(i->cls) load %loc
	*/
	loc = newtmp("abi", Kl, fn);
	emit(Oload, i->cls, i->to, loc, R);
	/* b0: the join block, taking over b's terminator */
	b0 = split(fn, b);
	b0->jmp = b->jmp;
	b0->s1 = b->s1;
	b0->s2 = b->s2;
	if (b->s1)
		chpred(b->s1, b, b0);
	if (b->s2 && b->s2 != b->s1)
		chpred(b->s2, b, b0);
	/* breg: argument comes from the register save area */
	lreg = newtmp("abi", Kl, fn);
	nr = newtmp("abi", Kl, fn);
	r0 = newtmp("abi", Kw, fn);
	r1 = newtmp("abi", Kl, fn);
	emit(Ostorew, Kw, R, r0, r1);
	emit(Oadd, Kl, r1, ap, isgp ? c24 : c28);
	emit(Oadd, Kw, r0, nr, isgp ? c8 : c16);
	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kl, fn);
	emit(Oadd, Kl, lreg, r1, nr);
	emit(Oload, Kl, r1, r0, R);
	emit(Oadd, Kl, r0, ap, isgp ? c8 : c16);
	breg = split(fn, b);
	breg->jmp.type = Jjmp;
	breg->s1 = b0;
	/* bstk: argument comes from the stack; bump the stack cursor */
	lstk = newtmp("abi", Kl, fn);
	r0 = newtmp("abi", Kl, fn);
	emit(Ostorel, Kw, R, r0, ap);
	emit(Oadd, Kl, r0, lstk, c8);
	emit(Oload, Kl, lstk, ap, R);
	bstk = split(fn, b);
	bstk->jmp.type = Jjmp;
	bstk->s1 = b0;
	/* join the two sources with a phi in b0 */
	b0->phi = alloc(sizeof *b0->phi);
	*b0->phi = (Phi){
		.cls = Kl, .to = loc,
		.narg = 2,
		.blk = vnew(2, sizeof b0->phi->blk[0], PFn),
		.arg = vnew(2, sizeof b0->phi->arg[0], PFn),
	};
	b0->phi->blk[0] = bstk;
	b0->phi->blk[1] = breg;
	b0->phi->arg[0] = lstk;
	b0->phi->arg[1] = lreg;
	/* b now ends with the offs < 0 test selecting breg/bstk */
	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kw, fn);
	b->jmp.type = Jjnz;
	b->jmp.arg = r1;
	b->s1 = breg;
	b->s2 = bstk;
	emit(Ocmpw+Cislt, Kw, r1, nr, CON_Z);
	emit(Oloadsw, Kl, nr, r0, R);
	emit(Oadd, Kl, r0, ap, isgp ? c24 : c28);
}
/* Lower va_start for the Apple arm64 ABI, where the va_list is a
 * single pointer to the first anonymous stack argument: store the
 * address of the incoming stack area (slot -1) plus the size of
 * the named arguments (p.stk) into ap.  emit() prepends, so the
 * sequence reads bottom-up. */
static void
apple_selvastart(Fn *fn, Params p, Ref ap)
{
	Ref off, stk, arg;

	off = getcon(p.stk, fn);
	stk = newtmp("abi", Kl, fn);
	arg = newtmp("abi", Kl, fn);

	emit(Ostorel, 0, R, arg, ap);
	emit(Oadd, Kl, arg, stk, off);
	emit(Oaddr, Kl, stk, SLOT(-1), R);
}
/* Lower va_start for the standard arm64 (AAPCS64) ABI: initialize
 * the 32-byte va_list at ap — __stack (+0), __gr_top (+8),
 * __vr_top (+16), __gr_offs (+24), __vr_offs (+28) — relative to
 * the 192-byte register save area spilled by the prologue at
 * SLOT(-1).  emit() prepends, so groups appear in reverse
 * execution order. */
static void
arm64_selvastart(Fn *fn, Params p, Ref ap)
{
	Ref r0, r1, rsave;

	rsave = newtmp("abi", Kl, fn);

	/* ap->__stack = rsave + p.stk + 192 */
	r0 = newtmp("abi", Kl, fn);
	emit(Ostorel, Kw, R, r0, ap);
	emit(Oadd, Kl, r0, rsave, getcon(p.stk + 192, fn));

	/* ap->__gr_top = rsave + 64 */
	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kl, fn);
	emit(Ostorel, Kw, R, r1, r0);
	emit(Oadd, Kl, r1, rsave, getcon(64, fn));
	emit(Oadd, Kl, r0, ap, getcon(8, fn));

	/* ap->__vr_top = rsave + 192; also materialize rsave itself */
	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kl, fn);
	emit(Ostorel, Kw, R, r1, r0);
	emit(Oadd, Kl, r1, rsave, getcon(192, fn));
	emit(Oaddr, Kl, rsave, SLOT(-1), R);
	emit(Oadd, Kl, r0, ap, getcon(16, fn));

	/* ap->__gr_offs = (ngp-8)*8, negative while gp slots remain */
	r0 = newtmp("abi", Kl, fn);
	emit(Ostorew, Kw, R, getcon((p.ngp-8)*8, fn), r0);
	emit(Oadd, Kl, r0, ap, getcon(24, fn));

	/* ap->__vr_offs = (nfp-8)*16 */
	r0 = newtmp("abi", Kl, fn);
	emit(Ostorew, Kw, R, getcon((p.nfp-8)*16, fn), r0);
	emit(Oadd, Kl, r0, ap, getcon(28, fn));
}
/* Entry point of the arm64 ABI lowering pass: rewrite parameters,
 * calls, returns and vararg instructions into the register/stack
 * moves required by the calling convention.  Blocks created by
 * arm64_selvaarg() are marked via Blk.visit so they are not
 * processed twice; the start block is handled last so the
 * stack-argument instructions collected in il land there. */
void
arm64_abi(Fn *fn)
{
	Blk *b;
	Ins *i, *i0;
	Insl *il;
	int n0, n1, ioff;
	Params p;

	for (b=fn->start; b; b=b->link)
		b->visit = 0;

	/* lower parameters */
	for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++)
		if (!ispar(i->op))
			break;
	p = selpar(fn, b->ins, i);
	/* splice the instructions emitted by selpar() in front of
	 * the remaining (non-parameter) instructions */
	n0 = &insb[NIns] - curi;
	ioff = i - b->ins;
	n1 = b->nins - ioff;
	vgrow(&b->ins, n0+n1);
	icpy(b->ins+n0, b->ins+ioff, n1);
	icpy(b->ins, curi, n0);
	b->nins = n0+n1;

	/* lower calls, returns, and vararg instructions */
	il = 0;
	b = fn->start;
	do {
		if (!(b = b->link))
			b = fn->start; /* do it last */
		if (b->visit)
			continue;
		curi = &insb[NIns];
		selret(b, fn);
		for (i=&b->ins[b->nins]; i!=b->ins;)
			switch ((--i)->op) {
			default:
				emiti(*i);
				break;
			case Ocall:
				/* scan back over the call's argument
				 * instructions */
				for (i0=i; i0>b->ins; i0--)
					if (!isarg((i0-1)->op))
						break;
				selcall(fn, i0, i, &il);
				i = i0;
				break;
			case Ovastart:
				if (T.apple)
					apple_selvastart(fn, p, i->arg[0]);
				else
					arm64_selvastart(fn, p, i->arg[0]);
				break;
			case Ovaarg:
				if (T.apple)
					apple_selvaarg(fn, b, i);
				else
					arm64_selvaarg(fn, b, i);
				break;
			case Oarg:
			case Oargc:
				die("unreachable");
			}
		if (b == fn->start)
			for (; il; il=il->link)
				emiti(il->i);
		idup(b, curi, &insb[NIns]-curi);
	} while (b != fn->start);

	if (debug['A']) {
		fprintf(stderr, "\n> After ABI lowering:\n");
		printfn(fn, stderr);
	}
}
/* abi0 pass for the apple target; the Apple arm64 ABI requires
 * sub-word (byte/halfword) call arguments and return values to be
 * extended by the producer, so introduce the necessary sign/zero
 * extensions in calls and returns. */
void
apple_extsb(Fn *fn)
{
	Blk *b;
	Ins *i0, *i1, *i;
	int j, op;
	Ref r;

	for (b=fn->start; b; b=b->link) {
		curi = &insb[NIns];
		j = b->jmp.type;
		if (isretbh(j)) {
			/* extend the sub-word return value, then demote
			 * the jump to a plain word return */
			r = newtmp("abi", Kw, fn);
			op = Oextsb + (j - Jretsb);
			emit(op, Kw, r, b->jmp.arg, R);
			b->jmp.arg = r;
			b->jmp.type = Jretw;
		}
		for (i=&b->ins[b->nins]; i>b->ins;) {
			emiti(*--i);
			if (i->op != Ocall)
				continue;
			/* find the first argument instruction of the call */
			for (i0=i1=i; i0>b->ins; i0--)
				if (!isarg((i0-1)->op))
					break;
			for (i=i1; i>i0;) {
				emiti(*--i);
				if (isargbh(i->op)) {
					/* route the argument through a fresh
					 * temp that the extension emitted in
					 * the next loop will define */
					i->to = newtmp("abi", Kl, fn);
					curi->arg[0] = i->to;
				}
			}
			for (i=i1; i>i0;)
				if (isargbh((--i)->op)) {
					op = Oextsb + (i->op - Oargsb);
					emit(op, Kw, i->to, i->arg[0], R);
				}
		}
		idup(b, curi, &insb[NIns]-curi);
	}
	if (debug['A']) {
		fprintf(stderr, "\n> After Apple pre-ABI:\n");
		printfn(fn, stderr);
	}
}

38
src/qbe/arm64/all.h Normal file
View File

@@ -0,0 +1,38 @@
#include "../all.h"

/* arm64 machine registers; RXX and Tmp0 come from the generic
 * all.h.  V31 is deliberately excluded from allocation (it is
 * used as an emission scratch register). */
enum Arm64Reg {
	R0 = RXX + 1,
	R1, R2, R3, R4, R5, R6, R7,
	R8, R9, R10, R11, R12, R13, R14, R15,
	IP0, IP1, R18, R19, R20, R21, R22, R23,
	R24, R25, R26, R27, R28, FP, LR, SP,

	V0, V1, V2, V3, V4, V5, V6, V7,
	V8, V9, V10, V11, V12, V13, V14, V15,
	V16, V17, V18, V19, V20, V21, V22, V23,
	V24, V25, V26, V27, V28, V29, V30, /* V31, */

	NFPR = V30 - V0 + 1,                     /* fp registers */
	NGPR = SP - R0 + 1,                      /* gp registers */
	NGPS = R18 - R0 + 1 /* LR */ + 1,        /* entries of arm64_rsave (gp) */
	NFPS = (V7 - V0 + 1) + (V30 - V16 + 1),  /* entries of arm64_rsave (fp) */
	NCLR = (R28 - R19 + 1) + (V15 - V8 + 1), /* entries of arm64_rclob */
};
MAKESURE(reg_not_tmp, V30 < (int)Tmp0);

/* targ.c */
extern int arm64_rsave[];
extern int arm64_rclob[];

/* abi.c */
bits arm64_retregs(Ref, int[2]);
bits arm64_argregs(Ref, int[2]);
void arm64_abi(Fn *);
void apple_extsb(Fn *);

/* isel.c */
int arm64_logimm(uint64_t, int);
void arm64_isel(Fn *);

/* emit.c */
void arm64_emitfn(Fn *, FILE *);

679
src/qbe/arm64/emit.c Normal file
View File

@@ -0,0 +1,679 @@
#include "all.h"

typedef struct E E;

/* Per-function emission state. */
struct E {
	FILE *f;        /* output assembly stream */
	Fn *fn;         /* function being emitted */
	uint64_t frame; /* frame bytes above the saved x29/x30 pair */
	uint padding;   /* bytes between locals and spill slots */
};

/* X-macro mapping QBE comparison codes to arm64 condition
 * suffixes; used for both cset and conditional branches. */
#define CMP(X) \
	X(Cieq, "eq") \
	X(Cine, "ne") \
	X(Cisge, "ge") \
	X(Cisgt, "gt") \
	X(Cisle, "le") \
	X(Cislt, "lt") \
	X(Ciuge, "cs") \
	X(Ciugt, "hi") \
	X(Ciule, "ls") \
	X(Ciult, "cc") \
	X(NCmpI+Cfeq, "eq") \
	X(NCmpI+Cfge, "ge") \
	X(NCmpI+Cfgt, "gt") \
	X(NCmpI+Cfle, "ls") \
	X(NCmpI+Cflt, "mi") \
	X(NCmpI+Cfne, "ne") \
	X(NCmpI+Cfo, "vc") \
	X(NCmpI+Cfuo, "vs")

/* Wildcard classes used in the omap[] table below. */
enum {
	Ki = -1, /* matches Kw and Kl */
	Ka = -2, /* matches all classes */
};
/* Table mapping (op, class) pairs to assembly format strings
 * interpreted by emitf(); searched linearly by emitins(). */
static struct {
	short op;
	short cls;
	char *fmt;
} omap[] = {
	{ Oadd,    Ki, "add %=, %0, %1" },
	{ Oadd,    Ka, "fadd %=, %0, %1" },
	{ Osub,    Ki, "sub %=, %0, %1" },
	{ Osub,    Ka, "fsub %=, %0, %1" },
	{ Oneg,    Ki, "neg %=, %0" },
	{ Oneg,    Ka, "fneg %=, %0" },
	{ Oand,    Ki, "and %=, %0, %1" },
	{ Oor,     Ki, "orr %=, %0, %1" },
	{ Oxor,    Ki, "eor %=, %0, %1" },
	{ Osar,    Ki, "asr %=, %0, %1" },
	{ Oshr,    Ki, "lsr %=, %0, %1" },
	{ Oshl,    Ki, "lsl %=, %0, %1" },
	{ Omul,    Ki, "mul %=, %0, %1" },
	{ Omul,    Ka, "fmul %=, %0, %1" },
	{ Odiv,    Ki, "sdiv %=, %0, %1" },
	{ Odiv,    Ka, "fdiv %=, %0, %1" },
	{ Oudiv,   Ki, "udiv %=, %0, %1" },
	{ Orem,    Ki, "sdiv %?, %0, %1\n\tmsub\t%=, %?, %1, %0" },
	{ Ourem,   Ki, "udiv %?, %0, %1\n\tmsub\t%=, %?, %1, %0" },
	{ Ocopy,   Ki, "mov %=, %0" },
	{ Ocopy,   Ka, "fmov %=, %0" },
	{ Oswap,   Ki, "mov %?, %0\n\tmov\t%0, %1\n\tmov\t%1, %?" },
	{ Oswap,   Ka, "fmov %?, %0\n\tfmov\t%0, %1\n\tfmov\t%1, %?" },
	{ Ostoreb, Kw, "strb %W0, %M1" },
	{ Ostoreh, Kw, "strh %W0, %M1" },
	{ Ostorew, Kw, "str %W0, %M1" },
	{ Ostorel, Kw, "str %L0, %M1" },
	{ Ostores, Kw, "str %S0, %M1" },
	{ Ostored, Kw, "str %D0, %M1" },
	{ Oloadsb, Ki, "ldrsb %=, %M0" },
	{ Oloadub, Ki, "ldrb %W=, %M0" },
	{ Oloadsh, Ki, "ldrsh %=, %M0" },
	{ Oloaduh, Ki, "ldrh %W=, %M0" },
	{ Oloadsw, Kw, "ldr %=, %M0" },
	{ Oloadsw, Kl, "ldrsw %=, %M0" },
	{ Oloaduw, Ki, "ldr %W=, %M0" },
	{ Oload,   Ka, "ldr %=, %M0" },
	{ Oextsb,  Ki, "sxtb %=, %W0" },
	{ Oextub,  Ki, "uxtb %W=, %W0" },
	{ Oextsh,  Ki, "sxth %=, %W0" },
	{ Oextuh,  Ki, "uxth %W=, %W0" },
	{ Oextsw,  Ki, "sxtw %L=, %W0" },
	{ Oextuw,  Ki, "mov %W=, %W0" },
	{ Oexts,   Kd, "fcvt %=, %S0" },
	{ Otruncd, Ks, "fcvt %=, %D0" },
	{ Ocast,   Kw, "fmov %=, %S0" },
	{ Ocast,   Kl, "fmov %=, %D0" },
	{ Ocast,   Ks, "fmov %=, %W0" },
	{ Ocast,   Kd, "fmov %=, %L0" },
	{ Ostosi,  Ka, "fcvtzs %=, %S0" },
	{ Ostoui,  Ka, "fcvtzu %=, %S0" },
	{ Odtosi,  Ka, "fcvtzs %=, %D0" },
	{ Odtoui,  Ka, "fcvtzu %=, %D0" },
	{ Oswtof,  Ka, "scvtf %=, %W0" },
	{ Ouwtof,  Ka, "ucvtf %=, %W0" },
	{ Osltof,  Ka, "scvtf %=, %L0" },
	{ Oultof,  Ka, "ucvtf %=, %L0" },
	{ Ocall,   Kw, "blr %L0" },
	{ Oacmp,   Ki, "cmp %0, %1" },
	{ Oacmn,   Ki, "cmn %0, %1" },
	{ Oafcmp,  Ka, "fcmpe %0, %1" },
#define X(c, str) \
	{ Oflag+c, Ki, "cset %=, " str },
	CMP(X)
#undef X
	{ NOp, 0, 0 }
};

enum {
	V31 = 0x1fffffff, /* local name for V31 */
};
/* Render register r of class k as its assembly name.  Returns a
 * pointer to a static buffer, so the result is only valid until
 * the next call. */
static char *
rname(int r, int k)
{
	static char buf[4];
	char pfx;

	if (r == SP) {
		assert(k == Kl);
		sprintf(buf, "sp");
		return buf;
	}
	if (R0 <= r && r <= LR) {
		/* general-purpose: w for words, x otherwise */
		if (k == Kw)
			pfx = 'w';
		else if (k == Kl || k == Kx)
			pfx = 'x';
		else
			die("invalid class");
		sprintf(buf, "%c%d", pfx, r-R0);
	} else if (V0 <= r && r <= V30) {
		/* floating point: s for singles, d otherwise */
		if (k == Ks)
			pfx = 's';
		else if (k == Kd || k == Kx)
			pfx = 'd';
		else
			die("invalid class");
		sprintf(buf, "%c%d", pfx, r-V0);
	} else if (r == V31) {
		if (k == Ks)
			sprintf(buf, "s31");
		else if (k == Kd)
			sprintf(buf, "d31");
		else
			die("invalid class");
	} else
		die("invalid register");
	return buf;
}
/* Byte offset of stack slot r from the frame pointer x29.
 * Slot -1 is the bottom of the incoming-argument area; other
 * negative slots index the register save area / spills, positive
 * slots index locals above the padding. */
static uint64_t
slot(Ref r, E *e)
{
	int s;

	s = rsval(r);
	if (s == -1)
		return 16 + e->frame;
	if (s < 0) {
		/* skip the 192-byte varargs save area when present */
		if (e->fn->vararg && !T.apple)
			return 16 + e->frame + 192 - (s+2);
		else
			return 16 + e->frame - (s+2);
	} else
		return 16 + e->padding + 4 * s;
}
/* Emit instruction i according to format string s.  '%' escapes
 * select operands: %=/%0/%1 print the destination/arguments,
 * W/L/S/D modifiers force a register class, %? prints a scratch
 * register (ip1 or v31 depending on the bank), and %Mx prints a
 * memory operand ([reg] or an x29-relative slot).  The first
 * space of s becomes the tab between mnemonic and operands. */
static void
emitf(char *s, Ins *i, E *e)
{
	Ref r;
	int k, c;
	Con *pc;
	uint64_t n;
	uint sp;

	fputc('\t', e->f);
	sp = 0;
	for (;;) {
		k = i->cls;
		while ((c = *s++) != '%')
			if (c == ' ' && !sp) {
				fputc('\t', e->f);
				sp = 1;
			} else if (!c) {
				fputc('\n', e->f);
				return;
			} else
				fputc(c, e->f);
	Switch:
		switch ((c = *s++)) {
		default:
			die("invalid escape");
		case 'W':
			k = Kw;
			goto Switch;
		case 'L':
			k = Kl;
			goto Switch;
		case 'S':
			k = Ks;
			goto Switch;
		case 'D':
			k = Kd;
			goto Switch;
		case '?':
			/* scratch register matching the class bank */
			if (KBASE(k) == 0)
				fputs(rname(IP1, k), e->f);
			else
				fputs(rname(V31, k), e->f);
			break;
		case '=':
		case '0':
			r = c == '=' ? i->to : i->arg[0];
			assert(isreg(r) || req(r, TMP(V31)));
			fputs(rname(r.val, k), e->f);
			break;
		case '1':
			r = i->arg[1];
			switch (rtype(r)) {
			default:
				die("invalid second argument");
			case RTmp:
				assert(isreg(r));
				fputs(rname(r.val, k), e->f);
				break;
			case RCon:
				/* immediate forms: logical immediate,
				 * 12-bit, or 12-bit shifted by 12 */
				pc = &e->fn->con[r.val];
				n = pc->bits.i;
				assert(pc->type == CBits);
				if (n >> 24) {
					assert(arm64_logimm(n, k));
					fprintf(e->f, "#%"PRIu64, n);
				} else if (n & 0xfff000) {
					assert(!(n & ~0xfff000ull));
					fprintf(e->f, "#%"PRIu64", lsl #12",
						n>>12);
				} else {
					assert(!(n & ~0xfffull));
					fprintf(e->f, "#%"PRIu64, n);
				}
				break;
			}
			break;
		case 'M':
			c = *s++;
			assert(c == '0' || c == '1' || c == '=');
			r = c == '=' ? i->to : i->arg[c - '0'];
			switch (rtype(r)) {
			default:
				die("todo (arm emit): unhandled ref");
			case RTmp:
				assert(isreg(r));
				fprintf(e->f, "[%s]", rname(r.val, Kl));
				break;
			case RSlot:
				fprintf(e->f, "[x29, %"PRIu64"]", slot(r, e));
				break;
			}
			break;
		}
	}
}
/* Print the instruction sequence loading the address of symbol
 * constant c into register rn.  The template strings use R for
 * the register, S for the (possibly prefixed) symbol name and O
 * for a byte offset; they differ between Mach-O and ELF, and for
 * thread-local symbols. */
static void
loadaddr(Con *c, char *rn, E *e)
{
	char *p, *l, *s;

	switch (c->sym.type) {
	default:
		die("unreachable");
	case SGlo:
		if (T.apple)
			s = "\tadrp\tR, S@pageO\n"
			    "\tadd\tR, R, S@pageoffO\n";
		else
			s = "\tadrp\tR, SO\n"
			    "\tadd\tR, R, #:lo12:SO\n";
		break;
	case SThr:
		if (T.apple)
			s = "\tadrp\tR, S@tlvppage\n"
			    "\tldr\tR, [R, S@tlvppageoff]\n";
		else
			s = "\tmrs\tR, tpidr_el0\n"
			    "\tadd\tR, R, #:tprel_hi12:SO, lsl #12\n"
			    "\tadd\tR, R, #:tprel_lo12_nc:SO\n";
		break;
	}

	l = str(c->sym.id);
	/* names already quoted carry their own assembler prefix */
	p = l[0] == '"' ? "" : T.assym;
	for (; *s; s++)
		switch (*s) {
		default:
			fputc(*s, e->f);
			break;
		case 'R':
			fputs(rn, e->f);
			break;
		case 'S':
			fputs(p, e->f);
			fputs(l, e->f);
			break;
		case 'O':
			if (c->bits.i)
				/* todo, handle large offsets */
				fprintf(e->f, "+%"PRIi64, c->bits.i);
			break;
		}
}
/* Load constant c into register r of class k: addresses go
 * through loadaddr(); bit constants use a single mov when they
 * fit (small negatives or logical immediates), otherwise a mov of
 * the low 16 bits followed by movk for each further 16-bit chunk. */
static void
loadcon(Con *c, int r, int k, E *e)
{
	char *rn;
	int64_t n;
	int w, sh;

	w = KWIDE(k);
	rn = rname(r, k);
	n = c->bits.i;
	if (c->type == CAddr) {
		rn = rname(r, Kl);
		loadaddr(c, rn, e);
		return;
	}
	assert(c->type == CBits);
	if (!w)
		n = (int32_t)n;
	if ((n | 0xffff) == -1 || arm64_logimm(n, k)) {
		fprintf(e->f, "\tmov\t%s, #%"PRIi64"\n", rn, n);
	} else {
		fprintf(e->f, "\tmov\t%s, #%d\n",
			rn, (int)(n & 0xffff));
		for (sh=16; n>>=16; sh+=16) {
			/* stop at the register width */
			if ((!w && sh == 32) || sh == 64)
				break;
			fprintf(e->f, "\tmovk\t%s, #0x%x, lsl #%d\n",
				rn, (uint)(n & 0xffff), sh);
		}
	}
}
static void emitins(Ins *, E *);

/* If *pr is a stack slot whose x29 offset exceeds the scaled
 * unsigned 12-bit range for an access of size sz, materialize the
 * address into temporary register t and rewrite *pr to use it.
 * With t < 0 (no scratch register available) nothing is emitted
 * and 1 is returned so the caller can arrange a register. */
static int
fixarg(Ref *pr, int sz, int t, E *e)
{
	Ins *i;
	Ref r;
	uint64_t s;

	r = *pr;
	if (rtype(r) == RSlot) {
		s = slot(r, e);
		if (s > sz * 4095u) {
			if (t < 0)
				return 1;
			i = &(Ins){Oaddr, Kl, TMP(t), {r}};
			emitins(i, e);
			*pr = TMP(t);
		}
	}
	return 0;
}
/* Emit one instruction, handling the cases the omap[] table
 * cannot: slot operands out of addressing range, copies to/from
 * slots and constants, frame addresses, direct calls, dynamic
 * stack allocation and debug locations. */
static void
emitins(Ins *i, E *e)
{
	char *l, *p, *rn;
	uint64_t s;
	int o, t;
	Ref r;
	Con *c;

	switch (i->op) {
	default:
		if (isload(i->op))
			fixarg(&i->arg[0], loadsz(i), IP1, e);
		if (isstore(i->op)) {
			/* on apple, x18 is reserved, so no second gp
			 * scratch register is available */
			t = T.apple ? -1 : R18;
			if (fixarg(&i->arg[1], storesz(i), t, e)) {
				if (req(i->arg[0], TMP(IP1))) {
					/* both the stored value and the
					 * address need ip1: park the value
					 * in v31 and use an fp store */
					fprintf(e->f,
						"\tfmov\t%c31, %c17\n",
						"ds"[i->cls == Kw],
						"xw"[i->cls == Kw]);
					i->arg[0] = TMP(V31);
					i->op = Ostores + (i->cls-Kw);
				}
				fixarg(&i->arg[1], storesz(i), IP1, e);
			}
		}
	Table:
		/* most instructions are just pulled out of
		 * the table omap[], some special cases are
		 * detailed below */
		for (o=0;; o++) {
			/* this linear search should really be a binary
			 * search */
			if (omap[o].op == NOp)
				die("no match for %s(%c)",
					optab[i->op].name, "wlsd"[i->cls]);
			if (omap[o].op == i->op)
			if (omap[o].cls == i->cls || omap[o].cls == Ka
			|| (omap[o].cls == Ki && KBASE(i->cls) == 0))
				break;
		}
		emitf(omap[o].fmt, i, e);
		break;
	case Onop:
		break;
	case Ocopy:
		if (req(i->to, i->arg[0]))
			break;
		if (rtype(i->to) == RSlot) {
			/* copy to a slot becomes a store; non-register
			 * sources are first materialized in ip1 */
			r = i->to;
			if (!isreg(i->arg[0])) {
				i->to = TMP(IP1);
				emitins(i, e);
				i->arg[0] = i->to;
			}
			i->op = Ostorew + i->cls;
			i->cls = Kw;
			i->arg[1] = r;
			emitins(i, e);
			break;
		}
		assert(isreg(i->to));
		switch (rtype(i->arg[0])) {
		case RCon:
			c = &e->fn->con[i->arg[0].val];
			loadcon(c, i->to.val, i->cls, e);
			break;
		case RSlot:
			i->op = Oload;
			emitins(i, e);
			break;
		default:
			assert(i->to.val != IP1);
			goto Table;
		}
		break;
	case Oaddr:
		assert(rtype(i->arg[0]) == RSlot);
		rn = rname(i->to.val, Kl);
		s = slot(i->arg[0], e);
		/* choose the shortest sequence the offset allows */
		if (s <= 4095)
			fprintf(e->f, "\tadd\t%s, x29, #%"PRIu64"\n", rn, s);
		else if (s <= 65535)
			fprintf(e->f,
				"\tmov\t%s, #%"PRIu64"\n"
				"\tadd\t%s, x29, %s\n",
				rn, s, rn, rn
			);
		else
			fprintf(e->f,
				"\tmov\t%s, #%"PRIu64"\n"
				"\tmovk\t%s, #%"PRIu64", lsl #16\n"
				"\tadd\t%s, x29, %s\n",
				rn, s & 0xFFFF, rn, s >> 16, rn, rn
			);
		break;
	case Ocall:
		if (rtype(i->arg[0]) != RCon)
			goto Table;
		c = &e->fn->con[i->arg[0].val];
		if (c->type != CAddr
		|| c->sym.type != SGlo
		|| c->bits.i)
			die("invalid call argument");
		l = str(c->sym.id);
		p = l[0] == '"' ? "" : T.assym;
		fprintf(e->f, "\tbl\t%s%s\n", p, l);
		break;
	case Osalloc:
		emitf("sub sp, sp, %0", i, e);
		if (!req(i->to, R))
			emitf("mov %=, sp", i, e);
		break;
	case Odbgloc:
		emitdbgloc(i->arg[0].val, i->arg[1].val, e->f);
		break;
	}
}
/* Compute e->frame and e->padding: count the callee-save
 * registers actually used, round the slot area up to a multiple
 * of four 4-byte units, and keep the saved-register count even so
 * the frame stays 16-byte aligned. */
static void
framelayout(E *e)
{
	int *r;
	uint o;
	uint64_t f;

	for (o=0, r=arm64_rclob; *r>=0; r++)
		o += 1 & (e->fn->reg >> *r);
	f = e->fn->slot;
	f = (f + 3) & -4;
	o += o & 1;
	e->padding = 4*(f-e->fn->slot);
	e->frame = 4*f + 8*o;
}
/*

  Stack-frame layout:

  +=============+
  | varargs     |
  |  save area  |
  +-------------+
  | callee-save |  ^
  |  registers  |  |
  +-------------+  |
  |    ...      |  |
  | spill slots |  |
  |    ...      |  | e->frame
  +-------------+  |
  |    ...      |  |
  |   locals    |  |
  |    ...      |  |
  +-------------+  |
  | e->padding  |  v
  +-------------+
  |  saved x29  |
  |  saved x30  |
  +=============+ <- x29

*/

/* Emit the assembly of one function: prologue (varargs save
 * area, frame allocation, callee-save spills), every block's
 * instructions and terminator, and the matching epilogues. */
void
arm64_emitfn(Fn *fn, FILE *out)
{
	static char *ctoa[] = {
	#define X(c, s) [c] = s,
		CMP(X)
	#undef X
	};
	static int id0;
	int s, n, c, lbl, *r;
	uint64_t o;
	Blk *b, *t;
	Ins *i;
	E *e;

	e = &(E){.f = out, .fn = fn};
	if (T.apple)
		e->fn->lnk.align = 4;
	emitfnlnk(e->fn->name, &e->fn->lnk, e->f);
	fputs("\thint\t#34\n", e->f);
	framelayout(e);

	if (e->fn->vararg && !T.apple) {
		/* spill the 192-byte register save area read by
		 * va_arg: q0-q7 then x0-x7 in pairs */
		for (n=7; n>=0; n--)
			fprintf(e->f, "\tstr\tq%d, [sp, -16]!\n", n);
		for (n=7; n>=0; n-=2)
			fprintf(e->f, "\tstp\tx%d, x%d, [sp, -16]!\n", n-1, n);
	}

	/* prologue: allocate the frame and save x29/x30, using the
	 * shortest sequence the frame size permits */
	if (e->frame + 16 <= 512)
		fprintf(e->f,
			"\tstp\tx29, x30, [sp, -%"PRIu64"]!\n",
			e->frame + 16
		);
	else if (e->frame <= 4095)
		fprintf(e->f,
			"\tsub\tsp, sp, #%"PRIu64"\n"
			"\tstp\tx29, x30, [sp, -16]!\n",
			e->frame
		);
	else if (e->frame <= 65535)
		fprintf(e->f,
			"\tmov\tx16, #%"PRIu64"\n"
			"\tsub\tsp, sp, x16\n"
			"\tstp\tx29, x30, [sp, -16]!\n",
			e->frame
		);
	else
		fprintf(e->f,
			"\tmov\tx16, #%"PRIu64"\n"
			"\tmovk\tx16, #%"PRIu64", lsl #16\n"
			"\tsub\tsp, sp, x16\n"
			"\tstp\tx29, x30, [sp, -16]!\n",
			e->frame & 0xFFFF, e->frame >> 16
		);
	fputs("\tmov\tx29, sp\n", e->f);

	/* save used callee-save registers in the top slots */
	s = (e->frame - e->padding) / 4;
	for (r=arm64_rclob; *r>=0; r++)
		if (e->fn->reg & BIT(*r)) {
			s -= 2;
			i = &(Ins){.arg = {TMP(*r), SLOT(s)}};
			i->op = *r >= V0 ? Ostored : Ostorel;
			emitins(i, e);
		}

	for (lbl=0, b=e->fn->start; b; b=b->link) {
		/* labels are only needed after a branch or when the
		 * block has several predecessors */
		if (lbl || b->npred > 1)
			fprintf(e->f, "%s%d:\n", T.asloc, id0+b->id);
		for (i=b->ins; i!=&b->ins[b->nins]; i++)
			emitins(i, e);
		lbl = 1;
		switch (b->jmp.type) {
		case Jhlt:
			fprintf(e->f, "\tbrk\t#1000\n");
			break;
		case Jret0:
			/* epilogue: restore callee-saves, then undo the
			 * prologue with the matching size tiers */
			s = (e->frame - e->padding) / 4;
			for (r=arm64_rclob; *r>=0; r++)
				if (e->fn->reg & BIT(*r)) {
					s -= 2;
					i = &(Ins){Oload, 0, TMP(*r), {SLOT(s)}};
					i->cls = *r >= V0 ? Kd : Kl;
					emitins(i, e);
				}
			if (e->fn->dynalloc)
				fputs("\tmov sp, x29\n", e->f);
			o = e->frame + 16;
			if (e->fn->vararg && !T.apple)
				o += 192;
			if (o <= 504)
				fprintf(e->f,
					"\tldp\tx29, x30, [sp], %"PRIu64"\n",
					o
				);
			else if (o - 16 <= 4095)
				fprintf(e->f,
					"\tldp\tx29, x30, [sp], 16\n"
					"\tadd\tsp, sp, #%"PRIu64"\n",
					o - 16
				);
			else if (o - 16 <= 65535)
				fprintf(e->f,
					"\tldp\tx29, x30, [sp], 16\n"
					"\tmov\tx16, #%"PRIu64"\n"
					"\tadd\tsp, sp, x16\n",
					o - 16
				);
			else
				fprintf(e->f,
					"\tldp\tx29, x30, [sp], 16\n"
					"\tmov\tx16, #%"PRIu64"\n"
					"\tmovk\tx16, #%"PRIu64", lsl #16\n"
					"\tadd\tsp, sp, x16\n",
					(o - 16) & 0xFFFF, (o - 16) >> 16
				);
			fprintf(e->f, "\tret\n");
			break;
		case Jjmp:
		Jmp:
			if (b->s1 != b->link)
				fprintf(e->f,
					"\tb\t%s%d\n",
					T.asloc, id0+b->s1->id
				);
			else
				lbl = 0;
			break;
		default:
			c = b->jmp.type - Jjf;
			if (c < 0 || c > NCmp)
				die("unhandled jump %d", b->jmp.type);
			/* branch to s2 on the (possibly negated)
			 * condition, fall through or jump to s1 */
			if (b->link == b->s2) {
				t = b->s1;
				b->s1 = b->s2;
				b->s2 = t;
			} else
				c = cmpneg(c);
			fprintf(e->f,
				"\tb%s\t%s%d\n",
				ctoa[c], T.asloc, id0+b->s2->id
			);
			goto Jmp;
		}
	}
	id0 += e->fn->nblk;
	if (!T.apple)
		elf_emitfnfin(fn->name, out);
}

316
src/qbe/arm64/isel.c Normal file
View File

@@ -0,0 +1,316 @@
#include "all.h"

/* Classification of add/sub immediates: positive or negative
 * values fitting the low 12 bits, the next 12 bits, or the full
 * 24-bit combination. */
enum Imm {
	Iother,
	Iplo12,
	Iphi12,
	Iplo24,
	Inlo12,
	Inhi12,
	Inlo24
};

/* Classify constant c as an arithmetic immediate for class k and
 * store its magnitude in *pn. */
static enum Imm
imm(Con *c, int k, int64_t *pn)
{
	int64_t n;
	int i;

	if (c->type != CBits)
		return Iother;
	n = c->bits.i;
	if (k == Kw)
		n = (int32_t)n;
	i = Iplo12;
	if (n < 0) {
		i = Inlo12;
		n = -(uint64_t)n;
	}
	*pn = n;
	if ((n & 0x000fff) == n)
		return i;
	if ((n & 0xfff000) == n)
		return i + 1;
	if ((n & 0xffffff) == n)
		return i + 2;
	return Iother;
}
/* Is x encodable as an arm64 logical-instruction immediate for
 * class k?  Such immediates are repetitions of a power-of-two
 * sized pattern whose ones form one contiguous (rotated) run; the
 * code reduces x to a single period n and checks that property. */
int
arm64_logimm(uint64_t x, int k)
{
	uint64_t n;

	if (k == Kw)
		x = (x & 0xffffffff) | x << 32;
	/* normalize so bit 0 is clear; all-zero and the alternating
	 * pattern are handled specially */
	if (x & 1)
		x = ~x;
	if (x == 0)
		return 0;
	if (x == 0xaaaaaaaaaaaaaaaa)
		return 1;
	n = x & 0xf;
	if (0x1111111111111111 * n == x)
		goto Check;
	n = x & 0xff;
	if (0x0101010101010101 * n == x)
		goto Check;
	n = x & 0xffff;
	if (0x0001000100010001 * n == x)
		goto Check;
	n = x & 0xffffffff;
	if (0x0000000100000001 * n == x)
		goto Check;
	n = x;
Check:
	/* n must be a single run of consecutive ones */
	return (n & (n + (n & -n))) == 0;
}
/* Legalize operand *pr of class k: rewrite Apple thread-local
 * addresses through their accessor call, load floating-point
 * constants from memory, and replace spilled temporaries by their
 * slot address.  When phi is set, integer constants are left in
 * place.  emit() prepends, so sequences read bottom-up. */
static void
fixarg(Ref *pr, int k, int phi, Fn *fn)
{
	char buf[32];
	Con *c, cc;
	Ref r0, r1, r2, r3;
	int s, n;

	r0 = *pr;
	switch (rtype(r0)) {
	case RCon:
		c = &fn->con[r0.val];
		if (T.apple
		&& c->type == CAddr
		&& c->sym.type == SThr) {
			/* apple tls: load the accessor from the tlv
			 * descriptor and call it with the descriptor
			 * address in x0 */
			r1 = newtmp("isel", Kl, fn);
			*pr = r1;
			if (c->bits.i) {
				/* add the byte offset after the call */
				r2 = newtmp("isel", Kl, fn);
				cc = (Con){.type = CBits};
				cc.bits.i = c->bits.i;
				r3 = newcon(&cc, fn);
				emit(Oadd, Kl, r1, r2, r3);
				r1 = r2;
			}
			emit(Ocopy, Kl, r1, TMP(R0), R);
			r1 = newtmp("isel", Kl, fn);
			r2 = newtmp("isel", Kl, fn);
			emit(Ocall, 0, R, r1, CALL(33));
			emit(Ocopy, Kl, TMP(R0), r2, R);
			emit(Oload, Kl, r1, r2, R);
			cc = *c;
			cc.bits.i = 0;
			r3 = newcon(&cc, fn);
			emit(Ocopy, Kl, r2, r3, R);
			break;
		}
		if (KBASE(k) == 0 && phi)
			return;
		r1 = newtmp("isel", k, fn);
		if (KBASE(k) == 0) {
			emit(Ocopy, k, r1, r0, R);
		} else {
			/* fp constants are stashed in the data section
			 * and loaded by address */
			n = stashbits(c->bits.i, KWIDE(k) ? 8 : 4);
			vgrow(&fn->con, ++fn->ncon);
			c = &fn->con[fn->ncon-1];
			sprintf(buf, "\"%sfp%d\"", T.asloc, n);
			*c = (Con){.type = CAddr};
			c->sym.id = intern(buf);
			r2 = newtmp("isel", Kl, fn);
			emit(Oload, k, r1, r2, R);
			emit(Ocopy, Kl, r2, CON(c-fn->con), R);
		}
		*pr = r1;
		break;
	case RTmp:
		s = fn->tmp[r0.val].slot;
		if (s == -1)
			break;
		/* spilled temporary: take its slot address */
		r1 = newtmp("isel", Kl, fn);
		emit(Oaddr, Kl, r1, SLOT(s), R);
		*pr = r1;
		break;
	}
}
/* Select a comparison of arg[0] and arg[1] in class k, using cmp
 * or cmn immediates when the constant fits.  Returns 1 when the
 * operands were swapped so the caller must mirror the condition
 * code. */
static int
selcmp(Ref arg[2], int k, Fn *fn)
{
	Ref r, *iarg;
	Con *c;
	int swap, cmp, fix;
	int64_t n;

	if (KBASE(k) == 1) {
		/* floating comparisons take no immediates */
		emit(Oafcmp, k, R, arg[0], arg[1]);
		iarg = curi->arg;
		fixarg(&iarg[0], k, 0, fn);
		fixarg(&iarg[1], k, 0, fn);
		return 0;
	}
	/* keep any constant on the right-hand side */
	swap = rtype(arg[0]) == RCon;
	if (swap) {
		r = arg[1];
		arg[1] = arg[0];
		arg[0] = r;
	}
	fix = 1;
	cmp = Oacmp;
	r = arg[1];
	if (rtype(r) == RCon) {
		c = &fn->con[r.val];
		switch (imm(c, k, &n)) {
		default:
			break;
		case Iplo12:
		case Iphi12:
			fix = 0;
			break;
		case Inlo12:
		case Inhi12:
			/* negative immediates compare with cmn */
			cmp = Oacmn;
			r = getcon(n, fn);
			fix = 0;
			break;
		}
	}
	emit(cmp, k, R, arg[0], r);
	iarg = curi->arg;
	fixarg(&iarg[0], k, 0, fn);
	if (fix)
		fixarg(&iarg[1], k, 0, fn);
	return swap;
}
/* Can r be used directly as a call target?  Temporaries always
 * can; constants only when they are plain symbol addresses with
 * no byte offset. */
static int
callable(Ref r, Fn *fn)
{
	Con *c;

	switch (rtype(r)) {
	case RTmp:
		return 1;
	case RCon:
		c = &fn->con[r.val];
		return c->type == CAddr && c->bits.i == 0;
	default:
		return 0;
	}
}
/* Select instructions for one QBE instruction; called back to
 * front over a block (emit() prepends).  Allocations become
 * stack-pointer adjustments, comparisons become cmp+cset pairs,
 * and all other instructions get their operands legalized. */
static void
sel(Ins i, Fn *fn)
{
	Ref *iarg;
	Ins *i0;
	int ck, cc;

	if (INRANGE(i.op, Oalloc, Oalloc1)) {
		i0 = curi - 1;
		salloc(i.to, i.arg[0], fn);
		fixarg(&i0->arg[0], Kl, 0, fn);
		return;
	}
	if (iscmp(i.op, &ck, &cc)) {
		emit(Oflag, i.cls, i.to, R, R);
		i0 = curi;
		/* mirror the condition if selcmp swapped the operands */
		if (selcmp(i.arg, ck, fn))
			i0->op += cmpop(cc);
		else
			i0->op += cc;
		return;
	}
	if (i.op == Ocall)
		if (callable(i.arg[0], fn)) {
			/* direct calls need no operand fixup */
			emiti(i);
			return;
		}
	if (i.op != Onop) {
		emiti(i);
		iarg = curi->arg; /* fixarg() can change curi */
		fixarg(&iarg[0], argcls(&i, 0), 0, fn);
		fixarg(&iarg[1], argcls(&i, 1), 0, fn);
	}
}
/* Select the block terminator: fuse a single-use comparison that
 * feeds a jnz into a conditional branch, otherwise branch on a
 * comparison of the argument against zero. */
static void
seljmp(Blk *b, Fn *fn)
{
	Ref r;
	Ins *i, *ir;
	int ck, cc, use;

	if (b->jmp.type == Jret0
	|| b->jmp.type == Jjmp
	|| b->jmp.type == Jhlt)
		return;
	assert(b->jmp.type == Jjnz);
	r = b->jmp.arg;
	use = -1;
	b->jmp.arg = R;
	ir = 0;
	i = &b->ins[b->nins];
	/* find the instruction defining the branch argument */
	while (i > b->ins)
		if (req((--i)->to, r)) {
			use = fn->tmp[r.val].nuse;
			ir = i;
			break;
		}
	if (ir && use == 1
	&& iscmp(ir->op, &ck, &cc)) {
		if (selcmp(ir->arg, ck, fn))
			cc = cmpop(cc);
		b->jmp.type = Jjf + cc;
		*ir = (Ins){.op = Onop};
	}
	else {
		selcmp((Ref[]){r, CON_Z}, Kw, fn);
		b->jmp.type = Jjfine;
	}
}
/* Instruction selection entry point for arm64: assign stack
 * slots to constant-sized allocs in the start block, then rewrite
 * each block's phi arguments, terminator and instructions. */
void
arm64_isel(Fn *fn)
{
	Blk *b, **sb;
	Ins *i;
	Phi *p;
	uint n, al;
	int64_t sz;

	/* assign slots to fast allocs */
	b = fn->start;
	/* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
	for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2)
		for (i=b->ins; i<&b->ins[b->nins]; i++)
			if (i->op == al) {
				if (rtype(i->arg[0]) != RCon)
					break;
				sz = fn->con[i->arg[0].val].bits.i;
				if (sz < 0 || sz >= INT_MAX-15)
					err("invalid alloc size %"PRId64, sz);
				sz = (sz + n-1) & -n;
				sz /= 4;
				fn->tmp[i->to.val].slot = fn->slot;
				fn->slot += sz;
				*i = (Ins){.op = Onop};
			}

	for (b=fn->start; b; b=b->link) {
		curi = &insb[NIns];
		/* legalize this block's arguments of successor phis */
		for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
			for (p=(*sb)->phi; p; p=p->link) {
				for (n=0; p->blk[n] != b; n++)
					assert(n+1 < p->narg);
				fixarg(&p->arg[n], p->cls, 1, fn);
			}
		seljmp(b, fn);
		for (i=&b->ins[b->nins]; i!=b->ins;)
			sel(*--i, fn);
		idup(b, curi, &insb[NIns]-curi);
	}

	if (debug['I']) {
		fprintf(stderr, "\n> After instruction selection:\n");
		printfn(fn, stderr);
	}
}

69
src/qbe/arm64/targ.c Normal file
View File

@@ -0,0 +1,69 @@
#include "all.h"

/* Caller-save registers, in allocation order; -1 terminated.
 * Sized by NGPS gp + NFPS fp entries (checked below). */
int arm64_rsave[] = {
	R0, R1, R2, R3, R4, R5, R6, R7,
	R8, R9, R10, R11, R12, R13, R14, R15,
	IP0, IP1, R18, LR,
	V0, V1, V2, V3, V4, V5, V6, V7,
	V16, V17, V18, V19, V20, V21, V22, V23,
	V24, V25, V26, V27, V28, V29, V30,
	-1
};
/* Callee-save registers; -1 terminated (NCLR entries). */
int arm64_rclob[] = {
	R19, R20, R21, R22, R23, R24, R25, R26,
	R27, R28,
	V8, V9, V10, V11, V12, V13, V14, V15,
	-1
};

/* Registers with a fixed global role, excluded from allocation. */
#define RGLOB (BIT(FP) | BIT(SP) | BIT(IP1) | BIT(R18))

/* arm64 instructions take no memory operands. */
static int
arm64_memargs(int op)
{
	(void)op;
	return 0;
}
#define ARM64_COMMON \
.gpr0 = R0, \
.ngpr = NGPR, \
.fpr0 = V0, \
.nfpr = NFPR, \
.rglob = RGLOB, \
.nrglob = 4, \
.rsave = arm64_rsave, \
.nrsave = {NGPS, NFPS}, \
.retregs = arm64_retregs, \
.argregs = arm64_argregs, \
.memargs = arm64_memargs, \
.isel = arm64_isel, \
.abi1 = arm64_abi, \
.emitfn = arm64_emitfn, \
.cansel = 0, \
Target T_arm64 = {
.name = "arm64",
.abi0 = elimsb,
.emitfin = elf_emitfin,
.asloc = ".L",
ARM64_COMMON
};
Target T_arm64_apple = {
.name = "arm64_apple",
.apple = 1,
.abi0 = apple_extsb,
.emitfin = macho_emitfin,
.asloc = "L",
.assym = "_",
ARM64_COMMON
};
MAKESURE(globals_are_not_arguments,
(RGLOB & (BIT(R8+1) - 1)) == 0
);
MAKESURE(arrays_size_ok,
sizeof arm64_rsave == (NGPS+NFPS+1) * sizeof(int) &&
sizeof arm64_rclob == (NCLR+1) * sizeof(int)
);

567
src/qbe/cfg.c Normal file
View File

@@ -0,0 +1,567 @@
#include "all.h"
/* Allocate a zero-initialized block with empty instruction and
 * predecessor vectors. */
Blk *
newblk()
{
	static Blk z;
	Blk *b;

	b = alloc(sizeof *b);
	*b = z;
	b->ins = vnew(0, sizeof b->ins[0], PFn);
	b->pred = vnew(0, sizeof b->pred[0], PFn);
	return b;
}
/* Compact every phi: drop arguments coming from pruned blocks
 * (id == -1u) or from blocks that no longer branch here. */
static void
fixphis(Fn *f)
{
	Blk *b, *bp;
	Phi *p;
	uint n, n0;

	for (b=f->start; b; b=b->link) {
		assert(b->id < f->nblk);
		for (p=b->phi; p; p=p->link) {
			for (n=n0=0; n<p->narg; n++) {
				bp = p->blk[n];
				if (bp->id != -1u)
				if (bp->s1 == b || bp->s2 == b) {
					p->blk[n0] = bp;
					p->arg[n0] = p->arg[n];
					n0++;
				}
			}
			assert(n0 > 0);
			p->narg = n0;
		}
	}
}
/* Append bp to b's predecessor list. */
static void
addpred(Blk *bp, Blk *b)
{
	vgrow(&b->pred, ++b->npred);
	b->pred[b->npred-1] = bp;
}

/* Recompute the predecessor lists of all blocks. */
void
fillpreds(Fn *f)
{
	Blk *b;

	for (b=f->start; b; b=b->link)
		b->npred = 0;
	for (b=f->start; b; b=b->link) {
		if (b->s1)
			addpred(b, b->s1);
		if (b->s2 && b->s2 != b->s1)
			addpred(b, b->s2);
	}
}
/* Depth-first post-order numbering; the successor with the
 * higher loop depth is visited first so it sorts later in rpo. */
static void
porec(Blk *b, uint *npo)
{
	Blk *s1, *s2;

	if (!b || b->id != -1u)
		return;
	b->id = 0; /* marker */
	s1 = b->s1;
	s2 = b->s2;
	if (s1 && s2 && s1->loop > s2->loop) {
		s1 = b->s2;
		s2 = b->s1;
	}
	porec(s1, npo);
	porec(s2, npo);
	b->id = (*npo)++;
}

/* Renumber blocks in reverse post-order, fill f->rpo, and unlink
 * blocks the dfs never reached. */
static void
fillrpo(Fn *f)
{
	Blk *b, **p;

	for (b=f->start; b; b=b->link)
		b->id = -1u;
	f->nblk = 0;
	porec(f->start, &f->nblk);
	vgrow(&f->rpo, f->nblk);
	for (p=&f->start; (b=*p);) {
		if (b->id == -1u) {
			/* unreachable: drop from the block list */
			*p = b->link;
		} else {
			b->id = f->nblk-b->id-1;
			f->rpo[b->id] = b;
			p = &b->link;
		}
	}
}

/* fill rpo, preds; prune dead blks */
void
fillcfg(Fn *f)
{
	fillrpo(f);
	fillpreds(f);
	fixphis(f);
}
/* for dominators computation, read
 * "A Simple, Fast Dominance Algorithm"
 * by K. Cooper, T. Harvey, and K. Kennedy.
 */

/* Closest common ancestor of b1 and b2 in the (partial) idom
 * chains, compared by rpo id; a null argument is the identity. */
static Blk *
inter(Blk *b1, Blk *b2)
{
	Blk *bt;

	if (b1 == 0)
		return b2;
	while (b1 != b2) {
		if (b1->id < b2->id) {
			bt = b1;
			b1 = b2;
			b2 = bt;
		}
		while (b1->id > b2->id) {
			b1 = b1->idom;
			assert(b1);
		}
	}
	return b1;
}

/* Compute immediate dominators (Blk.idom) by fixed-point
 * iteration over rpo, then link each block into its idom's
 * children list (dom/dlink). */
void
filldom(Fn *fn)
{
	Blk *b, *d;
	int ch;
	uint n, p;

	for (b=fn->start; b; b=b->link) {
		b->idom = 0;
		b->dom = 0;
		b->dlink = 0;
	}
	do {
		ch = 0;
		for (n=1; n<fn->nblk; n++) {
			b = fn->rpo[n];
			d = 0;
			/* only processed predecessors participate */
			for (p=0; p<b->npred; p++)
				if (b->pred[p]->idom
				|| b->pred[p] == fn->start)
					d = inter(d, b->pred[p]);
			if (d != b->idom) {
				ch++;
				b->idom = d;
			}
		}
	} while (ch);
	for (b=fn->start; b; b=b->link)
		if ((d=b->idom)) {
			assert(d != b);
			b->dlink = d->dom;
			d->dom = b;
		}
}
/* Does b1 strictly dominate b2?  Walks b2's idom chain up to
 * b1's rpo id. */
int
sdom(Blk *b1, Blk *b2)
{
	assert(b1 && b2);
	if (b1 == b2)
		return 0;
	while (b2->id > b1->id)
		b2 = b2->idom;
	return b1 == b2;
}

/* Does b1 dominate b2 (reflexively)? */
int
dom(Blk *b1, Blk *b2)
{
	return b1 == b2 || sdom(b1, b2);
}
/* Add b to a's dominance frontier, ignoring duplicates. */
static void
addfron(Blk *a, Blk *b)
{
	uint n;

	for (n=0; n<a->nfron; n++)
		if (a->fron[n] == b)
			return;
	if (!a->nfron)
		a->fron = vnew(++a->nfron, sizeof a->fron[0], PFn);
	else
		vgrow(&a->fron, ++a->nfron);
	a->fron[a->nfron-1] = b;
}

/* fill the dominance frontier */
void
fillfron(Fn *fn)
{
	Blk *a, *b;

	for (b=fn->start; b; b=b->link)
		b->nfron = 0;
	for (b=fn->start; b; b=b->link) {
		/* each successor is in the frontier of every ancestor
		 * of b that does not strictly dominate it */
		if (b->s1)
			for (a=b; !sdom(a, b->s1); a=a->idom)
				addfron(a, b->s1);
		if (b->s2)
			for (a=b; !sdom(a, b->s2); a=a->idom)
				addfron(a, b->s2);
	}
}
/* Walk backwards from b through the body of the loop headed by
 * hd, calling f(hd, body-block) once per block (Blk.visit marks
 * blocks already seen for this header). */
static void
loopmark(Blk *hd, Blk *b, void f(Blk *, Blk *))
{
	uint p;

	if (b->id < hd->id || b->visit == hd->id)
		return;
	b->visit = hd->id;
	f(hd, b);
	for (p=0; p<b->npred; ++p)
		loopmark(hd, b->pred[p], f);
}

/* Call f(header, block) for every block of every loop, found via
 * back edges (a predecessor with id >= the block's rpo index). */
void
loopiter(Fn *fn, void f(Blk *, Blk *))
{
	uint n, p;
	Blk *b;

	for (b=fn->start; b; b=b->link)
		b->visit = -1u;
	for (n=0; n<fn->nblk; ++n) {
		b = fn->rpo[n];
		for (p=0; p<b->npred; ++p)
			if (b->pred[p]->id >= n)
				loopmark(b, b->pred[p], f);
	}
}
/* dominator tree depth */
void
filldepth(Fn *fn)
{
	Blk *b, *d;
	int depth;

	for (b=fn->start; b; b=b->link)
		b->depth = -1;
	fn->start->depth = 0;
	for (b=fn->start; b; b=b->link) {
		if (b->depth != -1)
			continue;
		/* climb to the nearest ancestor with a known depth,
		 * then write depths back down the idom chain */
		depth = 1;
		for (d=b->idom; d->depth==-1; d=d->idom)
			depth++;
		depth += d->depth;
		b->depth = depth;
		for (d=b->idom; d->depth==-1; d=d->idom)
			d->depth = --depth;
	}
}
/* least common ancestor in dom tree; requires filldepth().
 * A null argument acts as the identity. */
Blk *
lca(Blk *b1, Blk *b2)
{
	if (!b1)
		return b2;
	if (!b2)
		return b1;
	/* equalize depths, then climb in lockstep */
	while (b1->depth > b2->depth)
		b1 = b1->idom;
	while (b2->depth > b1->depth)
		b2 = b2->idom;
	while (b1 != b2) {
		b1 = b1->idom;
		b2 = b2->idom;
	}
	return b1;
}
/* loopiter() callback: each enclosing loop multiplies the
 * block's frequency estimate by 10. */
void
multloop(Blk *hd, Blk *b)
{
	(void)hd;
	b->loop *= 10;
}

/* Estimate execution frequency (Blk.loop) from loop nesting. */
void
fillloop(Fn *fn)
{
	Blk *b;

	for (b=fn->start; b; b=b->link)
		b->loop = 1;
	loopiter(fn, multloop);
}
/* Union-find lookup with path compression; replaces *pb by the
 * representative block of its set. */
static void
uffind(Blk **pb, Blk **uf)
{
	Blk **pb1;

	pb1 = &uf[(*pb)->id];
	if (*pb1) {
		uffind(pb1, uf);
		*pb = *pb1;
	}
}

/* requires rpo and no phis, breaks cfg */
/* Forward jumps through empty blocks and funnel every return
 * into one fresh exit block appended at the end of the list. */
void
simpljmp(Fn *fn)
{
	Blk **uf; /* union-find */
	Blk **p, *b, *ret;

	ret = newblk();
	ret->id = fn->nblk++;
	ret->jmp.type = Jret0;
	uf = emalloc(fn->nblk * sizeof uf[0]);
	for (b=fn->start; b; b=b->link) {
		assert(!b->phi);
		if (b->jmp.type == Jret0) {
			b->jmp.type = Jjmp;
			b->s1 = ret;
		}
		/* empty jump-only blocks are unioned with their target */
		if (b->nins == 0)
		if (b->jmp.type == Jjmp) {
			uffind(&b->s1, uf);
			if (b->s1 != b)
				uf[b->id] = b->s1;
		}
	}
	for (p=&fn->start; (b=*p); p=&b->link) {
		if (b->s1)
			uffind(&b->s1, uf);
		if (b->s2)
			uffind(&b->s2, uf);
		if (b->s1 && b->s1 == b->s2) {
			b->jmp.type = Jjmp;
			b->s2 = 0;
		}
	}
	*p = ret;
	free(uf);
}
/* Depth-first reachability from b to to; marks Blk.visit. */
static int
reachrec(Blk *b, Blk *to)
{
	if (b == to)
		return 1;
	if (!b || b->visit)
		return 0;
	b->visit = 1;
	if (reachrec(b->s1, to))
		return 1;
	if (reachrec(b->s2, to))
		return 1;
	return 0;
}

/* Blk.visit needs to be clear at entry */
int
reaches(Fn *fn, Blk *b, Blk *to)
{
	int r;

	assert(to);
	r = reachrec(b, to);
	/* restore the invariant for the next caller */
	for (b=fn->start; b; b=b->link)
		b->visit = 0;
	return r;
}

/* can b reach 'to' not through excl
 * Blk.visit needs to be clear at entry */
int
reachesnotvia(Fn *fn, Blk *b, Blk *to, Blk *excl)
{
	/* pre-marking excl makes the dfs treat it as a wall */
	excl->visit = 1;
	return reaches(fn, b, to);
}
/* Recognize an if-then or if-then-else diamond rooted at ifb; on
 * success fill the then/else/join blocks and return 1.  For the
 * if-then shape (no else block), *pelseb is ifb itself. */
int
ifgraph(Blk *ifb, Blk **pthenb, Blk **pelseb, Blk **pjoinb)
{
	Blk *s1, *s2, **t;

	if (ifb->jmp.type != Jjnz)
		return 0;
	s1 = ifb->s1;
	s2 = ifb->s2;
	/* order successors by rpo id, swapping the out-pointers so
	 * then/else stay attached to the right branch */
	if (s1->id > s2->id) {
		s1 = ifb->s2;
		s2 = ifb->s1;
		t = pthenb;
		pthenb = pelseb;
		pelseb = t;
	}
	if (s1 == s2)
		return 0;
	if (s1->jmp.type != Jjmp || s1->npred != 1)
		return 0;
	if (s1->s1 == s2) {
		/* if-then / if-else */
		if (s2->npred != 2)
			return 0;
		*pthenb = s1;
		*pelseb = ifb;
		*pjoinb = s2;
		return 1;
	}
	if (s2->jmp.type != Jjmp || s2->npred != 1)
		return 0;
	if (s1->s1 != s2->s1 || s1->s1->npred != 2)
		return 0;
	assert(s1->s1 != ifb);
	*pthenb = s1;
	*pelseb = s2;
	*pjoinb = s1->s1;
	return 1;
}
typedef struct Jmp Jmp;

/* Snapshot of a block terminator, used by simplcfg() so edits
 * can be staged without touching the blocks until the end. */
struct Jmp {
	int type;
	Ref arg;
	Blk *s1, *s2;
};

/* Are the two terminators identical? */
static int
jmpeq(Jmp *a, Jmp *b)
{
	return a->type == b->type && req(a->arg, b->arg)
		&& a->s1 == b->s1 && a->s2 == b->s2;
}

/* Are the terminator's successors free of phi nodes? */
static int
jmpnophi(Jmp *j)
{
	if (j->s1 && j->s1->phi)
		return 0;
	if (j->s2 && j->s2->phi)
		return 0;
	return 1;
}
/* require cfg rpo, breaks use */
/* Simplify the cfg: turn phis of single-predecessor blocks into
 * copies, merge straight-line block chains, and collapse jnz
 * whose two effect-free successors end identically; finally
 * recompute the cfg. */
void
simplcfg(Fn *fn)
{
	Ins cpy, *i;
	Blk *b, *bb, **pb;
	Jmp *jmp, *j, *jj;
	Phi *p;
	int *empty, done;
	uint n;

	if (debug['C']) {
		fprintf(stderr, "\n> Before CFG simplification:\n");
		printfn(fn, stderr);
	}
	cpy = (Ins){.op = Ocopy};
	for (b=fn->start; b; b=b->link)
		if (b->npred == 1) {
			/* single-predecessor phis are plain copies,
			 * appended to the predecessor */
			bb = b->pred[0];
			for (p=b->phi; p; p=p->link) {
				cpy.cls = p->cls;
				cpy.to = p->to;
				cpy.arg[0] = phiarg(p, bb);
				addins(&bb->ins, &bb->nins, &cpy);
			}
			b->phi = 0;
		}
	/* snapshot terminators and compute which blocks are free of
	 * real instructions */
	jmp = emalloc(fn->nblk * sizeof jmp[0]);
	empty = emalloc(fn->nblk * sizeof empty[0]);
	for (b=fn->start; b; b=b->link) {
		jmp[b->id].type = b->jmp.type;
		jmp[b->id].arg = b->jmp.arg;
		jmp[b->id].s1 = b->s1;
		jmp[b->id].s2 = b->s2;
		empty[b->id] = !b->phi;
		for (i=b->ins; i<&b->ins[b->nins]; i++)
			if (i->op != Onop && i->op != Odbgloc) {
				empty[b->id] = 0;
				break;
			}
	}
	do {
		done = 1;
		for (b=fn->start; b; b=b->link) {
			if (b->id == -1u)
				continue;
			j = &jmp[b->id];
			if (j->type == Jjmp && j->s1->npred == 1) {
				/* absorb the unique successor into b and
				 * retarget phis that referenced it */
				assert(!j->s1->phi);
				addbins(&b->ins, &b->nins, j->s1);
				empty[b->id] &= empty[j->s1->id];
				jj = &jmp[j->s1->id];
				pb = (Blk*[]){jj->s1, jj->s2, 0};
				for (; (bb=*pb); pb++)
					for (p=bb->phi; p; p=p->link) {
						n = phiargn(p, j->s1);
						p->blk[n] = b;
					}
				j->s1->id = -1u;
				*j = *jj;
				done = 0;
			}
			else if (j->type == Jjnz
			&& empty[j->s1->id] && empty[j->s2->id]
			&& jmpeq(&jmp[j->s1->id], &jmp[j->s2->id])
			&& jmpnophi(&jmp[j->s1->id])) {
				/* both sides are empty and end alike: the
				 * branch is redundant */
				*j = jmp[j->s1->id];
				done = 0;
			}
		}
	} while (!done);
	/* write the staged terminators back to the live blocks */
	for (b=fn->start; b; b=b->link)
		if (b->id != -1u) {
			j = &jmp[b->id];
			b->jmp.type = j->type;
			b->jmp.arg = j->arg;
			b->s1 = j->s1;
			b->s2 = j->s2;
			assert(!j->s1 || j->s1->id != -1u);
			assert(!j->s2 || j->s2->id != -1u);
		}
	fillcfg(fn);
	free(empty);
	free(jmp);
	if (debug['C']) {
		fprintf(stderr, "\n> After CFG simplification:\n");
		printfn(fn, stderr);
	}
}

18
src/qbe/config.h Normal file
View File

@@ -0,0 +1,18 @@
/* Auto-generated default target for QBE.
   The qbe_backend.c #ifdef chain handles all common platforms;
   this file is only reached by the #else fallback.
   Deftgt selects the Target matching the host architecture and
   object format. */
#if defined(__aarch64__) && defined(__APPLE__)
#define Deftgt T_arm64_apple
#elif defined(__aarch64__)
#define Deftgt T_arm64
#elif defined(__x86_64__) && defined(__APPLE__)
#define Deftgt T_amd64_apple
#elif defined(__x86_64__) && defined(_WIN32)
#define Deftgt T_amd64_win
#elif defined(__x86_64__)
#define Deftgt T_amd64_sysv
#elif defined(__riscv) && __riscv_xlen == 64
#define Deftgt T_rv64
#else
#error "unsupported target for QBE"
#endif

408
src/qbe/copy.c Normal file
View File

@@ -0,0 +1,408 @@
#include "all.h"
/* Description of one extension opcode (extsb..extuw). */
typedef struct Ext Ext;
struct Ext {
	char zext; /* nonzero when the extension is zero-extending */
	char nopw; /* is a no-op if arg width is <= nopw */
	char usew; /* uses only the low usew bits of arg */
};

/* If i is an extension instruction, fill *e with its width and
 * signedness description and return 1; otherwise return 0. */
static int
ext(Ins *i, Ext *e)
{
	/* indexed by i->op - Oextsb; order must match the
	 * opcode declaration order */
	static Ext tbl[] = {
		/*extsb*/ {0, 7, 8},
		/*extub*/ {1, 8, 8},
		/*extsh*/ {0, 15, 16},
		/*extuh*/ {1, 16, 16},
		/*extsw*/ {0, 31, 32},
		/*extuw*/ {1, 32, 32},
	};

	if (!isext(i->op))
		return 0;
	*e = tbl[i->op - Oextsb];
	return 1;
}
/* Number of bits needed to represent v: floor(log2(v))+1,
 * and 0 when v is 0.  Binary search over halving shifts. */
static int
bitwidth(uint64_t v)
{
	static const int step[] = {32, 16, 8, 4, 2, 1};
	int n, k;

	n = 0;
	for (k=0; k<6; k++)
		if (v >> step[k]) {
			v >>= step[k];
			n += step[k];
		}
	/* v is now 0 or 1; it supplies the final bit */
	return n + v;
}
/* no more than w bits are used */
/* Return 1 when every use of the temp r reads at most its low w
 * bits; recurses through phis, copies, extensions, and 'and' with
 * a sufficiently narrow constant mask.  Inside the switch below,
 * 'continue' accepts the current use and moves on; breaking out of
 * the switch rejects it and makes the whole call return 0. */
static int
usewidthle(Fn *fn, Ref r, int w)
{
	Ext e;
	Tmp *t;
	Use *u;
	Phi *p;
	Ins *i;
	Ref rc;
	int64_t v;
	int b;

	assert(rtype(r) == RTmp);
	t = &fn->tmp[r.val];
	for (u=t->use; u<&t->use[t->nuse]; u++) {
		switch (u->type) {
		case UPhi:
			p = u->u.phi;
			/* during gvn, phi nodes may be
			 * replaced by other temps; in
			 * this case, the replaced phi
			 * uses are added to the
			 * replacement temp uses and
			 * Phi.to is set to R */
			if (p->visit || req(p->to, R))
				continue;
			/* p->visit guards against phi cycles */
			p->visit = 1;
			b = usewidthle(fn, p->to, w);
			p->visit = 0;
			if (b)
				continue;
			break;
		case UIns:
			i = u->u.ins;
			assert(i != 0);
			if (i->op == Ocopy)
				if (usewidthle(fn, i->to, w))
					continue;
			if (ext(i, &e)) {
				/* the extension itself only reads
				 * e.usew bits; or its result may be
				 * used narrowly */
				if (e.usew <= w)
					continue;
				if (usewidthle(fn, i->to, w))
					continue;
			}
			if (i->op == Oand) {
				/* rc is the other (mask) operand */
				if (req(r, i->arg[0]))
					rc = i->arg[1];
				else {
					assert(req(r, i->arg[1]));
					rc = i->arg[0];
				}
				if (isconbits(fn, rc, &v)
				&& bitwidth(v) <= w)
					continue;
				break;
			}
			break;
		default:
			break;
		}
		return 0;
	}
	return 1;
}
/* Smaller of two ints. */
static int
min(int v1, int v2)
{
	if (v2 < v1)
		return v2;
	return v1;
}
/* is the ref narrower than w bits */
/* Return 1 when the value of r is provably representable in w bits,
 * by looking at its definition: constants, phis (all incoming args),
 * copies, right shifts by constants, comparisons (1 bit), bitwise
 * ops, and extensions.  Conservative: returning 0 just means
 * "unknown". */
static int
defwidthle(Fn *fn, Ref r, int w)
{
	Ext e;
	Tmp *t;
	Phi *p;
	Ins *i;
	uint n;
	int64_t v;
	int x;

	if (isconbits(fn, r, &v)
	&& bitwidth(v) <= w)
		return 1;
	if (rtype(r) != RTmp)
		return 0;
	t = &fn->tmp[r.val];
	if (t->cls != Kw)
		return 0;
	if (!t->def) {
		/* phi def */
		for (p=fn->rpo[t->bid]->phi; p; p=p->link)
			if (req(p->to, r))
				break;
		assert(p);
		/* cycles in the phi graph are answered
		 * optimistically (1) */
		if (p->visit)
			return 1;
		p->visit = 1;
		for (n=0; n<p->narg; n++)
			if (!defwidthle(fn, p->arg[n], w)) {
				p->visit = 0;
				return 0;
			}
		p->visit = 0;
		return 1;
	}
	i = t->def;
	if (i->op == Ocopy)
		return defwidthle(fn, i->arg[0], w);
	if (i->op == Oshr || i->op == Osar) {
		/* a right shift by a known amount widens the
		 * budget w available to the shifted operand */
		if (isconbits(fn, i->arg[1], &v))
		if (0 < v && v <= 32) {
			if (i->op == Oshr && w+v >= 32)
				return 1;
			if (w < 32) {
				if (i->op == Osar)
					w = min(31, w+v);
				else
					w = min(32, w+v);
			}
		}
		return defwidthle(fn, i->arg[0], w);
	}
	if (iscmp(i->op, &x, &x))
		return w >= 1;
	if (i->op == Oand) {
		/* 'and' is narrow if either operand is */
		if (defwidthle(fn, i->arg[0], w)
		|| defwidthle(fn, i->arg[1], w))
			return 1;
		return 0;
	}
	if (i->op == Oor || i->op == Oxor) {
		/* 'or'/'xor' need both operands narrow */
		if (defwidthle(fn, i->arg[0], w)
		&& defwidthle(fn, i->arg[1], w))
			return 1;
		return 0;
	}
	if (ext(i, &e)) {
		if (e.zext && e.usew <= w)
			return 1;
		w = min(w, e.nopw);
		return defwidthle(fn, i->arg[0], w);
	}
	return 0;
}
/* is the value of r provably 0 or 1 (fits in a single bit)? */
static int
isw1(Fn *fn, Ref r)
{
	return defwidthle(fn, r, 1);
}
/* insert early extub/extuh instructions
 * for pars used only narrowly; this
 * helps factoring extensions out of
 * loops
 *
 * needs use; breaks use
 */
void
narrowpars(Fn *fn)
{
	Blk *b;
	int loop;
	Ins ext, *i, *ins; /* note: 'ext' shadows the ext() helper */
	uint npar, nins;
	Ref r;

	/* only useful for functions with loops */
	loop = 0;
	for (b=fn->start; b; b=b->link)
		if (b->loop > 1) {
			loop = 1;
			break;
		}
	if (!loop)
		return;
	/* count the leading par instructions of the start block */
	b = fn->start;
	npar = 0;
	for (i=b->ins; i<&b->ins[b->nins]; i++) {
		if (!ispar(i->op))
			break;
		npar++;
	}
	if (npar == 0)
		return;
	/* grow the instruction array, leaving npar free slots
	 * right after the pars; the loop below fills each slot
	 * with either an extension or a nop */
	nins = b->nins + npar;
	ins = vnew(nins, sizeof ins[0], PFn);
	icpy(ins, b->ins, npar);
	icpy(ins + 2*npar, b->ins+npar, b->nins-npar);
	b->ins = ins;
	b->nins = nins;
	for (i=b->ins; i<&b->ins[b->nins]; i++) {
		if (!ispar(i->op))
			break;
		ext = (Ins){.op = Onop};
		if (i->cls == Kw)
		if (usewidthle(fn, i->to, 16)) {
			ext.op = Oextuh;
			if (usewidthle(fn, i->to, 8))
				ext.op = Oextub;
			/* reroute the par into a fresh temp and
			 * extend that temp into the original one */
			r = newtmp("vw", i->cls, fn);
			ext.cls = i->cls;
			ext.to = i->to;
			ext.arg[0] = r;
			i->to = r;
		}
		*(i+npar) = ext;
	}
}
/* Return a ref that can replace the result of instruction i (a
 * "copy" in the gvn sense), or R when i is not equivalent to any
 * existing ref.  Covers plain copies, identity-value operands,
 * idempotent ops, trivial comparisons, redundant masks, and
 * redundant extensions. */
Ref
copyref(Fn *fn, Blk *b, Ins *i)
{
	/* which extensions are copies for a given
	 * argument width */
	static bits extcpy[] = {
		[WFull] = 0,
		[Wsb] = BIT(Wsb) | BIT(Wsh) | BIT(Wsw),
		[Wub] = BIT(Wub) | BIT(Wuh) | BIT(Wuw),
		[Wsh] = BIT(Wsh) | BIT(Wsw),
		[Wuh] = BIT(Wuh) | BIT(Wuw),
		[Wsw] = BIT(Wsw),
		[Wuw] = BIT(Wuw),
	};
	Ext e;
	Tmp *t;
	int64_t v;
	int w, z;

	if (i->op == Ocopy)
		return i->arg[0];
	/* op identity value */
	if (optab[i->op].hasid
	&& KBASE(i->cls) == 0 /* integer only - fp NaN! */
	&& req(i->arg[1], con01[optab[i->op].idval])
	&& (!optab[i->op].cmpeqwl || isw1(fn, i->arg[0])))
		return i->arg[0];
	/* idempotent op with identical args */
	if (optab[i->op].idemp
	&& req(i->arg[0], i->arg[1]))
		return i->arg[0];
	/* integer cmp with identical args */
	if ((optab[i->op].cmpeqwl || optab[i->op].cmplgtewl)
	&& req(i->arg[0], i->arg[1]))
		return con01[optab[i->op].eqval];
	/* cmpeq/ne 0 with 0/non-0 inference */
	if (optab[i->op].cmpeqwl
	&& req(i->arg[1], CON_Z)
	&& zeroval(fn, b, i->arg[0], argcls(i, 0), &z))
		return con01[optab[i->op].eqval^z^1];
	/* redundant and mask */
	if (i->op == Oand
	&& isconbits(fn, i->arg[1], &v)
	&& (v > 0 && ((v+1) & v) == 0) /* v is all-ones mask 2^k-1 */
	&& defwidthle(fn, i->arg[0], bitwidth(v)))
		return i->arg[0];
	/* word-class extsw/extuw only see the low 32 bits anyway */
	if (i->cls == Kw
	&& (i->op == Oextsw || i->op == Oextuw))
		return i->arg[0];
	if (ext(i, &e) && rtype(i->arg[0]) == RTmp) {
		t = &fn->tmp[i->arg[0].val];
		assert(KBASE(t->cls) == 0);
		/* do not break typing by returning
		 * a narrower temp */
		if (KWIDE(i->cls) > KWIDE(t->cls))
			return R;
		w = Wsb + (i->op - Oextsb);
		if (BIT(w) & extcpy[t->width])
			return i->arg[0];
		/* avoid eliding extensions of params
		 * inserted in the start block; their
		 * point is to make further extensions
		 * redundant */
		if ((!t->def || !ispar(t->def->op))
		&& usewidthle(fn, i->to, e.usew))
			return i->arg[0];
		if (defwidthle(fn, i->arg[0], e.nopw))
			return i->arg[0];
	}
	return R;
}
/* Do two phis of the same block select the same value on every
 * incoming edge?  pb's args are looked up by pa's block order. */
static int
phieq(Phi *pa, Phi *pb)
{
	Ref r;
	uint n;

	assert(pa->narg == pb->narg);
	for (n=0; n<pa->narg; n++) {
		r = phiarg(pb, pa->blk[n]);
		if (!req(pa->arg[n], r))
			return 0;
	}
	return 1;
}
/* Return a ref that can replace the phi p of block b, or R when
 * none exists: all-identical args, a duplicate of an earlier phi
 * in the same block, or a 0/1 phi selected by a dominating jnz
 * on a single-bit condition. */
Ref
phicopyref(Fn *fn, Blk *b, Phi *p)
{
	Blk *d, **s;
	Phi *p1;
	uint n, c;

	/* identical args */
	for (n=0; n<p->narg-1; n++)
		if (!req(p->arg[n], p->arg[n+1]))
			break;
	if (n == p->narg-1)
		return p->arg[n];
	/* same as a previous phi */
	for (p1=b->phi; p1!=p; p1=p1->link) {
		assert(p1);
		if (phieq(p1, p))
			return p1->to;
	}
	/* can be replaced by a
	 * dominating jnz arg */
	d = b->idom;
	if (p->narg != 2
	|| d->jmp.type != Jjnz
	|| !isw1(fn, d->jmp.arg))
		return R;
	/* s[c] is the predecessor providing constant c */
	s = (Blk*[]){0, 0};
	for (n=0; n<2; n++)
		for (c=0; c<2; c++)
			if (req(p->arg[n], con01[c]))
				s[c] = p->blk[n];
	/* if s1 ends with a jnz on either b
	 * or s2; the inference below is wrong
	 * without the jump type checks */
	if (d->s1 == s[1] && d->s2 == s[0]
	&& d->s1->jmp.type == Jjmp
	&& d->s2->jmp.type == Jjmp)
		return d->jmp.arg;
	return R;
}

141
src/qbe/doc/abi.txt Normal file
View File

@@ -0,0 +1,141 @@
==================
System V ABI AMD64
==================
This document describes concisely the subset of the amd64
ABI as it is implemented in QBE. The subset can handle
correctly arbitrary standard C-like structs containing
float and integer types. Structs that have unaligned
members are also supported through opaque types, see
the IL description document for more information about
them.
- ABI Subset Implemented
------------------------
Data classes of interest as defined by the ABI:
* INTEGER
* SSE
* MEMORY
~ Classification
1. The size of each argument gets rounded up to eightbytes.
(It keeps the stack always 8 bytes aligned.)
2. _Bool, char, short, int, long, long long and pointers
are in the INTEGER class. In the context of QBE, it
means that 'l' and 'w' are in the INTEGER class.
3. float and double are in the SSE class. In the context
of QBE, it means that 's' and 'd' are in the SSE class.
4. If the size of an object is larger than two eightbytes
or if it contains unaligned fields, it has class MEMORY.
In the context of QBE, those are big aggregate types
and opaque types.
5. Otherwise, recursively classify fields and determine
the class of the two eightbytes using the classes of
their components. If any is INTEGER the result is
INTEGER, otherwise the result is SSE.
~ Passing
* Classify arguments in order.
* INTEGER arguments use in order `%rdi` `%rsi` `%rdx`
`%rcx` `%r8` `%r9`.
* SSE arguments use in order `%xmm0` - `%xmm7`.
* MEMORY gets passed on the stack. They are "pushed"
in the right-to-left order, so from the callee's
point of view, the left-most argument appears first
on the stack.
* When we run out of registers for an aggregate, revert
the assignment for the first eightbytes and pass it
on the stack.
* When all registers are taken, write arguments on the
stack from right to left.
* When calling a variadic function, %al stores the number
of vector registers used to pass arguments (it must be
an upper bound and does not have to be exact).
* Registers `%rbx`, `%r12` - `%r15` are callee-save.
~ Returning
* Classify the return type.
* Use `%rax` and `%rdx` in order for INTEGER return
values.
* Use `%xmm0` and `%xmm1` in order for SSE return values.
* If the return value's class is MEMORY, the first
argument of the function `%rdi` was a pointer to an
area big enough to fit the return value. The function
writes the return value there and returns the address
(that was in `%rdi`) in `%rax`.
- Alignment on the Stack
------------------------
The ABI is unclear on the alignment requirement of the
stack. What must be ensured is that, right before
executing a 'call' instruction, the stack pointer `%rsp`
is aligned on 16 bytes. On entry of the called
function, the stack pointer is 8 modulo 16. Since most
functions will have a prelude pushing `%rbp`, the frame
pointer, upon entry of the body code of the function is
also aligned on 16 bytes (== 0 mod 16).
Here is a diagram of the stack layout after a call from
g() to f().
| |
| g() locals |
+-------------+
^ | | \
| | stack arg 2 | '
| |xxxxxxxxxxxxx| | f()'s MEMORY
growing | +-------------+ | arguments
addresses | | stack arg 1 | ,
| |xxxxxxxxxxxxx| /
| +-------------+ -> 0 mod 16
| | ret addr |
+-------------+
| saved %rbp |
+-------------+ -> f()'s %rbp
| f() locals | 0 mod 16
| ... |
-> %rsp
Legend:
* `xxxxx` Optional padding.
- Remarks
---------
* A struct can be returned in registers in one of three
ways. Either `%rax`, `%rdx` are used, or `%xmm0`,
`%xmm1`, or finally `%rax`, `%xmm0`. The last case
happens when a struct is returned with one half
classified as INTEGER and the other as SSE. This
is a consequence of the <@Returning> section above.
* The size of the arguments area of the stack needs to
be computed first, then arguments are packed starting
from the bottom of the argument area, respecting
alignment constraints. The ABI mentions "pushing"
arguments in right-to-left order, but I think it's a
mistaken view because of the alignment constraints.
Example: If three 8 bytes MEMORY arguments are passed
to the callee and the caller's stack pointer is 16 bytes
aligned, the layout will be like this.
+-------------+
|xxxxxxxxxxxxx| padding
| stack arg 3 |
| stack arg 2 |
| stack arg 1 |
+-------------+ -> 0 mod 16
The padding must not be at the end of the stack area.
A "pushing" logic would put it at the end.

1196
src/qbe/doc/il.txt Normal file

File diff suppressed because it is too large Load Diff

98
src/qbe/doc/llvm.txt Normal file
View File

@@ -0,0 +1,98 @@
===========
QBE vs LLVM
===========
Both QBE and LLVM are compiler backends using an SSA
representation. This document will explain why LLVM
does not make QBE a redundant project. Obviously,
everything following is biased, because written by me.
- Scope
-------
QBE is a much smaller scale project with different goals
than LLVM.
* QBE is for amateur language designers.
It does not address all the problems faced when
conceiving an industry-grade language. If you are
toying with some language ideas, using LLVM will
be like hauling your backpack with a truck, but
using QBE will feel more like riding a bicycle.
* QBE is about the first 70%, not the last 30%.
It attempts to pinpoint, in the extremely vast
compilation literature, the optimizations that get
you 70% of the performance in 10% of the code of
full blown compilers.
For example, copy propagation on SSA form is
implemented in 160 lines of code in QBE!
* QBE is extremely hackable.
First, it is, and will remain, a small project
(less than 8 kloc). Second, it is programmed in
non-fancy C99 without any dependencies. Third,
it is able to dump the IL and debug information in
a uniform format after each pass.
On my Core 2 Duo machine, QBE compiles in half a
second (without optimizations).
- Features
----------
LLVM is definitely more packed with features, but there
are a few things provided in QBE to consider.
* LLVM does NOT provide full C compatibility for you.
In more technical terms, any language that provides
good C compatibility and uses LLVM as a backend
needs to reimplement large chunks of the ABI in
its frontend! This well known issue in the LLVM
community causes a great deal of duplication
and bugs.
Implementing a complete C ABI (with struct arguments
and returns) is incredibly tricky, and not really
a lot of fun. QBE provides you with IL operations
to call in (and be called by) C with no pain.
Moreover the ABI implementation in QBE has been
thoroughly tested by fuzzing and manual tests.
* LLVM IL is more cluttered with memory operations.
Implementing SSA construction is hard. To save its
users from having to implement it, LLVM provides
stack slots. This means that one increment of
a variable `v` will be composed of three LLVM
instructions: one load, one add, and one store.
QBE provides simple non-SSA temporaries, so
incrementing `v` is simply done with one instruction
`%v =w add %v, 1`.
This could seem cosmetic, but dividing the size of
the IL by three makes it easier for the frontend
writers to spot bugs in the generated code.
* LLVM IL is more cluttered with type annotations and
casts.
For the sake of advanced optimizations and
correctness, LLVM has complex IL types. However,
only a few types are really first class and many
operations of source languages require casts to be
compiled.
Because QBE makes a much lighter use of types, the
IL is more readable and shorter. It can of course be
argued back that the correctness of QBE is jeopardized,
but remember that, in practice, the large amount
of casts necessary in LLVM IL is undermining the
overall effectiveness of the type system.

View File

@@ -0,0 +1,15 @@
There is an experimental amd64_win (native Windows ABI and calling
convention).
In tree, this is currently only tested via cross-compilation from a
Linux host, and using wine to run the tests.
You'll need something like:
sudo apt install mingw-w64 dos2unix wine
and then
make check-amd64_win
should pass.

20
src/qbe/doc/rv64.txt Normal file
View File

@@ -0,0 +1,20 @@
=========
RISC-V 64
=========
- Known issues
--------------
ABI with structs containing floats is not yet supported.
- Possible improvements
-----------------------
rv64_isel() could turn compare used only with jnz into b{lt,ge}[u].
- Helpful links
---------------
RISC-V spec: https://github.com/riscv/riscv-isa-manual/releases/latest/download/riscv-spec.pdf
ASM manual: https://github.com/riscv-non-isa/riscv-asm-manual/blob/master/riscv-asm.md
ABI: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc

23
src/qbe/doc/win.txt Normal file
View File

@@ -0,0 +1,23 @@
===================
Windows Quick Start
===================
Only 64-bit versions of windows are supported. To compile
this software you will need to get a normal UNIX toolchain.
There are several ways to get one, but I will only describe
how I did it.
1. Download and install [@1 MSYS2] (the x86_64 version).
2. In an MSYS2 terminal, run the following command.
pacman -S git make mingw-w64-x86_64-gcc mingw-w64-x86_64-gdb
3. Restart the MSYS2 terminal.
4. In the new terminal, clone QBE.
git clone git://c9x.me/qbe.git
5. Compile using `make`.
[1] http://www.msys2.org

271
src/qbe/emit.c Normal file
View File

@@ -0,0 +1,271 @@
#include "all.h"
/* assembly output sections */
enum {
	SecText,
	SecData,
	SecBss,
};

/* Emit the section directive, alignment, visibility, and label for
 * symbol n with linkage l into section s (SecText/SecData/SecBss).
 * Apple thread-locals first get a __thread_vars bootstrap record
 * pointing at the actual initializer symbol (suffix $tlv$init). */
void
emitlnk(char *n, Lnk *l, int s, FILE *f)
{
	/* [thread-local?][section] */
	static char *sec[2][3] = {
		[0][SecText] = ".text",
		[0][SecData] = ".data",
		[0][SecBss] = ".bss",
		[1][SecText] = ".abort \"unreachable\"",
		[1][SecData] = ".section .tdata,\"awT\"",
		[1][SecBss] = ".section .tbss,\"awT\"",
	};
	char *pfx, *sfx;

	/* quoted names are emitted verbatim, without the target's
	 * symbol prefix */
	pfx = n[0] == '"' ? "" : T.assym;
	sfx = "";
	if (T.apple && l->thread) {
		l->sec = "__DATA";
		l->secf = "__thread_data,thread_local_regular";
		sfx = "$tlv$init";
		fputs(
			".section __DATA,__thread_vars,"
			"thread_local_variables\n",
			f
		);
		fprintf(f, "%s%s:\n", pfx, n);
		fprintf(f,
			"\t.quad __tlv_bootstrap\n"
			"\t.quad 0\n"
			"\t.quad %s%s%s\n\n",
			pfx, n, sfx
		);
	}
	if (l->sec) {
		/* user-specified section overrides the default */
		fprintf(f, ".section %s", l->sec);
		if (l->secf)
			fprintf(f, ",%s", l->secf);
	} else
		fputs(sec[l->thread != 0][s], f);
	fputc('\n', f);
	if (l->align)
		fprintf(f, ".balign %d\n", l->align);
	if (l->export)
		fprintf(f, ".globl %s%s\n", pfx, n);
	fprintf(f, "%s%s%s:\n", pfx, n, sfx);
}
/* emit the linkage and label for a function (text section) */
void
emitfnlnk(char *n, Lnk *l, FILE *f)
{
	emitlnk(n, l, SecText, f);
}
/* Emit one data item of a definition stream (DStart .. DEnd).
 * The static 'zero' accumulates a run of leading zero bytes so an
 * all-zero definition can use .comm or the bss section; it is set
 * to -1 as soon as real (non-zero) data forces a data-section
 * definition. */
void
emitdat(Dat *d, FILE *f)
{
	/* per-width directive and truncation mask */
	static struct {
		char decl[8];
		int64_t mask;
	} di[] = {
		[DB] = {"\t.byte", 0xffL},
		[DH] = {"\t.short", 0xffffL},
		[DW] = {"\t.int", 0xffffffffL},
		[DL] = {"\t.quad", -1L},
	};
	static int64_t zero;
	char *p;

	switch (d->type) {
	case DStart:
		zero = 0;
		break;
	case DEnd:
		if (d->lnk->common) {
			/* common symbols must be all-zero */
			if (zero == -1)
				die("invalid common data definition");
			p = d->name[0] == '"' ? "" : T.assym;
			fprintf(f, ".comm %s%s,%"PRId64,
				p, d->name, zero);
			if (d->lnk->align)
				fprintf(f, ",%d", d->lnk->align);
			fputc('\n', f);
		}
		else if (zero != -1) {
			/* whole definition was zero: place in bss */
			emitlnk(d->name, d->lnk, SecBss, f);
			fprintf(f, "\t.fill %"PRId64",1,0\n", zero);
		}
		break;
	case DZ:
		if (zero != -1)
			zero += d->u.num;
		else
			fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num);
		break;
	default:
		if (zero != -1) {
			/* first non-zero datum: open the data section
			 * and flush the accumulated zero prefix */
			emitlnk(d->name, d->lnk, SecData, f);
			if (zero > 0)
				fprintf(f, "\t.fill %"PRId64",1,0\n", zero);
			zero = -1;
		}
		if (d->isstr) {
			if (d->type != DB)
				err("strings only supported for 'b' currently");
			fprintf(f, "\t.ascii %s\n", d->u.str);
		}
		else if (d->isref) {
			p = d->u.ref.name[0] == '"' ? "" : T.assym;
			fprintf(f, "%s %s%s%+"PRId64"\n",
				di[d->type].decl, p, d->u.ref.name,
				d->u.ref.off);
		}
		else {
			fprintf(f, "%s %"PRId64"\n",
				di[d->type].decl,
				d->u.num & di[d->type].mask);
		}
		break;
	}
}
/* One floating-point constant awaiting emission at the end of the
 * assembly file (see emitfin). */
typedef struct Asmbits Asmbits;
struct Asmbits {
	bits n;        /* the raw constant bits */
	int size;      /* 4, 8, or 16 bytes */
	Asmbits *link; /* next stashed constant */
};

static Asmbits *stash;

/* Record constant n of the given byte size for late emission and
 * return its index in the stash list; the emitter references it
 * via the local label fp<index>.  An existing entry of at least
 * the requested size with the same bits is reused. */
int
stashbits(bits n, int size)
{
	Asmbits **pb, *b;
	int i;

	assert(size == 4 || size == 8 || size == 16);
	for (pb=&stash, i=0; (b=*pb); pb=&b->link, i++)
		if (size <= b->size && b->n == n)
			return i;
	b = emalloc(sizeof *b);
	b->n = n;
	b->size = size;
	b->link = 0;
	*pb = b;
	return i;
}
/* Emit all stashed floating-point constants and free the stash.
 * sec[k] names the literal section for size 4<<k (4, 8, 16 bytes);
 * lg iterates log2(size) from 4 down to 2 so same-sized constants
 * are grouped, and i preserves each constant's stash index so
 * labels match stashbits' return values. */
static void
emitfin(FILE *f, char *sec[3])
{
	Asmbits *b;
	int lg, i;
	union { int32_t i; float f; } u;

	if (!stash)
		return;
	fprintf(f, "/* floating point constants */\n");
	for (lg=4; lg>=2; lg--)
		for (b=stash, i=0; b; b=b->link, i++) {
			if (b->size == (1<<lg)) {
				fprintf(f,
					".section %s\n"
					".p2align %d\n"
					"%sfp%d:",
					sec[lg-2], lg, T.asloc, i
				);
				if (lg == 4)
					/* 16 bytes: value + zero padding */
					fprintf(f,
						"\n\t.quad %"PRId64
						"\n\t.quad 0\n\n",
						(int64_t)b->n);
				else if (lg == 3)
					fprintf(f,
						"\n\t.quad %"PRId64
						" /* %f */\n\n",
						(int64_t)b->n,
						*(double *)&b->n);
				else if (lg == 2) {
					u.i = b->n;
					fprintf(f,
						"\n\t.int %"PRId32
						" /* %f */\n\n",
						u.i, (double)u.f);
				}
			}
		}
	while ((b=stash)) {
		stash = b->link;
		free(b);
	}
}
/* ELF epilogue: flush stashed fp constants into .rodata and mark
 * the stack non-executable for GNU toolchains. */
void
elf_emitfin(FILE *f)
{
	static char *sec[3] = { ".rodata", ".rodata", ".rodata" };

	emitfin(f, sec);
	fprintf(f, ".section .note.GNU-stack,\"\",@progbits\n");
}
/* ELF per-function epilogue: emit .type/.size so the symbol table
 * records fn as a function with its exact byte length. */
void
elf_emitfnfin(char *fn, FILE *f)
{
	fprintf(f, ".type %s, @function\n", fn);
	fprintf(f, ".size %s, .-%s\n", fn, fn);
}
/* Mach-O epilogue: flush stashed fp constants into the literal
 * sections; 16-byte literals are unexpected here (.abort). */
void
macho_emitfin(FILE *f)
{
	static char *sec[3] = {
		"__TEXT,__literal4,4byte_literals",
		"__TEXT,__literal8,8byte_literals",
		".abort \"unreachable\"",
	};

	emitfin(f, sec);
}
/* PE/COFF epilogue: flush stashed fp constants into .rodata. */
void
pe_emitfin(FILE *f)
{
	static char *sec[3] = { ".rodata", ".rodata", ".rodata" };

	emitfin(f, sec);
}
static uint32_t *file; /* interned names of .file entries emitted */
static uint nfile;     /* number of entries in file[] */
static uint curfile;   /* current 1-based .file number */

/* Select fn as the current debug source file, emitting a .file
 * directive the first time a given name is seen. */
void
emitdbgfile(char *fn, FILE *f)
{
	uint32_t id;
	uint n;

	id = intern(fn);
	for (n=0; n<nfile; n++)
		if (file[n] == id) {
			/* gas requires positive
			 * file numbers */
			curfile = n + 1;
			return;
		}
	if (!file)
		file = vnew(0, sizeof *file, PHeap);
	vgrow(&file, ++nfile);
	file[nfile-1] = id;
	curfile = nfile;
	fprintf(f, ".file %u %s\n", curfile, fn);
}
/* Emit a .loc directive for the current file; the column is
 * omitted when it is 0. */
void
emitdbgloc(uint line, uint col, FILE *f)
{
	if (col == 0)
		fprintf(f, "\t.loc %u %u\n", curfile, line);
	else
		fprintf(f, "\t.loc %u %u %u\n", curfile, line, col);
}

246
src/qbe/fold.c Normal file
View File

@@ -0,0 +1,246 @@
#include "all.h"
/* boring folding code */
/* Does the constant c equal k, compared at 64-bit width when w is
 * nonzero and at 32-bit width otherwise?  Only CBits constants
 * (no addresses) can match. */
static int
iscon(Con *c, int w, uint64_t k)
{
	if (c->type != CBits)
		return 0;
	if (!w)
		return (uint32_t)c->bits.i == (uint32_t)k;
	return (uint64_t)c->bits.i == k;
}
/* Fold the integer instruction op applied to constants cl, cr into
 * *res; w is nonzero for 64-bit (long) class.  Returns 1 when the
 * fold must not happen — symbolic address arithmetic without a
 * static value, division by zero, or INT_MIN / -1 overflow — and
 * 0 on success. */
int
foldint(Con *res, int op, int w, Con *cl, Con *cr)
{
	union {
		int64_t s;
		uint64_t u;
		float fs;
		double fd;
	} l, r;
	uint64_t x;
	Sym sym;
	int typ;

	memset(&sym, 0, sizeof sym);
	typ = CBits;
	l.s = cl->bits.i;
	r.s = cr->bits.i;
	/* address arithmetic: only addr+int, addr-int, and
	 * addr-addr on the same symbol can fold */
	if (op == Oadd) {
		if (cl->type == CAddr) {
			if (cr->type == CAddr)
				return 1;
			typ = CAddr;
			sym = cl->sym;
		}
		else if (cr->type == CAddr) {
			typ = CAddr;
			sym = cr->sym;
		}
	}
	else if (op == Osub) {
		if (cl->type == CAddr) {
			if (cr->type != CAddr) {
				typ = CAddr;
				sym = cl->sym;
			} else if (!symeq(cl->sym, cr->sym))
				return 1;
		}
		else if (cr->type == CAddr)
			return 1;
	}
	else if (cl->type == CAddr || cr->type == CAddr)
		return 1;
	/* refuse to fold trapping divisions: by zero,
	 * and INT_MIN / -1 */
	if (op == Odiv || op == Orem || op == Oudiv || op == Ourem) {
		if (iscon(cr, w, 0))
			return 1;
		if (op == Odiv || op == Orem) {
			x = w ? INT64_MIN : INT32_MIN;
			if (iscon(cr, w, -1))
			if (iscon(cl, w, x))
				return 1;
		}
	}
	switch (op) {
	case Oadd:  x = l.u + r.u; break;
	case Osub:  x = l.u - r.u; break;
	case Oneg:  x = -l.u; break;
	case Odiv:  x = w ? l.s / r.s : (int32_t)l.s / (int32_t)r.s; break;
	case Orem:  x = w ? l.s % r.s : (int32_t)l.s % (int32_t)r.s; break;
	case Oudiv: x = w ? l.u / r.u : (uint32_t)l.u / (uint32_t)r.u; break;
	case Ourem: x = w ? l.u % r.u : (uint32_t)l.u % (uint32_t)r.u; break;
	case Omul:  x = l.u * r.u; break;
	case Oand:  x = l.u & r.u; break;
	case Oor:   x = l.u | r.u; break;
	case Oxor:  x = l.u ^ r.u; break;
	/* shift counts are masked to the operand width */
	case Osar:  x = (w ? l.s : (int32_t)l.s) >> (r.u & (31|w<<5)); break;
	case Oshr:  x = (w ? l.u : (uint32_t)l.u) >> (r.u & (31|w<<5)); break;
	case Oshl:  x = l.u << (r.u & (31|w<<5)); break;
	case Oextsb: x = (int8_t)l.u;   break;
	case Oextub: x = (uint8_t)l.u;  break;
	case Oextsh: x = (int16_t)l.u;  break;
	case Oextuh: x = (uint16_t)l.u; break;
	case Oextsw: x = (int32_t)l.u;  break;
	case Oextuw: x = (uint32_t)l.u; break;
	case Ostosi: x = w ? (int64_t)cl->bits.s : (int32_t)cl->bits.s; break;
	case Ostoui: x = w ? (uint64_t)cl->bits.s : (uint32_t)cl->bits.s; break;
	case Odtosi: x = w ? (int64_t)cl->bits.d : (int32_t)cl->bits.d; break;
	case Odtoui: x = w ? (uint64_t)cl->bits.d : (uint32_t)cl->bits.d; break;
	case Ocast:
		x = l.u;
		if (cl->type == CAddr) {
			typ = CAddr;
			sym = cl->sym;
		}
		break;
	default:
		if (Ocmpw <= op && op <= Ocmpl1) {
			/* word compares sign-extend to reuse the
			 * 64-bit comparison code below */
			if (op <= Ocmpw1) {
				l.u = (int32_t)l.u;
				r.u = (int32_t)r.u;
			} else
				op -= Ocmpl - Ocmpw;
			switch (op - Ocmpw) {
			case Ciule: x = l.u <= r.u; break;
			case Ciult: x = l.u < r.u;  break;
			case Cisle: x = l.s <= r.s; break;
			case Cislt: x = l.s < r.s;  break;
			case Cisgt: x = l.s > r.s;  break;
			case Cisge: x = l.s >= r.s; break;
			case Ciugt: x = l.u > r.u;  break;
			case Ciuge: x = l.u >= r.u; break;
			case Cieq:  x = l.u == r.u; break;
			case Cine:  x = l.u != r.u; break;
			default: die("unreachable");
			}
		}
		else if (Ocmps <= op && op <= Ocmps1) {
			switch (op - Ocmps) {
			case Cfle: x = l.fs <= r.fs; break;
			case Cflt: x = l.fs < r.fs;  break;
			case Cfgt: x = l.fs > r.fs;  break;
			case Cfge: x = l.fs >= r.fs; break;
			case Cfne: x = l.fs != r.fs; break;
			case Cfeq: x = l.fs == r.fs; break;
			case Cfo:  x = l.fs < r.fs || l.fs >= r.fs; break;
			case Cfuo: x = !(l.fs < r.fs || l.fs >= r.fs); break;
			default: die("unreachable");
			}
		}
		else if (Ocmpd <= op && op <= Ocmpd1) {
			switch (op - Ocmpd) {
			case Cfle: x = l.fd <= r.fd; break;
			case Cflt: x = l.fd < r.fd;  break;
			case Cfgt: x = l.fd > r.fd;  break;
			case Cfge: x = l.fd >= r.fd; break;
			case Cfne: x = l.fd != r.fd; break;
			case Cfeq: x = l.fd == r.fd; break;
			case Cfo:  x = l.fd < r.fd || l.fd >= r.fd; break;
			case Cfuo: x = !(l.fd < r.fd || l.fd >= r.fd); break;
			default: die("unreachable");
			}
		}
		else
			die("unreachable");
	}
	*res = (Con){.type=typ, .sym=sym, .bits={.i=x}};
	return 0;
}
/* Fold a floating-point instruction into *res; w nonzero selects
 * double precision, otherwise single.  Address constants are not
 * valid operands here.  res->flt records which union member of
 * the result bits is meaningful (1 = float, 2 = double). */
static void
foldflt(Con *res, int op, int w, Con *cl, Con *cr)
{
	float xs, ls, rs;
	double xd, ld, rd;

	if (cl->type != CBits || cr->type != CBits)
		err("invalid address operand for '%s'", optab[op].name);
	*res = (Con){.type = CBits};
	memset(&res->bits, 0, sizeof(res->bits));
	if (w) {
		ld = cl->bits.d;
		rd = cr->bits.d;
		switch (op) {
		case Oadd: xd = ld + rd; break;
		case Osub: xd = ld - rd; break;
		case Oneg: xd = -ld; break;
		case Odiv: xd = ld / rd; break;
		case Omul: xd = ld * rd; break;
		/* integer-to-double conversions read the raw int bits */
		case Oswtof: xd = (int32_t)cl->bits.i; break;
		case Ouwtof: xd = (uint32_t)cl->bits.i; break;
		case Osltof: xd = (int64_t)cl->bits.i; break;
		case Oultof: xd = (uint64_t)cl->bits.i; break;
		case Oexts: xd = cl->bits.s; break;
		case Ocast: xd = ld; break;
		default: die("unreachable");
		}
		res->bits.d = xd;
		res->flt = 2;
	} else {
		ls = cl->bits.s;
		rs = cr->bits.s;
		switch (op) {
		case Oadd: xs = ls + rs; break;
		case Osub: xs = ls - rs; break;
		case Oneg: xs = -ls; break;
		case Odiv: xs = ls / rs; break;
		case Omul: xs = ls * rs; break;
		case Oswtof: xs = (int32_t)cl->bits.i; break;
		case Ouwtof: xs = (uint32_t)cl->bits.i; break;
		case Osltof: xs = (int64_t)cl->bits.i; break;
		case Oultof: xs = (uint64_t)cl->bits.i; break;
		case Otruncd: xs = cl->bits.d; break;
		case Ocast: xs = ls; break;
		default: die("unreachable");
		}
		res->bits.s = xs;
		res->flt = 1;
	}
}
/* Fold one instruction with constant operands into a fresh constant
 * ref; returns R when the integer fold is impossible.  Narrow (Kw)
 * results are truncated to 32 bits. */
static Ref
opfold(int op, int cls, Con *cl, Con *cr, Fn *fn)
{
	Ref r;
	Con c;

	if (cls == Kw || cls == Kl) {
		if (foldint(&c, op, cls == Kl, cl, cr))
			return R;
	} else
		foldflt(&c, op, cls == Kd, cl, cr);
	if (!KWIDE(cls))
		c.bits.i &= 0xffffffff;
	r = newcon(&c, fn);
	assert(!(cls == Ks || cls == Kd) || c.flt);
	return r;
}
/* used by GVN */
/* Try to fold instruction i whose operands are constants; returns
 * the folded constant ref, or R when i is not foldable.  A missing
 * second operand (R) is treated as the zero constant. */
Ref
foldref(Fn *fn, Ins *i)
{
	Ref rr;
	Con *cl, *cr;

	if (rtype(i->to) != RTmp)
		return R;
	if (optab[i->op].canfold) {
		if (rtype(i->arg[0]) != RCon)
			return R;
		cl = &fn->con[i->arg[0].val];
		rr = i->arg[1];
		if (req(rr, R))
			rr = CON_Z;
		if (rtype(rr) != RCon)
			return R;
		cr = &fn->con[rr.val];
		return opfold(i->op, i->cls, cl, cr, fn);
	}
	return R;
}

460
src/qbe/gcm.c Normal file
View File

@@ -0,0 +1,460 @@
#include "all.h"
#define NOBID (-1u)
/* Is i an integer (w/l class) division or remainder? */
static int
isdivwl(Ins *i)
{
	if (i->op != Odiv && i->op != Orem
	&& i->op != Oudiv && i->op != Ourem)
		return 0;
	return KBASE(i->cls) == 0;
}
/* An instruction is pinned when gcm must keep it in its block:
 * ops flagged pinned in optab, plus integer div/rem (presumably
 * because they can fault — confirm against the target docs). */
int
pinned(Ins *i)
{
	return optab[i->op].pinned || isdivwl(i);
}
/* pinned ins that can be eliminated if unused */
static int
canelim(Ins *i)
{
	return isload(i->op) || isalloc(i->op) || isdivwl(i);
}
static uint earlyins(Fn *, Blk *, Ins *);

/* Compute the earliest block (by id) where the definition of r may
 * legally be placed, memoized in Tmp.gcmbid (NOBID = not yet
 * computed).  Non-temp refs trivially return 0 (the start block). */
static uint
schedearly(Fn *fn, Ref r)
{
	Tmp *t;
	Blk *b;

	if (rtype(r) != RTmp)
		return 0;
	t = &fn->tmp[r.val];
	if (t->gcmbid != NOBID)
		return t->gcmbid;
	b = fn->rpo[t->bid];
	if (t->def) {
		assert(b->ins <= t->def && t->def < &b->ins[b->nins]);
		t->gcmbid = 0; /* mark as visiting */
		t->gcmbid = earlyins(fn, b, t->def);
	} else {
		/* phis do not move */
		t->gcmbid = t->bid;
	}
	return t->gcmbid;
}
/* Earliest legal block for instruction i: the deeper (in the dom
 * tree) of its arguments' early blocks; pinned instructions stay
 * in their own block b. */
static uint
earlyins(Fn *fn, Blk *b, Ins *i)
{
	uint b0, b1;

	b0 = schedearly(fn, i->arg[0]);
	assert(b0 != NOBID);
	b1 = schedearly(fn, i->arg[1]);
	assert(b1 != NOBID);
	/* keep the dominated (deeper) of the two blocks */
	if (fn->rpo[b0]->depth < fn->rpo[b1]->depth) {
		assert(dom(fn->rpo[b0], fn->rpo[b1]));
		b0 = b1;
	}
	return pinned(i) ? b->id : b0;
}
/* Seed early scheduling from the roots of block bid: phi args,
 * the operands of pinned instructions, and the jump argument. */
static void
earlyblk(Fn *fn, uint bid)
{
	Blk *b;
	Phi *p;
	Ins *i;
	uint n;

	b = fn->rpo[bid];
	for (p=b->phi; p; p=p->link)
		for (n=0; n<p->narg; n++)
			schedearly(fn, p->arg[n]);
	for (i=b->ins; i<&b->ins[b->nins]; i++)
		if (pinned(i)) {
			schedearly(fn, i->arg[0]);
			schedearly(fn, i->arg[1]);
		}
	schedearly(fn, b->jmp.arg);
}
/* least common ancestor in dom tree */
/* NOBID is the identity element: lca(x, NOBID) == x. */
static uint
lcabid(Fn *fn, uint bid1, uint bid2)
{
	Blk *b;

	if (bid1 == NOBID)
		return bid2;
	if (bid2 == NOBID)
		return bid1;
	b = lca(fn->rpo[bid1], fn->rpo[bid2]);
	assert(b);
	return b->id;
}
/* Walk the dom-tree path from the latest legal block up to the
 * earliest one and pick the block with the smallest loop nesting;
 * latebid == NOBID means the temp is unused. */
static uint
bestbid(Fn *fn, uint earlybid, uint latebid)
{
	Blk *curb, *earlyb, *bestb;

	if (latebid == NOBID)
		return NOBID; /* unused */
	assert(earlybid != NOBID);
	earlyb = fn->rpo[earlybid];
	bestb = curb = fn->rpo[latebid];
	assert(dom(earlyb, curb));
	while (curb != earlyb) {
		curb = curb->idom;
		if (curb->loop < bestb->loop)
			bestb = curb;
	}
	return bestb->id;
}
static uint lateins(Fn *, Blk *, Ins *, Ref r);
static uint latephi(Fn *, Phi *, Ref r);
static uint latejmp(Blk *, Ref r);

/* return lca bid of ref uses */
/* Second gcm phase: compute the latest block that dominates every
 * use of r, then store the final placement (via bestbid) in
 * Tmp.gcmbid, which up to now held the early bid.  Tmp.visit
 * memoizes completion. */
static uint
schedlate(Fn *fn, Ref r)
{
	Tmp *t;
	Blk *b;
	Use *u;
	uint earlybid;
	uint latebid;
	uint uselatebid;

	if (rtype(r) != RTmp)
		return NOBID;
	t = &fn->tmp[r.val];
	if (t->visit)
		return t->gcmbid;
	t->visit = 1;
	earlybid = t->gcmbid;
	if (earlybid == NOBID)
		return NOBID; /* not used */
	/* reuse gcmbid for late bid */
	t->gcmbid = t->bid;
	latebid = NOBID;
	for (u=t->use; u<&t->use[t->nuse]; u++) {
		assert(u->bid < fn->nblk);
		b = fn->rpo[u->bid];
		switch (u->type) {
		case UXXX:
			die("unreachable");
			break;
		case UPhi:
			uselatebid = latephi(fn, u->u.phi, r);
			break;
		case UIns:
			uselatebid = lateins(fn, b, u->u.ins, r);
			break;
		case UJmp:
			uselatebid = latejmp(b, r);
			break;
		}
		latebid = lcabid(fn, latebid, uselatebid);
	}
	/* latebid may be NOBID if the temp is used
	 * in fixed instructions that may be eliminated
	 * and are themselves unused transitively */
	if (t->def && !pinned(t->def))
		t->gcmbid = bestbid(fn, earlybid, latebid);
	/* else, keep the early one */
	/* now, gcmbid is the best bid */
	return t->gcmbid;
}
/* returns lca bid of uses or NOBID if
 * the definition can be eliminated */
/* The use of r by instruction i constrains r's late placement to
 * i's own late block; pinned users anchor it to their block b,
 * unless the pinned user is itself eliminable and unused. */
static uint
lateins(Fn *fn, Blk *b, Ins *i, Ref r)
{
	uint latebid;

	assert(b->ins <= i && i < &b->ins[b->nins]);
	assert(req(i->arg[0], r) || req(i->arg[1], r));
	latebid = schedlate(fn, i->to);
	if (pinned(i)) {
		if (latebid == NOBID)
			if (canelim(i))
				return NOBID;
		return b->id;
	}
	return latebid;
}
/* A phi use of r constrains r's late placement to the lca of the
 * predecessor blocks through which r flows in; narg == 0 marks a
 * phi already found unused by lateblk. */
static uint
latephi(Fn *fn, Phi *p, Ref r)
{
	uint n;
	uint latebid;

	if (!p->narg)
		return NOBID; /* marked as unused */
	latebid = NOBID;
	for (n = 0; n < p->narg; n++)
		if (req(p->arg[n], r))
			latebid = lcabid(fn, latebid, p->blk[n]->id);
	assert(latebid != NOBID);
	return latebid;
}
/* A jump use of r anchors it to the jumping block; a block with
 * no jump argument contributes no constraint. */
static uint
latejmp(Blk *b, Ref r)
{
	if (req(b->jmp.arg, R))
		return NOBID;
	assert(req(b->jmp.arg, r));
	return b->id;
}
/* Run late scheduling from the anchors of block bid: unused phis
 * are unlinked (narg = 0 marks them for latephi), and the results
 * of pinned instructions are scheduled. */
static void
lateblk(Fn *fn, uint bid)
{
	Blk *b;
	Phi **pp;
	Ins *i;

	b = fn->rpo[bid];
	for (pp=&b->phi; *(pp);)
		if (schedlate(fn, (*pp)->to) == NOBID) {
			(*pp)->narg = 0; /* mark unused */
			*pp = (*pp)->link; /* remove phi */
		} else
			pp = &(*pp)->link;
	for (i=b->ins; i<&b->ins[b->nins]; i++)
		if (pinned(i))
			schedlate(fn, i->to);
}
/* Append each instruction of vins to the end of the block gcm
 * chose for it (the gcmbid of its destination temp). */
static void
addgcmins(Fn *fn, Ins *vins, uint nins)
{
	Ins *i;
	Tmp *t;
	Blk *b;

	for (i=vins; i<&vins[nins]; i++) {
		assert(rtype(i->to) == RTmp);
		t = &fn->tmp[i->to.val];
		b = fn->rpo[t->gcmbid];
		addins(&b->ins, &b->nins, i);
	}
}
/* move live instructions to the
 * end of their target block; use-
 * before-def errors are fixed by
 * schedblk */
static void
gcmmove(Fn *fn)
{
	Tmp *t;
	Ins *vins, *i;
	uint nins;

	nins = 0;
	vins = vnew(nins, sizeof vins[0], PFn);
	for (t=fn->tmp; t<&fn->tmp[fn->ntmp]; t++) {
		if (t->def == 0)
			continue;
		/* already in its chosen block */
		if (t->bid == t->gcmbid)
			continue;
		i = t->def;
		if (pinned(i) && !canelim(i))
			continue;
		assert(rtype(i->to) == RTmp);
		assert(t == &fn->tmp[i->to.val]);
		/* gcmbid == NOBID means unused: the def is
		 * simply nop'd out and dropped */
		if (t->gcmbid != NOBID)
			addins(&vins, &nins, i);
		*i = (Ins){.op = Onop};
	}
	addgcmins(fn, vins, nins);
}
/* dfs ordering */
/* Append the instruction group containing i to *pvins, after
 * recursively appending the in-block definitions its members
 * depend on; scheduled slots are overwritten with nops so idup in
 * schedblk can rebuild the block.  Returns the end of the group. */
static Ins *
schedins(Fn *fn, Blk *b, Ins *i, Ins **pvins, uint *pnins)
{
	Ins *i0, *i1;
	Tmp *t;
	uint n;

	igroup(b, i, &i0, &i1);
	for (i=i0; i<i1; i++)
		for (n=0; n<2; n++) {
			if (rtype(i->arg[n]) != RTmp)
				continue;
			t = &fn->tmp[i->arg[n].val];
			/* only same-block, instruction-defined
			 * temps need ordering */
			if (t->bid != b->id || !t->def)
				continue;
			schedins(fn, b, t->def, pvins, pnins);
		}
	for (i=i0; i<i1; i++) {
		addins(pvins, pnins, i);
		*i = (Ins){.op = Onop};
	}
	return i1;
}
/* order ins within a block */
static void
schedblk(Fn *fn)
{
	Ins *ip, *buf;
	uint cnt;
	Blk *blk;

	buf = vnew(0, sizeof buf[0], PHeap);
	for (blk=fn->start; blk; blk=blk->link) {
		cnt = 0;
		ip = blk->ins;
		/* schedins emits whole dependency groups,
		 * so it advances the cursor itself */
		while (ip < &blk->ins[blk->nins])
			ip = schedins(fn, blk, ip, &buf, &cnt);
		idup(blk, buf, cnt);
	}
	vfree(buf);
}
/* is i a trivial integer op that can be
 * duplicated at its points of use? */
static int
cheap(Ins *i)
{
	int x;
	int op;

	if (KBASE(i->cls) != 0)
		return 0;
	op = i->op;
	if (op == Oneg || op == Oadd || op == Osub
	|| op == Omul || op == Oand || op == Oor
	|| op == Oxor || op == Osar || op == Oshr
	|| op == Oshl)
		return 1;
	return iscmp(op, &x, &x);
}
/* if *pr is a cheap, unpinned def from another
 * block, duplicate that def into b under a fresh
 * temp and redirect *pr to it */
static void
sinkref(Fn *fn, Blk *b, Ref *pr)
{
	Ins i;
	Tmp *t;
	Ref r;

	if (rtype(*pr) != RTmp)
		return;
	t = &fn->tmp[pr->val];
	if (!t->def
	|| t->bid == b->id
	|| pinned(t->def)
	|| !cheap(t->def))
		return;
	/* sink t->def to b */
	i = *t->def;
	r = newtmp("snk", t->cls, fn);
	t = 0; /* invalidated */
	*pr = r;
	i.to = r;
	fn->tmp[r.val].gcmbid = b->id;
	emiti(i);
	/* NOTE(review): the recursion mutates the local
	 * copy i after emiti(); verify that emiti leaves
	 * the emitted instruction reachable through these
	 * arg slots (or that the updates are reapplied) */
	sinkref(fn, b, &i.arg[0]);
	sinkref(fn, b, &i.arg[1]);
}
/* redistribute trivial ops to point of
 * use to reduce register pressure
 * requires rpo, use; breaks use
 */
static void
sink(Fn *fn)
{
	Blk *b;
	Ins *i;

	for (b=fn->start; b; b=b->link) {
		/* only address operands of loads/stores and
		 * jump arguments are considered */
		for (i=b->ins; i<&b->ins[b->nins]; i++)
			if (isload(i->op))
				sinkref(fn, b, &i->arg[0]);
			else if (isstore(i->op))
				sinkref(fn, b, &i->arg[1]);
		sinkref(fn, b, &b->jmp.arg);
	}
	/* instructions emitted by sinkref sit in
	 * [curi, insb+NIns); route them to their blocks */
	addgcmins(fn, curi, &insb[NIns] - curi);
}
/* requires use dom
 * maintains rpo pred dom
 * breaks use
 */
void
gcm(Fn *fn)
{
	Tmp *t;
	uint bid;

	filldepth(fn);
	fillloop(fn);
	for (t=fn->tmp; t<&fn->tmp[fn->ntmp]; t++) {
		t->visit = 0;
		t->gcmbid = NOBID;
	}
	/* schedule every def early, then late */
	for (bid=0; bid<fn->nblk; bid++)
		earlyblk(fn, bid);
	for (bid=0; bid<fn->nblk; bid++)
		lateblk(fn, bid);
	gcmmove(fn);
	filluse(fn);
	/* sink emits through the global ins buffer */
	curi = &insb[NIns];
	sink(fn);
	filluse(fn);
	schedblk(fn);
	if (debug['G']) {
		fprintf(stderr, "\n> After GCM:\n");
		printfn(fn, stderr);
	}
}

508
src/qbe/gvn.c Normal file
View File

@@ -0,0 +1,508 @@
#include "all.h"
/* constant refs for 0 and 1, filled in by gvn() */
Ref con01[2];
/* combine two hash values */
static inline uint
mix(uint x0, uint x1)
{
	return 17*x1 + x0;
}
/* hash a ref by its type and value */
static inline uint
rhash(Ref r)
{
	uint h;

	h = mix(r.type, r.val);
	return h;
}
/* hash an instruction over op, class, and args */
static uint
ihash(Ins *i)
{
	uint h0;

	h0 = mix(i->op, i->cls);
	h0 = mix(h0, rhash(i->arg[0]));
	return mix(h0, rhash(i->arg[1]));
}
/* structural equality of two instructions */
static int
ieq(Ins *ia, Ins *ib)
{
	return ia->op == ib->op
		&& ia->cls == ib->cls
		&& req(ia->arg[0], ib->arg[0])
		&& req(ia->arg[1], ib->arg[1]);
}
/* value-numbering hash table (open addressing,
 * see gvndup); sized and freed in gvn() */
static Ins **gvntbl;
static uint gvntbln;
/* look i up in the value-numbering table and
 * return the recorded equivalent instruction,
 * or 0; when insert is set and no equivalent
 * exists, i is recorded.
 * (dropped the unused probe counter from the
 * scan loop) */
static Ins *
gvndup(Ins *i, int insert)
{
	uint idx;
	Ins *ii;

	/* linear probing; gvn() sizes the table so
	 * that a free cell always exists */
	idx = ihash(i) % gvntbln;
	for (;;) {
		ii = gvntbl[idx];
		if (!ii)
			break;
		if (ieq(i, ii))
			return ii;
		idx++;
		if (gvntbln <= idx)
			idx = 0;
	}
	if (insert)
		gvntbl[idx] = i;
	return 0;
}
/* rewrite one recorded use u of r1 into a use of
 * r2; when r2 is a temporary, the use is also
 * registered on r2's tmp so use lists stay valid */
static void
replaceuse(Fn *fn, Use *u, Ref r1, Ref r2)
{
	Blk *b;
	Ins *i;
	Phi *p;
	Ref *pr;
	Tmp *t2;
	int n;

	t2 = 0;
	if (rtype(r2) == RTmp)
		t2 = &fn->tmp[r2.val];
	b = fn->rpo[u->bid];
	switch (u->type) {
	case UPhi:
		p = u->u.phi;
		for (pr=p->arg; pr<&p->arg[p->narg]; pr++)
			if (req(*pr, r1))
				*pr = r2;
		if (t2)
			adduse(t2, UPhi, b, p);
		break;
	case UIns:
		i = u->u.ins;
		for (n=0; n<2; n++)
			if (req(i->arg[n], r1))
				i->arg[n] = r2;
		if (t2)
			adduse(t2, UIns, b, i);
		break;
	case UJmp:
		if (req(b->jmp.arg, r1))
			b->jmp.arg = r2;
		if (t2)
			adduse(t2, UJmp, b);
		break;
	case UXXX:
		die("unreachable");
	}
}
/* redirect every recorded use of r1 to r2 and
 * clear r1's use list */
static void
replaceuses(Fn *fn, Ref r1, Ref r2)
{
	Use *u, *ue;
	Tmp *t;

	assert(rtype(r1) == RTmp);
	t = &fn->tmp[r1.val];
	ue = &t->use[t->nuse];
	for (u=t->use; u<ue; u++)
		replaceuse(fn, u, r1, r2);
	t->nuse = 0;
}
/* remove phis that are copies (as determined by
 * phicopyref) and redirect their uses to the
 * copied ref */
static void
dedupphi(Fn *fn, Blk *b)
{
	Phi *p, **pp;
	Ref r;

	for (pp=&b->phi; (p=*pp);) {
		r = phicopyref(fn, b, p);
		if (!req(r, R)) {
			replaceuses(fn, p->to, r);
			p->to = R; /* mark eliminated (see usecls) */
			*pp = p->link;
		} else
			pp = &p->link;
	}
}
/* total order on refs: by type, then by value */
static int
rcmp(Ref a, Ref b)
{
	int d;

	d = rtype(a) - rtype(b);
	if (d != 0)
		return d;
	return a.val - b.val;
}
/* normalize i so that equivalent instructions
 * hash and compare equal in the gvn table */
static void
normins(Fn *fn, Ins *i)
{
	uint n;
	int64_t v;
	Ref r;

	/* truncate constant bits to
	 * 32 bits for s/w uses */
	for (n=0; n<2; n++) {
		if (!KWIDE(argcls(i, n)))
		if (isconbits(fn, i->arg[n], &v))
		if ((v & 0xffffffff) != v)
			i->arg[n] = getcon(v & 0xffffffff, fn);
	}
	/* order arg[0] <= arg[1] for
	 * commutative ops, preferring
	 * RTmp in arg[0] */
	if (optab[i->op].commutes)
	if (rcmp(i->arg[0], i->arg[1]) > 0) {
		r = i->arg[1];
		i->arg[1] = i->arg[0];
		i->arg[0] = r;
	}
}
/* negate c in place (c := 0 - c) in class cls;
 * returns nonzero when the fold fails */
static int
negcon(int cls, Con *c)
{
	static Con z = {.type = CBits, .bits.i = 0};

	return foldint(c, Osub, cls, &z, c);
}
/* constant reassociation:
 *   (x op c2) op c1  ==>  x op fold(c1, c2)
 * for associative integer ops with constant right
 * operands; subtractions are normalized to
 * additions of negated constants first */
static void
assoccon(Fn *fn, Blk *b, Ins *i1)
{
	Tmp *t2;
	Ins *i2;
	int op, fail;
	Con c, c1, c2;

	op = i1->op;
	if (op == Osub)
		op = Oadd;
	if (!optab[op].assoc
	|| KBASE(i1->cls) != 0
	|| rtype(i1->arg[0]) != RTmp
	|| rtype(i1->arg[1]) != RCon)
		return;
	c1 = fn->con[i1->arg[1].val];
	t2 = &fn->tmp[i1->arg[0].val];
	if (t2->def == 0)
		return;
	i2 = t2->def;
	/* the inner def must be the same normalized op
	 * with a constant right operand */
	if (op != (i2->op == Osub ? Oadd : i2->op)
	|| rtype(i2->arg[1]) != RCon)
		return;
	c2 = fn->con[i2->arg[1].val];
	assert(KBASE(i2->cls) == 0);
	assert(KWIDE(i2->cls) >= KWIDE(i1->cls));
	if (i1->op == Osub && negcon(i1->cls, &c1))
		return;
	if (i2->op == Osub && negcon(i2->cls, &c2))
		return;
	if (foldint(&c, op, i1->cls, &c1, &c2))
		return;
	/* prefer subtracting a positive constant over
	 * adding a negative one */
	if (op == Oadd && c.type == CBits)
	if ((i1->cls == Kl && c.bits.i < 0)
	|| (i1->cls == Kw && (int32_t)c.bits.i < 0)) {
		fail = negcon(i1->cls, &c);
		assert(fail == 0);
		op = Osub;
	}
	i1->op = op;
	i1->arg[0] = i2->arg[0];
	i1->arg[1] = newcon(&c, fn);
	adduse(&fn->tmp[i1->arg[0].val], UIns, b, i1);
}
/* replace all uses of i->to by r and
 * delete the instruction */
static void
killins(Fn *fn, Ins *i, Ref r)
{
	replaceuses(fn, i->to, r);
	*i = (Ins){.op = Onop};
}
/* value-number one instruction: normalize it, try
 * reassociation, copy propagation, and constant
 * folding, then dedup through the gvn table */
static void
dedupins(Fn *fn, Blk *b, Ins *i)
{
	Ref r;
	Ins *i1;

	normins(fn, i);
	if (i->op == Onop || pinned(i))
		return;
	/* when sel instructions are inserted
	 * before gvn, we may want to optimize
	 * them here */
	assert(i->op != Osel0);
	assert(!req(i->to, R));
	assoccon(fn, b, i);
	r = copyref(fn, b, i);
	if (!req(r, R)) {
		killins(fn, i, r);
		return;
	}
	r = foldref(fn, i);
	if (!req(r, R)) {
		killins(fn, i, r);
		return;
	}
	i1 = gvndup(i, 1);
	if (i1) {
		/* an equivalent instruction was seen before */
		killins(fn, i, i1->to);
		return;
	}
}
/* if r is defined by a compare-with-zero (cmpeqwl
 * op whose right operand is 0), return its tested
 * argument, that argument's class, and the op's
 * equality polarity (eqval) */
int
cmpeqz(Fn *fn, Ref r, Ref *arg, int *cls, int *eqval)
{
	Ins *i;

	if (rtype(r) != RTmp)
		return 0;
	i = fn->tmp[r.val].def;
	if (i)
	if (optab[i->op].cmpeqwl)
	if (req(i->arg[1], CON_Z)) {
		*arg = i->arg[0];
		*cls = argcls(i, 0);
		*eqval = optab[i->op].eqval;
		return 1;
	}
	return 0;
}
/* is b reached exclusively through branch bbr1 of
 * the jnz in bif (never through bbr2 without first
 * going back through bif)? */
static int
branchdom(Fn *fn, Blk *bif, Blk *bbr1, Blk *bbr2, Blk *b)
{
	assert(bif->jmp.type == Jjnz);
	return b != bif
		&& dom(bbr1, b)
		&& !reachesnotvia(fn, bbr2, b, bif);
}
/* given the jnz in d, decide whether reaching b
 * pins its condition: *z = 1 when b lies on the
 * zero (s2) side, 0 on the non-zero (s1) side */
static int
domzero(Fn *fn, Blk *d, Blk *b, int *z)
{
	int zv;

	for (zv=0; zv<=1; zv++)
		if (branchdom(fn, d, zv ? d->s2 : d->s1,
				zv ? d->s1 : d->s2, b)) {
			*z = zv;
			return 1;
		}
	return 0;
}
/* infer 0/non-0 value from dominating jnz */
int
zeroval(Fn *fn, Blk *b, Ref r, int cls, int *z)
{
	Blk *d;
	Ref arg;
	int cls1, eqval;

	for (d=b->idom; d; d=d->idom) {
		if (d->jmp.type != Jjnz)
			continue;
		/* the jnz tests r directly */
		if (req(r, d->jmp.arg)
		&& cls == Kw
		&& domzero(fn, d, b, z)) {
			return 1;
		}
		/* the jnz tests a compare of r with 0 */
		if (cmpeqz(fn, d->jmp.arg, &arg, &cls1, &eqval)
		&& req(r, arg)
		&& cls == cls1
		&& domzero(fn, d, b, z)) {
			*z ^= eqval;
			return 1;
		}
	}
	return 0;
}
/* class with which use u consumes r; falls back
 * to cls when the use does not constrain it */
static int
usecls(Use *u, Ref r, int cls)
{
	int k;

	switch (u->type) {
	case UIns:
		k = Kx; /* widest use */
		if (req(u->u.ins->arg[0], r))
			k = argcls(u->u.ins, 0);
		if (req(u->u.ins->arg[1], r))
		if (k == Kx || !KWIDE(k))
			k = argcls(u->u.ins, 1);
		return k == Kx ? cls : k;
	case UPhi:
		if (req(u->u.phi->to, R))
			return cls; /* eliminated */
		return u->u.phi->cls;
	case UJmp:
		return Kw;
	default:
		break;
	}
	die("unreachable");
}
/* rewrite uses of r into constant 0 in the region
 * reached only through the zero branch s0 of the
 * jnz in bif */
static void
propjnz0(Fn *fn, Blk *bif, Blk *s0, Blk *snon0, Ref r, int cls)
{
	Blk *b;
	Tmp *t;
	Use *u;

	if (s0->npred != 1 || rtype(r) != RTmp)
		return;
	t = &fn->tmp[r.val];
	for (u=t->use; u<&t->use[t->nuse]; u++) {
		b = fn->rpo[u->bid];
		/* we may compare an l temp with a w
		 * comparison; so check that the use
		 * does not involve high bits */
		if (usecls(u, r, cls) == cls)
		if (branchdom(fn, bif, s0, snon0, b))
			replaceuse(fn, u, r, CON_Z);
	}
}
/* simplify the terminator of b: push known-zero
 * facts into the branch targets and collapse a
 * jnz with a known condition into a jmp */
static void
dedupjmp(Fn *fn, Blk *b)
{
	Blk **ps;
	int64_t v;
	Ref arg;
	int cls, eqval, z;

	if (b->jmp.type != Jjnz)
		return;
	/* propagate jmp arg as 0 through s2 */
	propjnz0(fn, b, b->s2, b->s1, b->jmp.arg, Kw);
	/* propagate cmp eq/ne 0 def of jmp arg as 0 */
	if (cmpeqz(fn, b->jmp.arg, &arg, &cls, &eqval)) {
		ps = (Blk*[]){b->s1, b->s2};
		propjnz0(fn, b, ps[eqval^1], ps[eqval], arg, cls);
	}
	/* collapse trivial/constant jnz to jmp */
	v = 1;
	z = 0;
	if (b->s1 == b->s2
	|| isconbits(fn, b->jmp.arg, &v)
	|| zeroval(fn, b, b->jmp.arg, Kw, &z)) {
		if (v == 0 || z)
			b->s1 = b->s2;
		/* we later move active ins out of dead blks */
		b->s2 = 0;
		b->jmp.type = Jjmp;
		b->jmp.arg = R;
	}
}
/* recompute the cfg after terminators were
 * simplified; unpinned instructions that the gvn
 * table still references but that sit in now
 * unreachable blocks are salvaged into start */
static void
rebuildcfg(Fn *fn)
{
	uint n, nblk;
	Blk *b, *s, **rpo;
	Ins *i;

	nblk = fn->nblk;
	rpo = emalloc(nblk * sizeof rpo[0]);
	memcpy(rpo, fn->rpo, nblk * sizeof rpo[0]);
	fillcfg(fn);
	/* move instructions that were in
	 * killed blocks and may be active
	 * in the computation in the start
	 * block */
	s = fn->start;
	for (n=0; n<nblk; n++) {
		b = rpo[n];
		if (b->id != -1u)
			continue;
		/* blk unreachable after GVN */
		assert(b != s);
		for (i=b->ins; i<&b->ins[b->nins]; i++)
			if (!optab[i->op].pinned)
			if (gvndup(i, 0) == i)
				addins(&s->ins, &s->nins, i);
	}
	free(rpo);
}
/* requires rpo pred ssa use
 * recreates rpo preds
 * breaks pred use dom ssa (GCM fixes ssa)
 */
void
gvn(Fn *fn)
{
	Blk *b;
	Phi *p;
	Ins *i;
	uint n, nins;

	con01[0] = getcon(0, fn);
	con01[1] = getcon(1, fn);
	/* copy.c uses the visit bit */
	for (b=fn->start; b; b=b->link)
		for (p=b->phi; p; p=p->link)
			p->visit = 0;
	fillloop(fn);
	narrowpars(fn);
	filluse(fn);
	ssacheck(fn);
	/* size the hash table for all instructions */
	nins = 0;
	for (b=fn->start; b; b=b->link) {
		b->visit = 0;
		nins += b->nins;
	}
	gvntbln = nins + nins/2;
	gvntbl = emalloc(gvntbln * sizeof gvntbl[0]);
	/* process blocks in rpo so a def is seen
	 * before the uses it dominates */
	for (n=0; n<fn->nblk; n++) {
		b = fn->rpo[n];
		dedupphi(fn, b);
		for (i=b->ins; i<&b->ins[b->nins]; i++)
			dedupins(fn, b, i);
		dedupjmp(fn, b);
	}
	rebuildcfg(fn);
	free(gvntbl);
	gvntbl = 0;
	if (debug['G']) {
		fprintf(stderr, "\n> After GVN:\n");
		printfn(fn, stderr);
	}
}

121
src/qbe/ifopt.c Normal file
View File

@@ -0,0 +1,121 @@
#include "all.h"
/* size limits for if-conversion candidates */
enum {
	MaxIns = 2,  /* max non-nop ins per branch block */
	MaxPhis = 2, /* max phis in the join block */
};
/* can branch block b be speculated? it must have
 * no pinned instruction and at most MaxIns real
 * (non-nop, non-dbgloc) instructions */
static int
okbranch(Blk *b)
{
	Ins *ip, *end;
	int cnt;

	cnt = 0;
	end = &b->ins[b->nins];
	for (ip=b->ins; ip<end; ip++) {
		if (ip->op == Odbgloc)
			continue;
		if (pinned(ip))
			return 0;
		if (ip->op != Onop)
			cnt++;
	}
	return cnt <= MaxIns;
}
/* is the join block convertible? all phis must be
 * of integer class and at most MaxPhis of them */
static int
okjoin(Blk *b)
{
	Phi *ph;
	int cnt;

	cnt = 0;
	for (ph=b->phi; ph; ph=ph->link) {
		if (KBASE(ph->cls) != 0)
			return 0;
		cnt++;
	}
	return cnt <= MaxPhis;
}
/* is the whole if-then[-else] graphlet small
 * enough to convert? */
static int
okgraph(Blk *ifb, Blk *thenb, Blk *elseb, Blk *joinb)
{
	if (joinb->npred != 2)
		return 0;
	if (!okjoin(joinb))
		return 0;
	assert(thenb != elseb);
	return (thenb == ifb || okbranch(thenb))
		&& (elseb == ifb || okbranch(elseb));
}
/* flatten the ifb/thenb/elseb/joinb graphlet into
 * straight-line code in ifb, turning each join phi
 * into a sel1 guarded by one sel0 condition */
static void
convert(Blk *ifb, Blk *thenb, Blk *elseb, Blk *joinb)
{
	Ins *ins, sel;
	Phi *p;
	uint nins;

	ins = vnew(0, sizeof ins[0], PHeap);
	nins = 0;
	addbins(&ins, &nins, ifb);
	if (thenb != ifb)
		addbins(&ins, &nins, thenb);
	if (elseb != ifb)
		addbins(&ins, &nins, elseb);
	assert(joinb->npred == 2);
	if (joinb->phi) {
		/* sel0 carries the branch condition */
		sel = (Ins){
			.op = Osel0, .cls = Kw,
			.arg = {ifb->jmp.arg},
		};
		addins(&ins, &nins, &sel);
	}
	/* one sel1 per phi picks between the
	 * then-value and the else-value */
	sel = (Ins){.op = Osel1};
	for (p=joinb->phi; p; p=p->link) {
		sel.to = p->to;
		sel.cls = p->cls;
		sel.arg[0] = phiarg(p, thenb);
		sel.arg[1] = phiarg(p, elseb);
		addins(&ins, &nins, &sel);
	}
	idup(ifb, ins, nins);
	/* ifb now jumps straight to the join block */
	ifb->jmp.type = Jjmp;
	ifb->jmp.arg = R;
	ifb->s1 = joinb;
	ifb->s2 = 0;
	joinb->npred = 1;
	joinb->pred[0] = ifb;
	joinb->phi = 0;
	vfree(ins);
}
/* eliminate if-then[-else] graphlets
 * using sel instructions
 * needs rpo pred use; breaks cfg use
 */
void
ifconvert(Fn *fn)
{
	Blk *ifb, *thenb, *elseb, *joinb;

	if (debug['K'])
		fputs("\n> If-conversion:\n", stderr);
	for (ifb=fn->start; ifb; ifb=ifb->link)
		if (ifgraph(ifb, &thenb, &elseb, &joinb))
		if (okgraph(ifb, thenb, elseb, joinb)) {
			if (debug['K'])
				fprintf(stderr,
					" @%s -> @%s, @%s -> @%s\n",
					ifb->name, thenb->name, elseb->name,
					joinb->name);
			convert(ifb, thenb, elseb, joinb);
		}
	if (debug['K']) {
		fprintf(stderr, "\n> After if-conversion:\n");
		printfn(fn, stderr);
	}
}

144
src/qbe/live.c Normal file
View File

@@ -0,0 +1,144 @@
#include "all.h"
/* compute in v the set of temps live on the edge
 * from b to its successor s: s's live-in minus
 * s's phi definitions, plus the phi arguments
 * contributed by b (which also join b's gen set) */
void
liveon(BSet *v, Blk *b, Blk *s)
{
	Phi *p;
	uint a;

	bscopy(v, s->in);
	for (p=s->phi; p; p=p->link)
		if (rtype(p->to) == RTmp)
			bsclr(v, p->to.val);
	for (p=s->phi; p; p=p->link)
		for (a=0; a<p->narg; a++)
			if (p->blk[a] == b)
			if (rtype(p->arg[a]) == RTmp) {
				bsset(v, p->arg[a].val);
				bsset(b->gen, p->arg[a].val);
			}
}
/* mark r live in b: add it to the gen set and,
 * when newly live-in, bump the live count of its
 * register class */
static void
bset(Ref r, Blk *b, int *nlv, Tmp *tmp)
{
	if (rtype(r) != RTmp)
		return;
	bsset(b->gen, r.val);
	if (!bshas(b->in, r.val)) {
		nlv[KBASE(tmp[r.val].cls)]++;
		bsset(b->in, r.val);
	}
}
/* liveness analysis
 * requires rpo computation
 */
void
filllive(Fn *f)
{
	Blk *b;
	Ins *i;
	int k, t, m[2], n, chg, nlv[2];
	BSet u[1], v[1];
	Mem *ma;

	bsinit(u, f->ntmp);
	bsinit(v, f->ntmp);
	for (b=f->start; b; b=b->link) {
		bsinit(b->in, f->ntmp);
		bsinit(b->out, f->ntmp);
		bsinit(b->gen, f->ntmp);
	}
	chg = 1;
Again:
	/* backward data-flow: sweep blocks in reverse
	 * rpo until the out sets stop changing */
	for (n=f->nblk-1; n>=0; n--) {
		b = f->rpo[n];
		bscopy(u, b->out);
		if (b->s1) {
			liveon(v, b, b->s1);
			bsunion(b->out, v);
		}
		if (b->s2) {
			liveon(v, b, b->s2);
			bsunion(b->out, v);
		}
		chg |= !bsequal(b->out, u);
		/* nlv tracks the live count per register
		 * class (KBASE) while scanning backwards */
		memset(nlv, 0, sizeof nlv);
		b->out->t[0] |= T.rglob;
		bscopy(b->in, b->out);
		for (t=0; bsiter(b->in, &t); t++)
			nlv[KBASE(f->tmp[t].cls)]++;
		if (rtype(b->jmp.arg) == RCall) {
			assert((int)bscount(b->in) == T.nrglob &&
				b->in->t[0] == T.rglob);
			b->in->t[0] |= T.retregs(b->jmp.arg, nlv);
		} else
			bset(b->jmp.arg, b, nlv, f->tmp);
		for (k=0; k<2; k++)
			b->nlive[k] = nlv[k];
		for (i=&b->ins[b->nins]; i!=b->ins;) {
			if ((--i)->op == Ocall && rtype(i->arg[1]) == RCall) {
				b->in->t[0] &= ~T.retregs(i->arg[1], m);
				for (k=0; k<2; k++) {
					nlv[k] -= m[k];
					/* caller-save registers are used
					 * by the callee, in that sense,
					 * right in the middle of the call,
					 * they are live: */
					nlv[k] += T.nrsave[k];
					if (nlv[k] > b->nlive[k])
						b->nlive[k] = nlv[k];
				}
				b->in->t[0] |= T.argregs(i->arg[1], m);
				for (k=0; k<2; k++) {
					nlv[k] -= T.nrsave[k];
					nlv[k] += m[k];
				}
			}
			if (!req(i->to, R)) {
				assert(rtype(i->to) == RTmp);
				t = i->to.val;
				if (bshas(b->in, t))
					nlv[KBASE(f->tmp[t].cls)]--;
				bsset(b->gen, t);
				bsclr(b->in, t);
			}
			for (k=0; k<2; k++)
				switch (rtype(i->arg[k])) {
				case RMem:
					/* memory operands keep their base
					 * and index registers live */
					ma = &f->mem[i->arg[k].val];
					bset(ma->base, b, nlv, f->tmp);
					bset(ma->index, b, nlv, f->tmp);
					break;
				default:
					bset(i->arg[k], b, nlv, f->tmp);
					break;
				}
			for (k=0; k<2; k++)
				if (nlv[k] > b->nlive[k])
					b->nlive[k] = nlv[k];
		}
	}
	if (chg) {
		chg = 0;
		goto Again;
	}
	if (debug['L']) {
		fprintf(stderr, "\n> Liveness analysis:\n");
		for (b=f->start; b; b=b->link) {
			fprintf(stderr, "\t%-10sin: ", b->name);
			dumpts(b->in, f->tmp, stderr);
			fprintf(stderr, "\t out: ");
			dumpts(b->out, f->tmp, stderr);
			fprintf(stderr, "\t gen: ");
			dumpts(b->gen, f->tmp, stderr);
			fprintf(stderr, "\t live: ");
			fprintf(stderr, "%d %d\n", b->nlive[0], b->nlive[1]);
		}
	}
}

493
src/qbe/load.c Normal file
View File

@@ -0,0 +1,493 @@
#include "all.h"
#define MASK(w) (BIT(8*(w)-1)*2-1) /* must work when w==8 */

typedef struct Loc Loc;
typedef struct Slice Slice;
typedef struct Insert Insert;

/* a program point at which replacement
 * instructions may be inserted */
struct Loc {
	enum {
		LRoot, /* right above the original load */
		LLoad, /* inserting a load is allowed */
		LNoLoad, /* only scalar operations allowed */
	} type;
	uint off; /* instruction index in blk */
	Blk *blk;
};

/* the byte range [off, off+sz) of memory at ref */
struct Slice {
	Ref ref;
	int off;
	short sz;
	short cls; /* load class */
};

/* log entry for an instruction or phi insertion
 * replayed by loadopt() after the scan */
struct Insert {
	uint isphi:1;
	uint num:31; /* insertion order (see icmp) */
	uint bid;
	uint off;
	union {
		Ins ins;
		struct {
			Slice m;
			Phi *p;
		} phi;
	} new;
};

static Fn *curf;
static uint inum; /* current insertion number */
static Insert *ilog; /* global insertion log */
static uint nlog; /* number of entries in the log */
/* width in bytes of the value read by load l */
int
loadsz(Ins *l)
{
	switch (l->op) {
	case Oloadsb: case Oloadub: return 1;
	case Oloadsh: case Oloaduh: return 2;
	case Oloadsw: case Oloaduw: return 4;
	case Oload: return KWIDE(l->cls) ? 8 : 4;
	}
	die("unreachable");
}
/* width in bytes of the value written by store s */
int
storesz(Ins *s)
{
	switch (s->op) {
	case Ostoreb: return 1;
	case Ostoreh: return 2;
	case Ostorew: case Ostores: return 4;
	case Ostorel: case Ostored: return 8;
	}
	die("unreachable");
}
/* log a new instruction to be inserted at l and
 * return the fresh temporary holding its result */
static Ref
iins(int cls, int op, Ref a0, Ref a1, Loc *l)
{
	Insert *ist;

	vgrow(&ilog, ++nlog);
	ist = &ilog[nlog-1];
	ist->isphi = 0;
	ist->num = inum++;
	ist->bid = l->blk->id;
	ist->off = l->off;
	ist->new.ins = (Ins){op, cls, R, {a0, a1}};
	return ist->new.ins.to = newtmp("ld", cls, curf);
}
/* insert the conversions needed at l for *r to be
 * usable with class cls */
static void
cast(Ref *r, int cls, Loc *l)
{
	int cls0;

	if (rtype(*r) == RCon)
		return;
	assert(rtype(*r) == RTmp);
	cls0 = curf->tmp[r->val].cls;
	if (cls0 == cls || (cls == Kw && cls0 == Kl))
		return;
	if (KWIDE(cls0) < KWIDE(cls)) {
		/* widening: bit-cast floats to bits, zero
		 * extend, then bit-cast back if needed */
		if (cls0 == Ks)
			*r = iins(Kw, Ocast, *r, R, l);
		*r = iins(Kl, Oextuw, *r, R, l);
		if (cls == Kd)
			*r = iins(Kd, Ocast, *r, R, l);
	} else {
		if (cls0 == Kd && cls != Kl)
			*r = iins(Kl, Ocast, *r, R, l);
		if (cls0 != Kd || cls != Kw)
			*r = iins(cls, Ocast, *r, R, l);
	}
}
/* and *r with the constant mask msk, casting *r
 * to class cls first */
static inline void
mask(int cls, Ref *r, bits msk, Loc *l)
{
	cast(r, cls, l);
	*r = iins(cls, Oand, *r, getcon(msk, curf), l);
}
/* log a load of slice sl at l, masking the result
 * when msk covers only part of it; the address is
 * rebased on the alias base of sl.ref */
static Ref
load(Slice sl, bits msk, Loc *l)
{
	Alias *a;
	Ref r, r1;
	int ld, cls, all;
	Con c;

	/* pick the unsigned load op for the size */
	ld = (int[]){
		[1] = Oloadub,
		[2] = Oloaduh,
		[4] = Oloaduw,
		[8] = Oload
	}[sl.sz];
	all = msk == MASK(sl.sz);
	if (all)
		cls = sl.cls;
	else
		cls = sl.sz > 4 ? Kl : Kw;
	r = sl.ref;
	/* sl.ref might not be live here,
	 * but its alias base ref will be
	 * (see killsl() below) */
	if (rtype(r) == RTmp) {
		a = &curf->tmp[r.val].alias;
		switch (a->type) {
		default:
			die("unreachable");
		case ALoc:
		case AEsc:
		case AUnk:
			r = TMP(a->base);
			if (!a->offset)
				break;
			r1 = getcon(a->offset, curf);
			r = iins(Kl, Oadd, r, r1, l);
			break;
		case ACon:
		case ASym:
			memset(&c, 0, sizeof c);
			c.type = CAddr;
			c.sym = a->u.sym;
			c.bits.i = a->offset;
			r = newcon(&c, curf);
			break;
		}
	}
	r = iins(cls, ld, r, R, l);
	if (!all)
		mask(cls, &r, msk, l);
	return r;
}
/* does defining r clobber the alias base
 * temporary of slice sl? */
static int
killsl(Ref r, Slice sl)
{
	Alias *a;

	if (rtype(sl.ref) != RTmp)
		return 0;
	a = &curf->tmp[sl.ref.val].alias;
	switch (a->type) {
	default: die("unreachable");
	case ALoc:
	case AEsc:
	case AUnk: return req(TMP(a->base), r);
	case ACon:
	case ASym: return 0;
	}
}
/* returns a ref containing the contents of the slice
 * passed as argument, all the bits set to 0 in the
 * mask argument are zeroed in the result;
 * the returned ref has an integer class when the
 * mask does not cover all the bits of the slice,
 * otherwise, it has class sl.cls
 * the procedure returns R when it fails */
static Ref
def(Slice sl, bits msk, Blk *b, Ins *i, Loc *il)
{
	Slice sl1;
	Blk *bp;
	bits msk1, msks;
	int off, cls, cls1, op, sz, ld;
	uint np, oldl, oldt;
	Ref r, r1;
	Phi *p;
	Insert *ist;
	Loc l;

	/* invariants:
	 * -1- b dominates il->blk; so we can use
	 * temporaries of b in il->blk
	 * -2- if il->type != LNoLoad, then il->blk
	 * postdominates the original load; so it
	 * is safe to load in il->blk
	 * -3- if il->type != LNoLoad, then b
	 * postdominates il->blk (and by 2, the
	 * original load)
	 */
	assert(dom(b, il->blk));
	oldl = nlog;
	oldt = curf->ntmp;
	if (0) {
	Load:
		/* failure path: roll back the insertions
		 * logged on this attempt and fall back to
		 * a plain load when one is allowed here */
		curf->ntmp = oldt;
		nlog = oldl;
		if (il->type != LLoad)
			return R;
		return load(sl, msk, il);
	}
	if (!i)
		i = &b->ins[b->nins];
	cls = sl.sz > 4 ? Kl : Kw;
	msks = MASK(sl.sz);
	/* walk b backwards looking for loads, stores,
	 * and blits touching the slice */
	while (i > b->ins) {
		--i;
		if (killsl(i->to, sl)
		|| (i->op == Ocall && escapes(sl.ref, curf)))
			goto Load;
		ld = isload(i->op);
		if (ld) {
			sz = loadsz(i);
			r1 = i->arg[0];
			r = i->to;
		} else if (isstore(i->op)) {
			sz = storesz(i);
			r1 = i->arg[1];
			r = i->arg[0];
		} else if (i->op == Oblit1) {
			assert(rtype(i->arg[0]) == RInt);
			sz = abs(rsval(i->arg[0]));
			assert(i > b->ins);
			--i;
			assert(i->op == Oblit0);
			r1 = i->arg[1];
		} else
			continue;
		switch (alias(sl.ref, sl.off, sl.sz, r1, sz, &off, curf)) {
		case MustAlias:
			if (i->op == Oblit0) {
				/* restrict the slice to the bytes
				 * actually covered by the blit source */
				sl1 = sl;
				sl1.ref = i->arg[0];
				if (off >= 0) {
					assert(off < sz);
					sl1.off = off;
					sz -= off;
					off = 0;
				} else {
					sl1.off = 0;
					sl1.sz += off;
				}
				if (sz > sl1.sz)
					sz = sl1.sz;
				assert(sz <= 8);
				sl1.sz = sz;
			}
			/* align the found bytes with the slice
			 * using a shift when offsets differ */
			if (off < 0) {
				off = -off;
				msk1 = (MASK(sz) << 8*off) & msks;
				op = Oshl;
			} else {
				msk1 = (MASK(sz) >> 8*off) & msks;
				op = Oshr;
			}
			if ((msk1 & msk) == 0)
				continue;
			if (i->op == Oblit0) {
				r = def(sl1, MASK(sz), b, i, il);
				if (req(r, R))
					goto Load;
			}
			if (off) {
				cls1 = cls;
				if (op == Oshr && off + sl.sz > 4)
					cls1 = Kl;
				cast(&r, cls1, il);
				r1 = getcon(8*off, curf);
				r = iins(cls1, op, r, r1, il);
			}
			if ((msk1 & msk) != msk1 || off + sz < sl.sz)
				mask(cls, &r, msk1 & msk, il);
			if ((msk & ~msk1) != 0) {
				/* fetch the bits this def does not
				 * provide and or them in */
				r1 = def(sl, msk & ~msk1, b, i, il);
				if (req(r1, R))
					goto Load;
				r = iins(cls, Oor, r, r1, il);
			}
			if (msk == msks)
				cast(&r, sl.cls, il);
			return r;
		case MayAlias:
			if (ld)
				continue;
			else
				goto Load;
		case NoAlias:
			continue;
		default:
			die("unreachable");
		}
	}
	/* reuse a phi already logged for the same
	 * slice in this block */
	for (ist=ilog; ist<&ilog[nlog]; ++ist)
		if (ist->isphi && ist->bid == b->id)
		if (req(ist->new.phi.m.ref, sl.ref))
		if (ist->new.phi.m.off == sl.off)
		if (ist->new.phi.m.sz == sl.sz) {
			r = ist->new.phi.p->to;
			if (msk != msks)
				mask(cls, &r, msk, il);
			else
				cast(&r, sl.cls, il);
			return r;
		}
	for (p=b->phi; p; p=p->link)
		if (killsl(p->to, sl))
			/* scanning predecessors in that
			 * case would be unsafe */
			goto Load;
	if (b->npred == 0)
		goto Load;
	if (b->npred == 1) {
		bp = b->pred[0];
		assert(bp->loop >= il->blk->loop);
		l = *il;
		if (bp->s2)
			l.type = LNoLoad;
		r1 = def(sl, msk, bp, 0, &l);
		if (req(r1, R))
			goto Load;
		return r1;
	}
	/* several predecessors: log a phi and recurse
	 * into each of them for the full slice */
	r = newtmp("ld", sl.cls, curf);
	p = alloc(sizeof *p);
	vgrow(&ilog, ++nlog);
	ist = &ilog[nlog-1];
	ist->isphi = 1;
	ist->bid = b->id;
	ist->new.phi.m = sl;
	ist->new.phi.p = p;
	p->to = r;
	p->cls = sl.cls;
	p->narg = b->npred;
	p->arg = vnew(p->narg, sizeof p->arg[0], PFn);
	p->blk = vnew(p->narg, sizeof p->blk[0], PFn);
	for (np=0; np<b->npred; ++np) {
		bp = b->pred[np];
		if (!bp->s2
		&& il->type != LNoLoad
		&& bp->loop < il->blk->loop)
			l.type = LLoad;
		else
			l.type = LNoLoad;
		l.blk = bp;
		l.off = bp->nins;
		r1 = def(sl, msks, bp, 0, &l);
		if (req(r1, R))
			goto Load;
		p->arg[np] = r1;
		p->blk[np] = bp;
		/* XXX - multiplicity in predecessors!!! */
	}
	if (msk != msks)
		mask(cls, &r, msk, il);
	return r;
}
/* qsort order for the insertion log: by block,
 * phis first, then by instruction offset, then by
 * insertion number */
static int
icmp(const void *pa, const void *pb)
{
	const Insert *ia, *ib;
	int d;

	ia = pa;
	ib = pb;
	d = ia->bid - ib->bid;
	if (d != 0)
		return d;
	if (ia->isphi || ib->isphi)
		return (int)ib->isphi - (int)ia->isphi;
	d = ia->off - ib->off;
	if (d != 0)
		return d;
	return ia->num - ib->num;
}
/* require rpo ssa alias */
void
loadopt(Fn *fn)
{
	Ins *i, *ib;
	Blk *b;
	int sz;
	uint n, ni, ext, nt;
	Insert *ist;
	Slice sl;
	Loc l;

	curf = fn;
	ilog = vnew(0, sizeof ilog[0], PHeap);
	nlog = 0;
	inum = 0;
	/* try to re-express each load from earlier
	 * stores; a successful replacement ref is
	 * recorded in the load's unused arg[1] */
	for (b=fn->start; b; b=b->link)
		for (i=b->ins; i<&b->ins[b->nins]; ++i) {
			if (!isload(i->op))
				continue;
			sz = loadsz(i);
			sl = (Slice){i->arg[0], 0, sz, i->cls};
			l = (Loc){LRoot, i-b->ins, b};
			i->arg[1] = def(sl, MASK(sz), b, i, &l);
		}
	qsort(ilog, nlog, sizeof ilog[0], icmp);
	vgrow(&ilog, nlog+1);
	ilog[nlog].bid = fn->nblk; /* add a sentinel */
	/* replay the insertion log, merging logged
	 * phis and instructions into each block */
	ib = vnew(0, sizeof(Ins), PHeap);
	for (ist=ilog, n=0; n<fn->nblk; ++n) {
		b = fn->rpo[n];
		for (; ist->bid == n && ist->isphi; ++ist) {
			ist->new.phi.p->link = b->phi;
			b->phi = ist->new.phi.p;
		}
		ni = 0;
		nt = 0;
		for (;;) {
			if (ist->bid == n && ist->off == ni)
				i = &ist++->new.ins;
			else {
				if (ni == b->nins)
					break;
				i = &b->ins[ni++];
				if (isload(i->op)
				&& !req(i->arg[1], R)) {
					/* rewrite the replaced load into
					 * an extension/copy of arg[1] */
					ext = Oextsb + i->op - Oloadsb;
					switch (i->op) {
					default:
						die("unreachable");
					case Oloadsb:
					case Oloadub:
					case Oloadsh:
					case Oloaduh:
						i->op = ext;
						break;
					case Oloadsw:
					case Oloaduw:
						if (i->cls == Kl) {
							i->op = ext;
							break;
						}
						/* fall through */
					case Oload:
						i->op = Ocopy;
						break;
					}
					i->arg[0] = i->arg[1];
					i->arg[1] = R;
				}
			}
			vgrow(&ib, ++nt);
			ib[nt-1] = *i;
		}
		idup(b, ib, nt);
	}
	vfree(ib);
	vfree(ilog);
	if (debug['M']) {
		fprintf(stderr, "\n> After load elimination:\n");
		printfn(fn, stderr);
	}
}

212
src/qbe/main.c Normal file
View File

@@ -0,0 +1,212 @@
#include "all.h"
#include "config.h"
#include <ctype.h>
#include <getopt.h>
/* active target, selected with -t (default Deftgt) */
Target T;

/* per-pass debug flags, set via -d; indexed by
 * the uppercase flag letter (entries not listed
 * here, e.g. 'G' and 'K', default to 0) */
char debug['Z'+1] = {
	['P'] = 0, /* parsing */
	['M'] = 0, /* memory optimization */
	['N'] = 0, /* ssa construction */
	['C'] = 0, /* copy elimination */
	['F'] = 0, /* constant folding */
	['K'] = 0, /* if-conversion */
	['A'] = 0, /* abi lowering */
	['I'] = 0, /* instruction selection */
	['L'] = 0, /* liveness */
	['S'] = 0, /* spilling */
	['R'] = 0, /* reg. allocation */
};

extern Target T_amd64_sysv;
extern Target T_amd64_apple;
extern Target T_amd64_win;
extern Target T_arm64;
extern Target T_arm64_apple;
extern Target T_rv64;

/* null-terminated list of selectable targets */
static Target *tlist[] = {
	&T_amd64_sysv,
	&T_amd64_apple,
	&T_amd64_win,
	&T_arm64,
	&T_arm64_apple,
	&T_rv64,
	0
};
static FILE *outf; /* assembly output (default stdout) */
static int dbg;    /* nonzero when any -d flag is set */
/* parser callback for data definitions; emission
 * is suppressed in debug (-d) mode */
static void
data(Dat *d)
{
	if (dbg)
		return;
	emitdat(d, outf);
	if (d->type == DEnd) {
		fputs("/* end data */\n\n", outf);
		freeall();
	}
}
/* parser callback for functions: run the whole
 * optimization and lowering pipeline, then emit
 * assembly (unless in debug mode) */
static void
func(Fn *fn)
{
	uint n;

	if (dbg)
		fprintf(stderr, "**** Function %s ****", fn->name);
	if (debug['P']) {
		fprintf(stderr, "\n> After parsing:\n");
		printfn(fn, stderr);
	}
	T.abi0(fn);
	fillcfg(fn);
	filluse(fn);
	promote(fn);
	filluse(fn);
	ssa(fn);
	filluse(fn);
	ssacheck(fn);
	fillalias(fn);
	loadopt(fn);
	filluse(fn);
	fillalias(fn);
	coalesce(fn);
	filluse(fn);
	filldom(fn);
	ssacheck(fn);
	gvn(fn);
	fillcfg(fn);
	simplcfg(fn);
	filluse(fn);
	filldom(fn);
	gcm(fn);
	filluse(fn);
	ssacheck(fn);
	/* if-conversion only for targets with sel */
	if (T.cansel) {
		ifconvert(fn);
		fillcfg(fn);
		filluse(fn);
		filldom(fn);
		ssacheck(fn);
	}
	T.abi1(fn);
	simpl(fn);
	fillcfg(fn);
	filluse(fn);
	T.isel(fn);
	fillcfg(fn);
	filllive(fn);
	fillloop(fn);
	fillcost(fn);
	spill(fn);
	rega(fn);
	fillcfg(fn);
	simpljmp(fn);
	fillcfg(fn);
	/* relink the block list in rpo order for emission */
	assert(fn->rpo[0] == fn->start);
	for (n=0;; n++)
		if (n == fn->nblk-1) {
			fn->rpo[n]->link = 0;
			break;
		} else
			fn->rpo[n]->link = fn->rpo[n+1];
	if (!dbg) {
		T.emitfn(fn, outf);
		fprintf(outf, "/* end function %s */\n\n", fn->name);
	} else
		fprintf(stderr, "\n");
	freeall();
}
/* parser callback for dbgfile directives */
static void
dbgfile(char *fn)
{
	emitdbgfile(fn, outf);
}
/* entry point: parse options, then process every
 * input file ("-" being stdin) through the parser
 * callbacks above */
int
main(int ac, char *av[])
{
	Target **t;
	FILE *inf, *hf;
	char *f, *sep;
	int c;

	T = Deftgt;
	outf = stdout;
	while ((c = getopt(ac, av, "hd:o:t:")) != -1)
		switch (c) {
		case 'd':
			/* each letter enables one debug pass dump */
			for (; *optarg; optarg++)
				if (isalpha(*optarg)) {
					debug[toupper(*optarg)] = 1;
					dbg = 1;
				}
			break;
		case 'o':
			if (strcmp(optarg, "-") != 0) {
				outf = fopen(optarg, "w");
				if (!outf) {
					fprintf(stderr, "cannot open '%s'\n", optarg);
					exit(1);
				}
			}
			break;
		case 't':
			/* "-t ?" prints the default target name */
			if (strcmp(optarg, "?") == 0) {
				puts(T.name);
				exit(0);
			}
			for (t=tlist;; t++) {
				if (!*t) {
					fprintf(stderr, "unknown target '%s'\n", optarg);
					exit(1);
				}
				if (strcmp(optarg, (*t)->name) == 0) {
					T = **t;
					break;
				}
			}
			break;
		case 'h':
		default:
			hf = c != 'h' ? stderr : stdout;
			fprintf(hf, "%s [OPTIONS] {file.ssa, -}\n", av[0]);
			fprintf(hf, "\t%-11s prints this help\n", "-h");
			fprintf(hf, "\t%-11s output to file\n", "-o file");
			fprintf(hf, "\t%-11s generate for a target among:\n", "-t <target>");
			fprintf(hf, "\t%-11s ", "");
			for (t=tlist, sep=""; *t; t++, sep=", ") {
				fprintf(hf, "%s%s", sep, (*t)->name);
				if (*t == &Deftgt)
					fputs(" (default)", hf);
			}
			fprintf(hf, "\n");
			fprintf(hf, "\t%-11s dump debug information\n", "-d <flags>");
			exit(c != 'h');
		}
	do {
		f = av[optind];
		if (!f || strcmp(f, "-") == 0) {
			inf = stdin;
			f = "-";
		} else {
			inf = fopen(f, "r");
			if (!inf) {
				fprintf(stderr, "cannot open '%s'\n", f);
				exit(1);
			}
		}
		parse(inf, f, dbgfile, data, func);
		fclose(inf);
	} while (++optind < ac);
	if (!dbg)
		T.emitfin(outf);
	exit(0);
}

488
src/qbe/mem.c Normal file
View File

@@ -0,0 +1,488 @@
#include "all.h"
/* local types for stack-slot coalescing (below) */
typedef struct Range Range;
typedef struct Store Store;
typedef struct Slot Slot;
/* require use, maintains use counts */
void
promote(Fn *fn)
{
	Blk *b;
	Ins *i, *l;
	Tmp *t;
	Use *u, *ue;
	int s, k;

	/* promote uniform stack slots to temporaries */
	b = fn->start;
	for (i=b->ins; i<&b->ins[b->nins]; i++) {
		if (Oalloc > i->op || i->op > Oalloc1)
			continue;
		/* specific to NAlign == 3 */
		assert(rtype(i->to) == RTmp);
		t = &fn->tmp[i->to.val];
		if (t->ndef != 1)
			goto Skip;
		/* every use must be a full-width load or
		 * store of a single size (s) and class (k) */
		k = -1;
		s = -1;
		for (u=t->use; u<&t->use[t->nuse]; u++) {
			if (u->type != UIns)
				goto Skip;
			l = u->u.ins;
			if (isload(l->op))
			if (s == -1 || s == loadsz(l)) {
				s = loadsz(l);
				continue;
			}
			if (isstore(l->op))
			if (req(i->to, l->arg[1]) && !req(i->to, l->arg[0]))
			if (s == -1 || s == storesz(l))
			if (k == -1 || k == optab[l->op].argcls[0][0]) {
				s = storesz(l);
				k = optab[l->op].argcls[0][0];
				continue;
			}
			goto Skip;
		}
		/* get rid of the alloc and replace uses */
		*i = (Ins){.op = Onop};
		t->ndef--;
		ue = &t->use[t->nuse];
		for (u=t->use; u!=ue; u++) {
			l = u->u.ins;
			if (isstore(l->op)) {
				/* stores become copies into the temp */
				l->cls = k;
				l->op = Ocopy;
				l->to = l->arg[1];
				l->arg[1] = R;
				t->nuse--;
				t->ndef++;
			} else {
				if (k == -1)
					err("slot %%%s is read but never stored to",
						fn->tmp[l->arg[0].val].name);
				/* try to turn loads into copies so we
				 * can eliminate them later */
				switch(l->op) {
				case Oloadsw:
				case Oloaduw:
					if (k == Kl)
						goto Extend;
					/* fall through */
				case Oload:
					if (KBASE(k) != KBASE(l->cls))
						l->op = Ocast;
					else
						l->op = Ocopy;
					break;
				default:
				Extend:
					l->op = Oextsb + (l->op - Oloadsb);
					break;
				}
			}
		}
	Skip:;
	}
	if (debug['M']) {
		fprintf(stderr, "\n> After slot promotion:\n");
		printfn(fn, stderr);
	}
}
/* [a, b) with 0 <= a */
struct Range {
	int a, b;
};

/* a store instruction and its program point */
struct Store {
	int ip;
	Ins *i;
};

/* a stack slot tracked by coalesce() */
struct Slot {
	int t;    /* index of the slot's temporary */
	int sz;   /* slot size (from alias info) */
	bits m;   /* byte mask of the slot */
	bits l;   /* currently live bytes */
	Range r;  /* live range in program points */
	Slot *s;  /* fusion target, presumably — set later */
	Store *st; /* log of potentially dead stores */
	int nst;
};
/* does n fall inside range r? */
static inline int
rin(Range r, int n)
{
	if (n < r.a)
		return 0;
	return n < r.b;
}
/* do two non-empty ranges overlap? */
static inline int
rovlap(Range r0, Range r1)
{
	if (!r0.b || !r1.b)
		return 0;
	return r0.a < r1.b && r1.a < r0.b;
}
/* extend range r to cover point n (an empty
 * range, b == 0, becomes [n, n+1)) */
static void
radd(Range *r, int n)
{
	if (r->b == 0) {
		r->a = n;
		r->b = n + 1;
		return;
	}
	if (n < r->a)
		r->a = n;
	else if (n >= r->b)
		r->b = n + 1;
}
/* resolve r to a tracked slot and the offset of r
 * within it; fails for non-ALoc aliases and for
 * bases not registered in sl (visit < 0) */
static int
slot(Slot **ps, int64_t *off, Ref r, Fn *fn, Slot *sl)
{
	Alias a;
	Tmp *t;

	getalias(&a, r, fn);
	if (a.type != ALoc)
		return 0;
	t = &fn->tmp[a.base];
	if (t->visit < 0)
		return 0;
	*off = a.offset;
	*ps = &sl[t->visit];
	return 1;
}
/* record a read of byte mask x of r at program
 * point ip: the bytes become live (clipped to the
 * slot mask) and extend the slot's range */
static void
load(Ref r, bits x, int ip, Fn *fn, Slot *sl)
{
	int64_t off;
	Slot *s;

	if (slot(&s, &off, r, fn, sl)) {
		s->l |= x << off;
		s->l &= s->m;
		if (s->l)
			radd(&s->r, ip);
	}
}
/* record a write of byte mask x to r at ip; when
 * no byte of the slot is live the store is logged
 * as a dead-store candidate instead */
static void
store(Ref r, bits x, int ip, Ins *i, Fn *fn, Slot *sl)
{
	int64_t off;
	Slot *s;

	if (slot(&s, &off, r, fn, sl)) {
		if (s->l) {
			radd(&s->r, ip);
			s->l &= ~(x << off);
		} else {
			vgrow(&s->st, ++s->nst);
			s->st[s->nst-1].ip = ip;
			s->st[s->nst-1].i = i;
		}
	}
}
/* qsort order for slots: larger sizes first,
 * then by start of live range */
static int
scmp(const void *pa, const void *pb)
{
	const Slot *a, *b;

	a = pa;
	b = pb;
	if (a->sz != b->sz)
		return b->sz - a->sz;
	return a->r.a - b->r.a;
}
/* record in hd->loop the largest rpo id of any
 * block in the loop headed by hd */
static void
maxrpo(Blk *hd, Blk *b)
{
	int id;

	id = b->id;
	if (hd->loop < id)
		hd->loop = id;
}
/* Stack-slot coalescing pass.  Collects the slots allocated in the
 * entry block, runs a one-pass backward liveness analysis assigning
 * positions downward from INT_MAX-1, kills stores and slots proven
 * dead, then fuses slots with non-overlapping live ranges to shrink
 * the frame, finally patching uses and blits accordingly. */
void
coalesce(Fn *fn)
{
	Range r, *br;
	Slot *s, *s0, *sl;
	Blk *b, **ps, *succ[3];
	Ins *i, **bl;
	Use *u;
	Tmp *t, *ts;
	Ref *arg;
	bits x;
	int64_t off0, off1;
	int n, m, ip, sz, nsl, nbl, *stk;
	uint total, freed, fused;

	/* minimize the stack usage
	 * by coalescing slots
	 */
	/* collect the candidate slots: entry-block allocations with a
	 * known constant size whose alias info is its own slot */
	nsl = 0;
	sl = vnew(0, sizeof sl[0], PHeap);
	for (n=Tmp0; n<fn->ntmp; n++) {
		t = &fn->tmp[n];
		t->visit = -1;
		if (t->alias.type == ALoc)
		if (t->alias.slot == &t->alias)
		if (t->bid == fn->start->id)
		if (t->alias.u.loc.sz != -1) {
			t->visit = nsl;
			vgrow(&sl, ++nsl);
			s = &sl[nsl-1];
			s->t = n;
			s->sz = t->alias.u.loc.sz;
			s->m = t->alias.u.loc.m;
			s->s = 0;
			s->st = vnew(0, sizeof s->st[0], PHeap);
			s->nst = 0;
		}
	}
	/* one-pass liveness analysis */
	for (b=fn->start; b; b=b->link)
		b->loop = -1;
	loopiter(fn, maxrpo);
	nbl = 0;
	bl = vnew(0, sizeof bl[0], PHeap);
	br = emalloc(fn->nblk * sizeof br[0]);
	ip = INT_MAX - 1;
	/* walk blocks in reverse rpo, instructions backwards, handing
	 * out decreasing positions; br[n] records each block's span */
	for (n=fn->nblk-1; n>=0; n--) {
		b = fn->rpo[n];
		succ[0] = b->s1;
		succ[1] = b->s2;
		succ[2] = 0;
		br[n].b = ip--;
		for (s=sl; s<&sl[nsl]; s++) {
			s->l = 0;
			/* a slot is live-out if a later successor's entry
			 * position falls inside its range */
			for (ps=succ; *ps; ps++) {
				m = (*ps)->id;
				if (m > n && rin(s->r, br[m].a)) {
					s->l = s->m;
					radd(&s->r, ip);
				}
			}
		}
		if (b->jmp.type == Jretc)
			load(b->jmp.arg, -1, --ip, fn, sl);
		for (i=&b->ins[b->nins]; i!=b->ins;) {
			--i;
			arg = i->arg;
			if (i->op == Oargc) {
				load(arg[1], -1, --ip, fn, sl);
			}
			if (isload(i->op)) {
				x = BIT(loadsz(i)) - 1;
				load(arg[0], x, --ip, fn, sl);
			}
			if (isstore(i->op)) {
				x = BIT(storesz(i)) - 1;
				store(arg[1], x, ip--, i, fn, sl);
			}
			if (i->op == Oblit0) {
				assert((i+1)->op == Oblit1);
				assert(rtype((i+1)->arg[0]) == RInt);
				sz = abs(rsval((i+1)->arg[0]));
				/* a blit is both a store (dest) and a load (src) */
				x = sz >= NBit ? (bits)-1 : BIT(sz) - 1;
				store(arg[1], x, ip--, i, fn, sl);
				load(arg[0], x, ip, fn, sl);
				vgrow(&bl, ++nbl);
				bl[nbl-1] = i;
			}
		}
		/* anything still live at block entry extends to here; for
		 * loop headers, extend through the whole loop body */
		for (s=sl; s<&sl[nsl]; s++)
			if (s->l) {
				radd(&s->r, ip);
				if (b->loop != -1) {
					assert(b->loop >= n);
					radd(&s->r, br[b->loop].b - 1);
				}
			}
		br[n].a = ip;
	}
	free(br);
	/* kill dead stores */
	for (s=sl; s<&sl[nsl]; s++)
		for (n=0; n<s->nst; n++)
			if (!rin(s->r, s->st[n].ip)) {
				i = s->st[n].i;
				if (i->op == Oblit0)
					*(i+1) = (Ins){.op = Onop};
				*i = (Ins){.op = Onop};
			}
	/* kill slots with an empty live range */
	total = 0;
	freed = 0;
	stk = vnew(0, sizeof stk[0], PHeap);
	n = 0;
	for (s=s0=sl; s<&sl[nsl]; s++) {
		total += s->sz;
		if (!s->r.b) {
			vfree(s->st);
			vgrow(&stk, ++n);
			stk[n-1] = s->t;
			freed += s->sz;
		} else
			*s0++ = *s;
	}
	nsl = s0-sl;
	if (debug['M']) {
		fputs("\n> Slot coalescing:\n", stderr);
		if (n) {
			fputs("\tkill [", stderr);
			for (m=0; m<n; m++)
				fprintf(stderr, " %%%s",
					fn->tmp[stk[m]].name);
			fputs(" ]\n", stderr);
		}
	}
	/* walk the kill stack, nop'ing defs and uses; uses that define
	 * new temporaries push those onto the stack in turn */
	while (n--) {
		t = &fn->tmp[stk[n]];
		assert(t->ndef == 1 && t->def);
		i = t->def;
		if (isload(i->op)) {
			i->op = Ocopy;
			i->arg[0] = UNDEF;
			continue;
		}
		*i = (Ins){.op = Onop};
		for (u=t->use; u<&t->use[t->nuse]; u++) {
			if (u->type == UJmp) {
				b = fn->rpo[u->bid];
				assert(isret(b->jmp.type));
				b->jmp.type = Jret0;
				b->jmp.arg = R;
				continue;
			}
			assert(u->type == UIns);
			i = u->u.ins;
			if (!req(i->to, R)) {
				assert(rtype(i->to) == RTmp);
				vgrow(&stk, ++n);
				stk[n-1] = i->to.val;
			} else if (isarg(i->op)) {
				assert(i->op == Oargc);
				i->arg[1] = CON_Z; /* crash */
			} else {
				if (i->op == Oblit0)
					*(i+1) = (Ins){.op = Onop};
				*i = (Ins){.op = Onop};
			}
		}
	}
	vfree(stk);
	/* fuse slots by decreasing size */
	qsort(sl, nsl, sizeof *sl, scmp);
	fused = 0;
	for (n=0; n<nsl; n++) {
		s0 = &sl[n];
		if (s0->s)
			continue;
		s0->s = s0;
		r = s0->r;
		/* greedily absorb later (smaller) slots whose range does
		 * not overlap any slot already fused into s0 */
		for (s=s0+1; s<&sl[nsl]; s++) {
			if (s->s || !s->r.b)
				goto Skip;
			if (rovlap(r, s->r))
				/* O(n); can be approximated
				 * by 'goto Skip;' if need be
				 */
				for (m=n; &sl[m]<s; m++)
					if (sl[m].s == s0)
					if (rovlap(sl[m].r, s->r))
						goto Skip;
			radd(&r, s->r.a);
			radd(&r, s->r.b - 1);
			s->s = s0;
			fused += s->sz;
		Skip:;
		}
	}
	/* substitute fused slots */
	for (s=sl; s<&sl[nsl]; s++) {
		t = &fn->tmp[s->t];
		/* the visit link is stale,
		 * reset it before the slot()
		 * calls below
		 */
		t->visit = s-sl;
		assert(t->ndef == 1 && t->def);
		if (s->s == s)
			continue;
		*t->def = (Ins){.op = Onop};
		ts = &fn->tmp[s->s->t];
		assert(t->bid == ts->bid);
		if (t->def < ts->def) {
			/* make sure the slot we
			 * selected has a def that
			 * dominates its new uses
			 */
			*t->def = *ts->def;
			*ts->def = (Ins){.op = Onop};
			ts->def = t->def;
		}
		for (u=t->use; u<&t->use[t->nuse]; u++) {
			if (u->type == UJmp) {
				b = fn->rpo[u->bid];
				b->jmp.arg = TMP(s->s->t);
				continue;
			}
			assert(u->type == UIns);
			arg = u->u.ins->arg;
			for (n=0; n<2; n++)
				if (req(arg[n], TMP(s->t)))
					arg[n] = TMP(s->s->t);
		}
	}
	/* fix newly overlapping blits */
	for (n=0; n<nbl; n++) {
		i = bl[n];
		if (i->op == Oblit0)
		if (slot(&s, &off0, i->arg[0], fn, sl))
		if (slot(&s0, &off1, i->arg[1], fn, sl))
		if (s->s == s0->s) {
			if (off0 < off1) {
				/* negative size flags a backward copy */
				sz = rsval((i+1)->arg[0]);
				assert(sz >= 0);
				(i+1)->arg[0] = INT(-sz);
			} else if (off0 == off1) {
				*i = (Ins){.op = Onop};
				*(i+1) = (Ins){.op = Onop};
			}
		}
	}
	vfree(bl);
	if (debug['M']) {
		for (s0=sl; s0<&sl[nsl]; s0++) {
			if (s0->s != s0)
				continue;
			fprintf(stderr, "\tfuse (% 3db) [", s0->sz);
			for (s=s0; s<&sl[nsl]; s++) {
				if (s->s != s0)
					continue;
				fprintf(stderr, " %%%s", fn->tmp[s->t].name);
				if (s->r.b)
					fprintf(stderr, "[%d,%d)",
						s->r.a-ip, s->r.b-ip);
				else
					fputs("{}", stderr);
			}
			fputs(" ]\n", stderr);
		}
		fprintf(stderr, "\tsums %u/%u/%u (killed/fused/total)\n\n",
			freed, fused, total);
		printfn(fn, stderr);
	}
	for (s=sl; s<&sl[nsl]; s++)
		vfree(s->st);
	vfree(sl);
}

4
src/qbe/minic/.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
minic
yacc
y.*
*.out

12
src/qbe/minic/Makefile Normal file
View File

@@ -0,0 +1,12 @@
# Build the minic demo compiler: run the bundled yacc on the
# grammar, then compile the generated parser with the host cc.
BIN = minic
CFLAGS += -g -Wall
$(BIN): yacc minic.y
	./yacc minic.y
	$(CC) $(CFLAGS) -o $@ y.tab.c
# remove the generated parser and both binaries
clean:
	rm -f yacc minic y.*
.PHONY: clean

44
src/qbe/minic/mcc Executable file
View File

@@ -0,0 +1,44 @@
#!/bin/sh

# mcc: drive a single C file through the minic frontend and the
# QBE backend, then assemble/link the result with cc(1).
# usage: mcc [LDFLAGS] file.c
# Arguments starting with '-' are collected and passed to cc.

# quote "$0" so the script works from a path containing spaces
DIR=`cd "$(dirname "$0")"; pwd`
QBE=$DIR/../qbe

usage()
{
	echo "usage: mcc [LDFLAGS] file.c" >&2
	exit 1
}

for i
do
	case $i in
	-*)
		flags="$flags $i"
		;;
	*)
		# reject a second non-flag argument; quoting keeps the
		# test well-formed for file names with spaces
		if ! test -z "$file"
		then
			usage
		fi
		file=$i
		;;
	esac
done

if test -z "$file"
then
	usage
fi

$DIR/minic < "$file" > /tmp/minic.ssa &&
$QBE < /tmp/minic.ssa > /tmp/minic.s &&
cc /tmp/minic.s $flags

if test $? -ne 0
then
	echo "error processing file $file" >&2
	exit 1
fi

951
src/qbe/minic/minic.y Normal file
View File

@@ -0,0 +1,951 @@
%{
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
enum {
	NString = 32,  /* max identifier length (including NUL) */
	NGlo = 256,    /* max global data definitions */
	NVar = 512,    /* size of the variable hash table */
	NStr = 256,
};
enum { /* minic types */
	NIL,
	INT,
	LNG,
	PTR,
	FUN,
};
/* types are packed integers: low 3 bits hold the kind, the rest
 * encodes the referenced type for pointers/functions */
#define IDIR(x) (((x) << 3) + PTR)
#define FUNC(x) (((x) << 3) + FUN)
#define DREF(x) ((x) >> 3)
#define KIND(x) ((x) & 7)
#define SIZE(x) \
	(x == NIL ? (die("void has no size"), 0) : \
	x == INT ? 4 : 8)
typedef struct Node Node;
typedef struct Symb Symb;
typedef struct Stmt Stmt;
/* a symbol: constant, IR temporary, named local, or global index */
struct Symb {
	enum {
		Con,
		Tmp,
		Var,
		Glo,
	} t;
	union {
		int n;            /* Con value / Tmp number / Glo index */
		char v[NString];  /* Var name */
	} u;
	unsigned long ctyp;       /* packed minic type */
};
/* AST expression node; op is a one-char tag (see expr()) */
struct Node {
	char op;
	union {
		int n;
		char v[NString];
		Symb s;           /* filled for call arguments */
	} u;
	Node *l, *r;
};
/* AST statement node */
struct Stmt {
	enum {
		If,
		While,
		Seq,
		Expr,
		Break,
		Ret,
	} t;
	void *p1, *p2, *p3;
};
int yylex(void), yyerror(char *);
Symb expr(Node *), lval(Node *);
void branch(Node *, int, int);
FILE *of;          /* output stream for the emitted QBE IL */
int line;          /* current input line, for diagnostics */
int lbl, tmp, nglo; /* label, temporary, and global counters */
char *ini[NGlo];   /* initializers for global data */
struct {
	char v[NString];
	unsigned ctyp;
	int glo;       /* 0 for locals, global index otherwise */
} varh[NVar];
/* report a fatal error with the current input line and exit */
void
die(char *s)
{
	fprintf(stderr, "error:%d: %s\n", line, s);
	exit(1);
}
void *
alloc(size_t s)
{
	/* malloc wrapper that aborts compilation on exhaustion */
	void *p;

	if (!(p = malloc(s)))
		die("out of memory");
	return p;
}
unsigned
hash(char *s)
{
	/* multiplicative string hash reduced into the varh table */
	unsigned h;

	for (h=42; *s; s++)
		h += 11 * h + *s;
	return h % NVar;
}
void
varclr()
{
	/* drop all local variables from the table; globals persist */
	unsigned h;

	for (h=0; h<NVar; h++)
		if (varh[h].glo == 0)
			varh[h].v[0] = 0;
}
void
varadd(char *v, int glo, unsigned ctyp)
{
	/* insert v in the open-addressing table; glo is 0 for a
	 * local, the global index otherwise */
	unsigned h0, h;

	h = h0 = hash(v);
	for (;;) {
		if (varh[h].v[0] == 0) {
			strcpy(varh[h].v, v);
			varh[h].glo = glo;
			varh[h].ctyp = ctyp;
			return;
		}
		if (strcmp(varh[h].v, v) == 0)
			die("double definition");
		h = (h+1) % NVar;
		if (h == h0)
			break;
	}
	die("too many variables");
}
Symb *
varget(char *v)
{
	/* linear-probe lookup; returns a pointer to a static Symb
	 * describing the variable, or 0 when unknown */
	static Symb s;
	unsigned h0, h;

	h = h0 = hash(v);
	do {
		if (strcmp(varh[h].v, v) == 0) {
			s.ctyp = varh[h].ctyp;
			if (varh[h].glo) {
				s.t = Glo;
				s.u.n = varh[h].glo;
			} else {
				s.t = Var;
				strcpy(s.u.v, v);
			}
			return &s;
		}
		h = (h+1) % NVar;
	} while (h != h0 && varh[h].v[0] != 0);
	return 0;
}
/* QBE base type letter for a minic type: 'l' for 8-byte values
 * (long, pointers), 'w' for the rest */
char
irtyp(unsigned ctyp)
{
	return SIZE(ctyp) == 8 ? 'l' : 'w';
}
void
psymb(Symb s)
{
	/* print a symbol as a QBE operand */
	switch (s.t) {
	case Con:
		fprintf(of, "%d", s.u.n);
		break;
	case Glo:
		fprintf(of, "$glo%d", s.u.n);
		break;
	case Tmp:
		fprintf(of, "%%t%d", s.u.n);
		break;
	case Var:
		fprintf(of, "%%%s", s.u.v);
		break;
	}
}
void
sext(Symb *s)
{
	/* emit a signed extension of *s to long, rewriting *s to the
	 * fresh temporary holding the result */
	fprintf(of, "\t%%t%d =l extsw ", tmp);
	psymb(*s);
	fprintf(of, "\n");
	s->ctyp = LNG;
	s->t = Tmp;
	s->u.n = tmp++;
}
/* usual arithmetic/pointer promotions for binary operator op;
 * may emit extension or scaling code, returns the result type */
unsigned
prom(int op, Symb *l, Symb *r)
{
	Symb *t;
	int sz;
	/* same non-pointer type: nothing to do */
	if (l->ctyp == r->ctyp && KIND(l->ctyp) != PTR)
		return l->ctyp;
	/* int widened to long as needed */
	if (l->ctyp == LNG && r->ctyp == INT) {
		sext(r);
		return LNG;
	}
	if (l->ctyp == INT && r->ctyp == LNG) {
		sext(l);
		return LNG;
	}
	if (op == '+') {
		/* canonicalize so the pointer (if any) is on the left */
		if (KIND(r->ctyp) == PTR) {
			t = l;
			l = r;
			r = t;
		}
		if (KIND(r->ctyp) == PTR)
			die("pointers added");
		goto Scale;
	}
	if (op == '-') {
		if (KIND(l->ctyp) != PTR)
			die("pointer substracted from integer");
		if (KIND(r->ctyp) != PTR)
			goto Scale;
		if (l->ctyp != r->ctyp)
			die("non-homogeneous pointers in substraction");
		/* ptr - ptr: the caller divides by the element size */
		return LNG;
	}
Scale:
	/* pointer arithmetic: scale the integer operand by the
	 * pointee size */
	sz = SIZE(DREF(l->ctyp));
	if (r->t == Con)
		r->u.n *= sz;
	else {
		if (irtyp(r->ctyp) != 'l')
			sext(r);
		fprintf(of, "\t%%t%d =l mul %d, ", tmp, sz);
		psymb(*r);
		fprintf(of, "\n");
		/* NOTE(review): r->t is left unchanged here; assumes r
		 * is already a Tmp at this point — confirm */
		r->u.n = tmp++;
	}
	return l->ctyp;
}
/* emit a typed load from address s into destination symbol d */
void
load(Symb d, Symb s)
{
	char t;
	fprintf(of, "\t");
	psymb(d);
	t = irtyp(d.ctyp);
	fprintf(of, " =%c load%c ", t, t);
	psymb(s);
	fprintf(of, "\n");
}
/* emit a call; undeclared functions default to returning int.
 * Every call is emitted with a trailing "..." (variadic) — this
 * keeps printf-style calls working without prototypes. */
void
call(Node *n, Symb *sr)
{
	Node *a;
	char *f;
	unsigned ft;
	f = n->l->u.v;
	if (varget(f)) {
		ft = varget(f)->ctyp;
		if (KIND(ft) != FUN)
			die("invalid call");
	} else
		ft = FUNC(INT);
	sr->ctyp = DREF(ft);
	/* evaluate arguments first; their symbols are cached in the
	 * argument nodes for the second pass below */
	for (a=n->r; a; a=a->r)
		a->u.s = expr(a->l);
	fprintf(of, "\t");
	psymb(*sr);
	fprintf(of, " =%c call $%s(", irtyp(sr->ctyp), f);
	for (a=n->r; a; a=a->r) {
		fprintf(of, "%c ", irtyp(a->u.s.ctyp));
		psymb(a->u.s);
		fprintf(of, ", ");
	}
	fprintf(of, "...)\n");
}
/* generate code for expression n, returning the symbol holding
 * its value.  Node tags: 'o'/'a' logical or/and, 'V' variable,
 * 'N' number, 'S' string, 'C' call, '@' deref, 'A' address-of,
 * '=' assignment, 'P'/'M' post-increment/decrement, and plain
 * operator characters for binary operations. */
Symb
expr(Node *n)
{
	static char *otoa[] = {
		['+'] = "add",
		['-'] = "sub",
		['*'] = "mul",
		['/'] = "div",
		['%'] = "rem",
		['&'] = "and",
		['<'] = "cslt", /* meeeeh, wrong for pointers! */
		['l'] = "csle",
		['e'] = "ceq",
		['n'] = "cne",
	};
	Symb sr, s0, s1, sl;
	int o, l;
	char ty[2];
	sr.t = Tmp;
	sr.u.n = tmp++;
	switch (n->op) {
	case 0:
		abort();
	case 'o':
	case 'a':
		/* materialize a boolean with a phi over two jumps */
		l = lbl;
		lbl += 3;
		branch(n, l, l+1);
		fprintf(of, "@l%d\n", l);
		fprintf(of, "\tjmp @l%d\n", l+2);
		fprintf(of, "@l%d\n", l+1);
		fprintf(of, "\tjmp @l%d\n", l+2);
		fprintf(of, "@l%d\n", l+2);
		fprintf(of, "\t");
		sr.ctyp = INT;
		psymb(sr);
		fprintf(of, " =w phi @l%d 1, @l%d 0\n", l, l+1);
		break;
	case 'V':
		s0 = lval(n);
		sr.ctyp = s0.ctyp;
		load(sr, s0);
		break;
	case 'N':
		sr.t = Con;
		sr.u.n = n->u.n;
		sr.ctyp = INT;
		break;
	case 'S':
		/* string literal: address of the global holding it */
		sr.t = Glo;
		sr.u.n = n->u.n;
		sr.ctyp = IDIR(INT);
		break;
	case 'C':
		call(n, &sr);
		break;
	case '@':
		s0 = expr(n->l);
		if (KIND(s0.ctyp) != PTR)
			die("dereference of a non-pointer");
		sr.ctyp = DREF(s0.ctyp);
		load(sr, s0);
		break;
	case 'A':
		sr = lval(n->l);
		sr.ctyp = IDIR(sr.ctyp);
		break;
	case '=':
		s0 = expr(n->r);
		s1 = lval(n->l);
		sr = s0;
		if (s1.ctyp == LNG && s0.ctyp == INT)
			sext(&s0);
		/* void* converts freely to and from any pointer */
		if (s0.ctyp != IDIR(NIL) || KIND(s1.ctyp) != PTR)
		if (s1.ctyp != IDIR(NIL) || KIND(s0.ctyp) != PTR)
		if (s1.ctyp != s0.ctyp)
			die("invalid assignment");
		fprintf(of, "\tstore%c ", irtyp(s1.ctyp));
		goto Args;
	case 'P':
	case 'M':
		/* post-inc/dec: load old value, add/sub 1 below */
		o = n->op == 'P' ? '+' : '-';
		sl = lval(n->l);
		s0.t = Tmp;
		s0.u.n = tmp++;
		s0.ctyp = sl.ctyp;
		load(s0, sl);
		s1.t = Con;
		s1.u.n = 1;
		s1.ctyp = INT;
		goto Binop;
	default:
		s0 = expr(n->l);
		s1 = expr(n->r);
		o = n->op;
	Binop:
		sr.ctyp = prom(o, &s0, &s1);
		if (strchr("ne<l", n->op)) {
			/* comparisons are typed by their operands but
			 * always produce an int */
			sprintf(ty, "%c", irtyp(sr.ctyp));
			sr.ctyp = INT;
		} else
			strcpy(ty, "");
		fprintf(of, "\t");
		psymb(sr);
		fprintf(of, " =%c", irtyp(sr.ctyp));
		fprintf(of, " %s%s ", otoa[o], ty);
	Args:
		psymb(s0);
		fprintf(of, ", ");
		psymb(s1);
		fprintf(of, "\n");
		break;
	}
	/* ptr - ptr yields an element count: divide by elem size */
	if (n->op == '-'
	&&  KIND(s0.ctyp) == PTR
	&&  KIND(s1.ctyp) == PTR) {
		fprintf(of, "\t%%t%d =l div ", tmp);
		psymb(sr);
		fprintf(of, ", %d\n", SIZE(DREF(s0.ctyp)));
		sr.u.n = tmp++;
	}
	/* post-inc/dec writes the new value back, yields the old */
	if (n->op == 'P' || n->op == 'M') {
		fprintf(of, "\tstore%c ", irtyp(sl.ctyp));
		psymb(sr);
		fprintf(of, ", ");
		psymb(sl);
		fprintf(of, "\n");
		sr = s0;
	}
	return sr;
}
Symb
lval(Node *n)
{
	/* evaluate n as an lvalue: a named variable or a pointer
	 * dereference; anything else is rejected */
	Symb sr;

	switch (n->op) {
	case 'V':
		if (!varget(n->u.v))
			die("undefined variable");
		sr = *varget(n->u.v);
		break;
	case '@':
		sr = expr(n->l);
		if (KIND(sr.ctyp) != PTR)
			die("dereference of a non-pointer");
		sr.ctyp = DREF(sr.ctyp);
		break;
	default:
		die("invalid lvalue");
	}
	return sr;
}
void
branch(Node *n, int lt, int lf)
{
	/* emit a conditional branch on n: control goes to @lt when
	 * true, @lf when false; '&&'/'||' short-circuit through an
	 * intermediate label */
	Symb s;
	int l;

	if (n->op == 'o') {
		l = lbl++;
		branch(n->l, lt, l);
		fprintf(of, "@l%d\n", l);
		branch(n->r, lt, lf);
		return;
	}
	if (n->op == 'a') {
		l = lbl++;
		branch(n->l, l, lf);
		fprintf(of, "@l%d\n", l);
		branch(n->r, lt, lf);
		return;
	}
	s = expr(n); /* TODO: insert comparison to 0 with proper type */
	fprintf(of, "\tjnz ");
	psymb(s);
	fprintf(of, ", @l%d, @l%d\n", lt, lf);
}
/* generate code for statement s; b is the enclosing loop's break
 * label (or negative outside loops).  Returns 1 when the statement
 * always transfers control (so no fall-through code is needed). */
int
stmt(Stmt *s, int b)
{
	int l, r;
	Symb x;
	if (!s)
		return 0;
	switch (s->t) {
	case Ret:
		x = expr(s->p1);
		fprintf(of, "\tret ");
		psymb(x);
		fprintf(of, "\n");
		return 1;
	case Break:
		if (b < 0)
			die("break not in loop");
		fprintf(of, "\tjmp @l%d\n", b);
		return 1;
	case Expr:
		expr(s->p1);
		return 0;
	case Seq:
		return stmt(s->p1, b) || stmt(s->p2, b);
	case If:
		/* labels: l = then, l+1 = else, l+2 = join */
		l = lbl;
		lbl += 3;
		branch(s->p1, l, l+1);
		fprintf(of, "@l%d\n", l);
		if (!(r=stmt(s->p2, b)))
		if (s->p3)
			fprintf(of, "\tjmp @l%d\n", l+2);
		fprintf(of, "@l%d\n", l+1);
		if (s->p3)
		if (!(r &= stmt(s->p3, b)))
			fprintf(of, "@l%d\n", l+2);
		/* an if returns only when both arms return */
		return s->p3 && r;
	case While:
		/* labels: l = test, l+1 = body, l+2 = exit/break */
		l = lbl;
		lbl += 3;
		fprintf(of, "@l%d\n", l);
		branch(s->p1, l+1, l+2);
		fprintf(of, "@l%d\n", l+1);
		if (!stmt(s->p2, l+2))
			fprintf(of, "\tjmp @l%d\n", l);
		fprintf(of, "@l%d\n", l+2);
		return 0;
	}
}
Node *
mknode(char op, Node *l, Node *r)
{
	/* allocate a fresh AST node with the given tag and children */
	Node *n;

	n = alloc(sizeof *n);
	n->op = op;
	n->r = r;
	n->l = l;
	return n;
}
Node *
mkidx(Node *a, Node *i)
{
	/* a[i] desugars to *(a + i) */
	return mknode('@', mknode('+', a, i), 0);
}
Node *
mkneg(Node *n)
{
	/* unary minus desugars to 0 - n; the zero literal node is
	 * built once and shared between all negations */
	static Node *z;

	if (z == 0) {
		z = mknode('N', 0, 0);
		z->u.n = 0;
	}
	return mknode('-', z, n);
}
Stmt *
mkstmt(int t, void *p1, void *p2, void *p3)
{
	/* allocate a statement node of kind t */
	Stmt *s;

	s = alloc(sizeof *s);
	s->t = t;
	s->p3 = p3;
	s->p2 = p2;
	s->p1 = p1;
	return s;
}
Node *
param(char *v, unsigned ctyp, Node *pl)
{
	/* declare one function parameter and prepend it to the
	 * parameter list pl */
	Node *n;

	if (ctyp == NIL)
		die("invalid void declaration");
	n = mknode(0, 0, pl);
	strcpy(n->u.v, v);
	varadd(v, 0, ctyp);
	return n;
}
Stmt *
mkfor(Node *ini, Node *tst, Node *inc, Stmt *s)
{
	/* desugar for(ini; tst; inc) s into
	 * ini; while (tst) { s; inc; } — a missing test becomes the
	 * constant 1 (infinite loop) */
	Stmt *s1, *s2;

	if (ini)
		s1 = mkstmt(Expr, ini, 0, 0);
	else
		s1 = 0;
	if (inc)
		s2 = mkstmt(Seq, s, mkstmt(Expr, inc, 0, 0), 0);
	else
		s2 = s;
	if (!tst) {
		tst = mknode('N', 0, 0);
		tst->u.n = 1;
	}
	s2 = mkstmt(While, tst, s2, 0);
	if (!s1)
		return s2;
	return mkstmt(Seq, s1, s2, 0);
}
%}
%union {
	Node *n;
	Stmt *s;
	unsigned u;
}
%token <n> NUM
%token <n> STR
%token <n> IDENT
%token PP MM LE GE SIZEOF
%token TVOID TINT TLNG
%token IF ELSE WHILE FOR BREAK RETURN
/* precedence, lowest first */
%right '='
%left OR
%left AND
%left '&'
%left EQ NE
%left '<' '>' LE GE
%left '+' '-'
%left '*' '/' '%'
%type <u> type
%type <s> stmt stmts
%type <n> expr exp0 pref post arg0 arg1 par0 par1
%%
/* a program is a sequence of functions, function declarations,
 * and global variable declarations */
prog: func prog | fdcl prog | idcl prog | ;
fdcl: type IDENT '(' ')' ';'
{
	varadd($2->u.v, 1, FUNC($1));
};
idcl: type IDENT ';'
{
	if ($1 == NIL)
		die("invalid void declaration");
	if (nglo == NGlo)
		die("too many string literals");
	/* global data, zero-initialized with the right width */
	ini[nglo] = alloc(sizeof "{ x 0 }");
	sprintf(ini[nglo], "{ %c 0 }", irtyp($1));
	varadd($2->u.v, nglo++, $1);
};
/* empty rule run before each function body: reset locals */
init:
{
	varclr();
	tmp = 0;
};
func: init prot '{' dcls stmts '}'
{
	if (!stmt($5, -1))
		fprintf(of, "\tret 0\n");
	fprintf(of, "}\n\n");
};
prot: IDENT '(' par0 ')'
{
	Symb *s;
	Node *n;
	int t, m;
	varadd($1->u.v, 1, FUNC(INT));
	fprintf(of, "export function w $%s(", $1->u.v);
	n = $3;
	if (n)
		for (;;) {
			s = varget(n->u.v);
			fprintf(of, "%c ", irtyp(s->ctyp));
			fprintf(of, "%%t%d", tmp++);
			n = n->r;
			if (n)
				fprintf(of, ", ");
			else
				break;
		}
	fprintf(of, ") {\n");
	fprintf(of, "@l%d\n", lbl++);
	/* spill each parameter to a stack cell named after it */
	for (t=0, n=$3; n; t++, n=n->r) {
		s = varget(n->u.v);
		m = SIZE(s->ctyp);
		fprintf(of, "\t%%%s =l alloc%d %d\n", n->u.v, m, m);
		fprintf(of, "\tstore%c %%t%d", irtyp(s->ctyp), t);
		fprintf(of, ", %%%s\n", n->u.v);
	}
};
par0: par1
    | { $$ = 0; }
    ;
par1: type IDENT ',' par1 { $$ = param($2->u.v, $1, $4); }
    | type IDENT          { $$ = param($2->u.v, $1, 0); }
    ;
dcls: | dcls type IDENT ';'
{
	int s;
	char *v;
	if ($2 == NIL)
		die("invalid void declaration");
	v = $3->u.v;
	s = SIZE($2);
	varadd(v, 0, $2);
	fprintf(of, "\t%%%s =l alloc%d %d\n", v, s, s);
};
type: type '*' { $$ = IDIR($1); }
    | TINT     { $$ = INT; }
    | TLNG     { $$ = LNG; }
    | TVOID    { $$ = NIL; }
    ;
stmt: ';'                            { $$ = 0; }
    | '{' stmts '}'                  { $$ = $2; }
    | BREAK ';'                      { $$ = mkstmt(Break, 0, 0, 0); }
    | RETURN expr ';'                { $$ = mkstmt(Ret, $2, 0, 0); }
    | expr ';'                       { $$ = mkstmt(Expr, $1, 0, 0); }
    | WHILE '(' expr ')' stmt        { $$ = mkstmt(While, $3, $5, 0); }
    | IF '(' expr ')' stmt ELSE stmt { $$ = mkstmt(If, $3, $5, $7); }
    | IF '(' expr ')' stmt           { $$ = mkstmt(If, $3, $5, 0); }
    | FOR '(' exp0 ';' exp0 ';' exp0 ')' stmt
                                     { $$ = mkfor($3, $5, $7, $9); }
    ;
stmts: stmts stmt { $$ = mkstmt(Seq, $1, $2, 0); }
     |            { $$ = 0; }
     ;
/* note: a > b and a >= b reuse '<'/'l' with swapped operands */
expr: pref
    | expr '=' expr { $$ = mknode('=', $1, $3); }
    | expr '+' expr { $$ = mknode('+', $1, $3); }
    | expr '-' expr { $$ = mknode('-', $1, $3); }
    | expr '*' expr { $$ = mknode('*', $1, $3); }
    | expr '/' expr { $$ = mknode('/', $1, $3); }
    | expr '%' expr { $$ = mknode('%', $1, $3); }
    | expr '<' expr { $$ = mknode('<', $1, $3); }
    | expr '>' expr { $$ = mknode('<', $3, $1); }
    | expr LE expr  { $$ = mknode('l', $1, $3); }
    | expr GE expr  { $$ = mknode('l', $3, $1); }
    | expr EQ expr  { $$ = mknode('e', $1, $3); }
    | expr NE expr  { $$ = mknode('n', $1, $3); }
    | expr '&' expr { $$ = mknode('&', $1, $3); }
    | expr AND expr { $$ = mknode('a', $1, $3); }
    | expr OR expr  { $$ = mknode('o', $1, $3); }
    ;
exp0: expr
    | { $$ = 0; }
    ;
pref: post
    | '-' pref { $$ = mkneg($2); }
    | '*' pref { $$ = mknode('@', $2, 0); }
    | '&' pref { $$ = mknode('A', $2, 0); }
    ;
post: NUM
    | STR
    | IDENT
    | SIZEOF '(' type ')' { $$ = mknode('N', 0, 0); $$->u.n = SIZE($3); }
    | '(' expr ')'        { $$ = $2; }
    | IDENT '(' arg0 ')'  { $$ = mknode('C', $1, $3); }
    | post '[' expr ']'   { $$ = mkidx($1, $3); }
    | post PP             { $$ = mknode('P', $1, 0); }
    | post MM             { $$ = mknode('M', $1, 0); }
    ;
arg0: arg1
    | { $$ = 0; }
    ;
arg1: expr          { $$ = mknode(0, $1, 0); }
    | expr ',' arg1 { $$ = mknode(0, $1, $3); }
    ;
%%
/* hand-written lexer reading stdin.  '#' starts a line comment
 * (which also neutralizes cpp directives like #include); string
 * literals are converted on the fly into QBE data initializers
 * stored in ini[]. */
int
yylex()
{
	struct {
		char *s;
		int t;
	} kwds[] = {
		{ "void", TVOID },
		{ "int", TINT },
		{ "long", TLNG },
		{ "if", IF },
		{ "else", ELSE },
		{ "for", FOR },
		{ "while", WHILE },
		{ "return", RETURN },
		{ "break", BREAK },
		{ "sizeof", SIZEOF },
		{ 0, 0 }
	};
	int i, c, c1, n;
	char v[NString], *p;
	/* skip whitespace and '#' comments, tracking line numbers */
	do {
		c = getchar();
		if (c == '#')
			while ((c = getchar()) != '\n')
				;
		if (c == '\n')
			line++;
	} while (isspace(c));
	if (c == EOF)
		return 0;
	/* decimal integer literal */
	if (isdigit(c)) {
		n = 0;
		do {
			n *= 10;
			n += c-'0';
			c = getchar();
		} while (isdigit(c));
		ungetc(c, stdin);
		yylval.n = mknode('N', 0, 0);
		yylval.n->u.n = n;
		return NUM;
	}
	/* identifier or keyword */
	if (isalpha(c)) {
		p = v;
		do {
			if (p == &v[NString-1])
				die("ident too long");
			*p++ = c;
			c = getchar();
		} while (isalpha(c) || c == '_');
		*p = 0;
		ungetc(c, stdin);
		for (i=0; kwds[i].s; i++)
			if (strcmp(v, kwds[i].s) == 0)
				return kwds[i].t;
		yylval.n = mknode('V', 0, 0);
		strcpy(yylval.n->u.v, v);
		return IDENT;
	}
	/* string literal: build a QBE data initializer and register
	 * it as a new global */
	if (c == '"') {
		i = 0;
		n = 32;
		p = alloc(n);
		strcpy(p, "{ b \"");
		for (i=5;; i++) {
			c = getchar();
			if (c == EOF)
				die("unclosed string literal");
			if (i+8 >= n) {
				p = memcpy(alloc(n*2), p, n);
				n *= 2;
			}
			p[i] = c;
			if (c == '"' && p[i-1]!='\\')
				break;
		}
		strcpy(&p[i], "\", b 0 }");
		if (nglo == NGlo)
			die("too many globals");
		ini[nglo] = p;
		yylval.n = mknode('S', 0, 0);
		yylval.n->u.n = nglo++;
		return STR;
	}
	/* two-character operators, encoded as c + c1*256 */
	c1 = getchar();
#define DI(a, b) a + b*256
	switch (DI(c,c1)) {
	case DI('!','='): return NE;
	case DI('=','='): return EQ;
	case DI('<','='): return LE;
	case DI('>','='): return GE;
	case DI('+','+'): return PP;
	case DI('-','-'): return MM;
	case DI('&','&'): return AND;
	case DI('|','|'): return OR;
	}
#undef DI
	ungetc(c1, stdin);
	return c;
}
/* yacc error callback; the message is ignored, die() exits */
int
yyerror(char *err)
{
	die("parse error");
	return 0;
}
int
main()
{
	/* parse stdin, then flush the accumulated global data
	 * definitions after all functions have been emitted */
	int i;

	nglo = 1;
	of = stdout;
	if (yyparse() != 0)
		die("parse error");
	for (i=1; i<nglo; i++)
		fprintf(of, "data $glo%d = %s\n", i, ini[i]);
	return 0;
}

View File

@@ -0,0 +1,33 @@
void *malloc();
main()
{
int n;
int nv;
int c;
int cmax;
int *mem;
mem = malloc(sizeof(int) * 4000);
cmax = 0;
for (nv = 1; nv < 1000; nv++) {
n = nv;
c = 0;
while (n != 1) {
if (n < nv) {
c = c + mem[n];
break;
}
if (n & 1)
n = 3*n + 1;
else
n = n / 2;
c++;
}
mem[nv] = c;
if (c > cmax)
cmax = c;
}
printf("should print 178: %d\n", cmax);
}

View File

@@ -0,0 +1,27 @@
#include <stdio.h>
main()
{
int i;
int a;
int b;
int c;
int d;
for (a = 1; a < 1000; a++) {
for (b = a + 1; b < 1000; b++) {
d = a*a + b*b;
for (i = 0; i < 1000; i++) {
if (i * i == d) {
c = i;
if (b < c && a+b+c == 1000) {
printf("%d\n", a*b*c);
return 0;
}
break;
}
}
}
}
}

View File

@@ -0,0 +1,60 @@
#include <stdlib.h>
#include <stdio.h>
void *calloc();
int N;
int **b;
board()
{
int x;
int y;
for (y=0; y<8; y++) {
for (x=0; x<8; x++)
printf(" %02d", b[x][y]);
printf("\n");
}
printf("\n");
return 0;
}
chk(int x, int y)
{
if (x < 0 || x > 7 || y < 0 || y > 7)
return 0;
return b[x][y] == 0;
}
go(int k, int x, int y)
{
int i;
int j;
b[x][y] = k;
if (k == 64) {
if (x != 2 && y != 0 && abs(x-2) + abs(y) == 3) {
board();
N++;
if (N == 10)
exit(0);
}
} else
for (i=-2; i<=2; i++)
for (j=-2; j<=2; j++)
if (abs(i) + abs(j) == 3 && chk(x+i, y+j))
go(k+1, x+i, y+j);
b[x][y] = 0;
return 0;
}
main()
{
int i;
b = calloc(8, sizeof (int *));
for (i=0; i<8; i++)
b[i] = calloc(8, sizeof (int));
go(1, 2, 0);
}

View File

@@ -0,0 +1,88 @@
void *malloc();
void *SDL_CreateWindow();
void *SDL_CreateRenderer();
int SDL_SetRenderDrawColor();
int SDL_RenderDrawPoint();
int SDL_RenderClear();
int SDL_RenderPresent();
int SDL_PollEvent();
int SDL_DestroyRenderer();
int SDL_DestroyWindow();
int SDL_Quit();
int SDL_Init();
void *win;
void *rnd;
int W;
int H;
int *col;
plot(int x, int y)
{
int n;
int fx;
int fy;
int zx;
int zy;
int nx;
int ny;
fx = (x - W/2)*4000 / W;
fy = (y - H/2)*4000 / H;
zx = fx;
zy = fy;
for (n=0; n<200; n++) {
if (zx*zx + zy*zy > 4000000)
break;
nx = (zx*zx)/1000 - (zy*zy)/1000 + fx;
ny = zx*zy/500 + fy;
zx = nx;
zy = ny;
}
n = col[n];
SDL_SetRenderDrawColor(rnd, 100, n, n, 255);
SDL_RenderDrawPoint(rnd, x, y);
return 0;
}
main() {
int c;
int n;
int x;
int y;
void *e;
int *ie;
W = 800;
H = 800;
SDL_Init(32);
win = SDL_CreateWindow("Mandelbrot MiniC", 0, 0, W, H, 0);
rnd = SDL_CreateRenderer(win, -1, 0);
e = malloc(56);
ie = e;
col = malloc(201 * sizeof (int));
c = 20;
for (n=0; n<200; n++) {
col[n] = c;
c = c + (255-c)/8;
}
col[n] = 30;
SDL_RenderClear(rnd);
for (x=0; x<W; x++)
for (y=0; y<H; y++)
plot(x, y);
SDL_RenderPresent(rnd);
for (;;) {
if (SDL_PollEvent(e)) {
if (ie[0] == 769)
break;
}
}
SDL_DestroyRenderer(rnd);
SDL_DestroyWindow(win);
SDL_Quit();
}

View File

@@ -0,0 +1,28 @@
#include <stdio.h>
main() {
int n;
int t;
int c;
int p;
c = 0;
n = 2;
while (n < 5000) {
t = 2;
p = 1;
while (t*t <= n) {
if (n % t == 0)
p = 0;
t++;
}
if (p) {
if (c && c % 10 == 0)
printf("\n");
printf("%4d ", n);
c++;
}
n++;
}
printf("\n");
}

View File

@@ -0,0 +1,70 @@
int printf();
void *calloc();
int atoi();
int Q;
int N;
int **t;
print() {
int x;
int y;
for (y=0; y<Q; y++) {
for (x=0; x<Q; x++)
if (t[x][y])
printf(" Q");
else
printf(" .");
printf("\n");
}
printf("\n");
}
chk(int x, int y) {
int i;
int r;
for (r=i=0; i<Q; i++) {
r = r + t[x][i];
r = r + t[i][y];
if (x+i < Q & y+i < Q)
r = r + t[x+i][y+i];
if (x+i < Q & y-i >= 0)
r = r + t[x+i][y-i];
if (x-i >= 0 & y+i < Q)
r = r + t[x-i][y+i];
if (x-i >= 0 & y-i >= 0)
r = r + t[x-i][y-i];
}
return r;
}
go(int y) {
int x;
if (y == Q) {
print();
N++;
return 0;
}
for (x=0; x<Q; x++)
if (chk(x, y) == 0) {
t[x][y]++;
go(y+1);
t[x][y]--;
}
}
main(int ac, void **av) {
int i;
Q = 8;
if (ac >= 2)
Q = atoi(av[1]);
t = calloc(Q, sizeof(int *));
for (i=0; i<Q; i++)
t[i] = calloc(Q, sizeof(int));
go(0);
printf("found %d solutions\n", N);
}

1378
src/qbe/minic/yacc.c Normal file

File diff suppressed because it is too large Load Diff

228
src/qbe/ops.h Normal file
View File

@@ -0,0 +1,228 @@
#ifndef X /* amd64 */
#define X(NMemArgs, SetsZeroFlag, LeavesFlags)
#endif
#ifndef V /* riscv64 */
#define V(Imm)
#endif
#ifndef F
#define F(a,b,c,d,e,f,g,h,i,j)
#endif
#define T(a,b,c,d,e,f,g,h) { \
{[Kw]=K##a, [Kl]=K##b, [Ks]=K##c, [Kd]=K##d}, \
{[Kw]=K##e, [Kl]=K##f, [Ks]=K##g, [Kd]=K##h} \
}
/*********************/
/* PUBLIC OPERATIONS */
/*********************/
/* can fold */
/* | has identity */
/* | | identity value for arg[1] */
/* | | | commutative */
/* | | | | associative */
/* | | | | | idempotent */
/* | | | | | | c{eq,ne}[wl] */
/* | | | | | | | c[us][gl][et][wl] */
/* | | | | | | | | value if = args */
/* | | | | | | | | | pinned */
/* Arithmetic and Bits v v v v v v v v v v */
O(add, T(w,l,s,d, w,l,s,d), F(1,1,0,1,1,0,0,0,0,0)) X(2,1,0) V(1)
O(sub, T(w,l,s,d, w,l,s,d), F(1,1,0,0,0,0,0,0,0,0)) X(2,1,0) V(0)
O(neg, T(w,l,s,d, x,x,x,x), F(1,0,0,0,0,0,0,0,0,0)) X(1,1,0) V(0)
O(div, T(w,l,s,d, w,l,s,d), F(1,1,1,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(rem, T(w,l,e,e, w,l,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(udiv, T(w,l,e,e, w,l,e,e), F(1,1,1,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(urem, T(w,l,e,e, w,l,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(mul, T(w,l,s,d, w,l,s,d), F(1,1,1,1,0,0,0,0,0,0)) X(2,0,0) V(0)
O(and, T(w,l,e,e, w,l,e,e), F(1,0,0,1,1,1,0,0,0,0)) X(2,1,0) V(1)
O(or, T(w,l,e,e, w,l,e,e), F(1,1,0,1,1,1,0,0,0,0)) X(2,1,0) V(1)
O(xor, T(w,l,e,e, w,l,e,e), F(1,1,0,1,1,0,0,0,0,0)) X(2,1,0) V(1)
O(sar, T(w,l,e,e, w,w,e,e), F(1,1,0,0,0,0,0,0,0,0)) X(1,1,0) V(1)
O(shr, T(w,l,e,e, w,w,e,e), F(1,1,0,0,0,0,0,0,0,0)) X(1,1,0) V(1)
O(shl, T(w,l,e,e, w,w,e,e), F(1,1,0,0,0,0,0,0,0,0)) X(1,1,0) V(1)
/* Comparisons */
O(ceqw, T(w,w,e,e, w,w,e,e), F(1,1,1,1,0,0,1,0,1,0)) X(0,1,0) V(0)
O(cnew, T(w,w,e,e, w,w,e,e), F(1,1,0,1,0,0,1,0,0,0)) X(0,1,0) V(0)
O(csgew, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
O(csgtw, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(0)
O(cslew, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
O(csltw, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(1)
O(cugew, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
O(cugtw, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(0)
O(culew, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
O(cultw, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(1)
O(ceql, T(l,l,e,e, l,l,e,e), F(1,0,0,1,0,0,1,0,1,0)) X(0,1,0) V(0)
O(cnel, T(l,l,e,e, l,l,e,e), F(1,0,0,1,0,0,1,0,0,0)) X(0,1,0) V(0)
O(csgel, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
O(csgtl, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(0)
O(cslel, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
O(csltl, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(1)
O(cugel, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
O(cugtl, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(0)
O(culel, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
O(cultl, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(1)
O(ceqs, T(s,s,e,e, s,s,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
O(cges, T(s,s,e,e, s,s,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
O(cgts, T(s,s,e,e, s,s,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
O(cles, T(s,s,e,e, s,s,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
O(clts, T(s,s,e,e, s,s,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
O(cnes, T(s,s,e,e, s,s,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
O(cos, T(s,s,e,e, s,s,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
O(cuos, T(s,s,e,e, s,s,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
O(ceqd, T(d,d,e,e, d,d,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
O(cged, T(d,d,e,e, d,d,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
O(cgtd, T(d,d,e,e, d,d,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
O(cled, T(d,d,e,e, d,d,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
O(cltd, T(d,d,e,e, d,d,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
O(cned, T(d,d,e,e, d,d,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
O(cod, T(d,d,e,e, d,d,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
O(cuod, T(d,d,e,e, d,d,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
/* Memory */
O(storeb, T(w,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
O(storeh, T(w,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
O(storew, T(w,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
O(storel, T(l,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
O(stores, T(s,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
O(stored, T(d,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
O(loadsb, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
O(loadub, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
O(loadsh, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
O(loaduh, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
O(loadsw, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
O(loaduw, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
O(load, T(m,m,m,m, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
/* Extensions and Truncations */
O(extsb, T(w,w,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(extub, T(w,w,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(extsh, T(w,w,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(extuh, T(w,w,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(extsw, T(e,w,e,e, e,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(extuw, T(e,w,e,e, e,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(exts, T(e,e,e,s, e,e,e,x), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(truncd, T(e,e,d,e, e,e,x,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(stosi, T(s,s,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(stoui, T(s,s,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(dtosi, T(d,d,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(dtoui, T(d,d,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(swtof, T(e,e,w,w, e,e,x,x), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(uwtof, T(e,e,w,w, e,e,x,x), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(sltof, T(e,e,l,l, e,e,x,x), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(ultof, T(e,e,l,l, e,e,x,x), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(cast, T(s,d,w,l, x,x,x,x), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
/* Stack Allocation */
O(alloc4, T(e,l,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
O(alloc8, T(e,l,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
O(alloc16, T(e,l,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
/* Variadic Function Helpers */
O(vaarg, T(m,m,m,m, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
O(vastart, T(m,e,e,e, x,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
O(copy, T(w,l,s,d, x,x,x,x), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
/* Debug */
O(dbgloc, T(w,e,e,e, w,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
/****************************************/
/* INTERNAL OPERATIONS (keep nop first) */
/****************************************/
/* Miscellaneous and Architecture-Specific Operations */
O(nop, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(addr, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(blit0, T(m,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,1,0) V(0)
O(blit1, T(w,e,e,e, x,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,1,0) V(0)
O(sel0, T(w,e,e,e, x,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
O(sel1, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
O(swap, T(w,l,s,d, w,l,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(1,0,0) V(0)
O(sign, T(w,l,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(salloc, T(e,l,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(xidiv, T(w,l,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(1,0,0) V(0)
O(xdiv, T(w,l,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(1,0,0) V(0)
O(xcmp, T(w,l,s,d, w,l,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(1,1,0) V(0)
O(xtest, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(1,1,0) V(0)
O(acmp, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(acmn, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(afcmp, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(reqz, T(w,l,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(rnez, T(w,l,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
/* Arguments, Parameters, and Calls */
O(par, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
O(parsb, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
O(parub, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
O(parsh, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
O(paruh, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
O(parc, T(e,x,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
O(pare, T(e,x,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
O(arg, T(w,l,s,d, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
O(argsb, T(w,e,e,e, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
O(argub, T(w,e,e,e, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
O(argsh, T(w,e,e,e, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
O(arguh, T(w,e,e,e, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
O(argc, T(e,x,e,e, e,l,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
O(arge, T(e,l,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
O(argv, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
O(call, T(m,m,m,m, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
/* Flags Setting */
O(flagieq, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(flagine, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(flagisge, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(flagisgt, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(flagisle, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(flagislt, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(flagiuge, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(flagiugt, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(flagiule, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(flagiult, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(flagfeq, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(flagfge, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(flagfgt, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(flagfle, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(flagflt, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(flagfne, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(flagfo, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
O(flagfuo, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
/* Backend Flag Select (Condition Move) */
O(xselieq, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(xseline, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(xselisge, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(xselisgt, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(xselisle, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(xselislt, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(xseliuge, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(xseliugt, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(xseliule, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(xseliult, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(xselfeq, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(xselfge, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(xselfgt, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(xselfle, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(xselflt, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(xselfne, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(xselfo, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
O(xselfuo, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
#undef T
#undef X
#undef V
#undef O
/*
| column -t -o ' '
*/

43
src/qbe/out.s Normal file
View File

@@ -0,0 +1,43 @@
.text
.balign 4
_add:
hint #34
stp x29, x30, [sp, -16]!
mov x29, sp
add w0, w0, w1
ldp x29, x30, [sp], 16
ret
/* end function add */
.text
.balign 4
.globl _main
_main:
hint #34
stp x29, x30, [sp, -16]!
mov x29, sp
mov w1, #1
mov w0, #1
bl _add
mov x1, #16
sub sp, sp, x1
mov x1, #0
add x1, sp, x1
str w0, [x1]
adrp x0, _fmt@page
add x0, x0, _fmt@pageoff
bl _printf
mov x0, #16
add sp, sp, x0
mov w0, #0
ldp x29, x30, [sp], 16
ret
/* end function main */
.data
.balign 8
_fmt:
.ascii "One and one make %d!\n"
.byte 0
/* end data */

1433
src/qbe/parse.c Normal file

File diff suppressed because it is too large Load Diff

696
src/qbe/rega.c Normal file
View File

@@ -0,0 +1,696 @@
#include "all.h"

#ifdef TEST_PMOV
/* unit-test hook: route assert() through assert_test() */
#undef assert
#define assert(x) assert_test(#x, x)
#endif

typedef struct RMap RMap;

/* mapping of temporaries to registers at one program point */
struct RMap {
	int t[Tmp0];	/* temporaries, parallel to r[] */
	int r[Tmp0];	/* registers, parallel to t[] */
	int w[Tmp0]; /* wait list, for unmatched hints */
	BSet b[1];	/* set of all temps and registers in the map */
	int n;	/* number of (t, r) pairs */
};

static bits regu; /* registers used */
static Tmp *tmp; /* function temporaries */
static Mem *mem; /* function mem references */
static struct {
	Ref src, dst;
	int cls;
} pm[Tmp0]; /* parallel move constructed */
static int npm; /* size of pm */
static int loop; /* current loop level */
static uint stmov; /* stats: added moves */
static uint stblk; /* stats: added blocks */
static int *
hint(int t)
{
return &tmp[phicls(t, tmp)].hint.r;
}
/* record register r as the hint for t's phi class; weaker
 * (deeper-loop) hints never overwrite stronger ones */
static void
sethint(int t, int r)
{
	Tmp *p;

	p = &tmp[phicls(t, tmp)];
	if (p->hint.r != -1 && p->hint.w <= loop)
		return;
	p->hint.r = r;
	p->hint.w = loop;
	tmp[t].visit = -1;
}
/* copy register map src into dst */
static void
rcopy(RMap *dst, RMap *src)
{
	dst->n = src->n;
	bscopy(dst->b, src->b);
	memcpy(dst->t, src->t, sizeof dst->t);
	memcpy(dst->r, src->r, sizeof dst->r);
	memcpy(dst->w, src->w, sizeof dst->w);
}
/* register mapped to temporary t, or -1 if unmapped */
static int
rfind(RMap *m, int t)
{
	int i;

	i = 0;
	while (i < m->n) {
		if (m->t[i] == t)
			return m->r[i];
		i++;
	}
	return -1;
}
/* reference for temporary t: its register if mapped,
 * otherwise its spill slot */
static Ref
rref(RMap *m, int t)
{
	int r, s;

	r = rfind(m, t);
	if (r != -1)
		return TMP(r);
	s = tmp[t].slot;
	assert(s != -1 && "should have spilled");
	return SLOT(s);
}
/* record that temporary (or register) t occupies register r */
static void
radd(RMap *m, int t, int r)
{
	assert((t >= Tmp0 || t == r) && "invalid temporary");
	assert(((T.gpr0 <= r && r < T.gpr0 + T.ngpr)
		|| (T.fpr0 <= r && r < T.fpr0 + T.nfpr))
		&& "invalid register");
	assert(!bshas(m->b, t) && "temporary has mapping");
	assert(!bshas(m->b, r) && "register already allocated");
	assert(m->n <= T.ngpr+T.nfpr && "too many mappings");
	bsset(m->b, t);
	bsset(m->b, r);
	m->t[m->n] = t;
	m->r[m->n] = r;
	m->n++;
	regu |= BIT(r);	/* remember r was used by this function */
}
/* Return a register for temporary t, allocating one if needed.
 * Preference order: register from a previous visit, then the
 * phi-class hint, then any free register (avoiding registers
 * hinted to the phi class first).  If `try` is nonzero and no
 * preferred register is free, return R instead of forcing an
 * arbitrary choice. */
static Ref
ralloctry(RMap *m, int t, int try)
{
	bits regs;
	int h, r, r0, r1;

	if (t < Tmp0) {
		/* t is a machine register; must already be mapped */
		assert(bshas(m->b, t));
		return TMP(t);
	}
	if (bshas(m->b, t)) {
		r = rfind(m, t);
		assert(r != -1);
		return TMP(r);
	}
	r = tmp[t].visit;
	if (r == -1 || bshas(m->b, r))
		r = *hint(t);
	if (r == -1 || bshas(m->b, r)) {
		if (try)
			return R;
		/* avoid registers hinted to the phi class */
		regs = tmp[phicls(t, tmp)].hint.m;
		regs |= m->b->t[0];
		if (KBASE(tmp[t].cls) == 0) {
			r0 = T.gpr0;
			r1 = r0 + T.ngpr;
		} else {
			r0 = T.fpr0;
			r1 = r0 + T.nfpr;
		}
		for (r=r0; r<r1; r++)
			if (!(regs & BIT(r)))
				goto Found;
		for (r=r0; r<r1; r++)
			if (!bshas(m->b, r))
				goto Found;
		die("no more regs");
	}
Found:
	radd(m, t, r);
	sethint(t, r);
	tmp[t].visit = r;
	h = *hint(t);
	if (h != -1 && h != r)
		m->w[h] = t;	/* t waits for its hinted register h */
	return TMP(r);
}
/* allocate a register for t, never giving up (cf. ralloctry) */
static inline Ref
ralloc(RMap *m, int t)
{
	return ralloctry(m, t, 0);
}
/* remove t's mapping and return the register it occupied,
 * or -1 if t was not mapped */
static int
rfree(RMap *m, int t)
{
	int i, r;

	assert(t >= Tmp0 || !(BIT(t) & T.rglob));
	if (!bshas(m->b, t))
		return -1;
	for (i=0; m->t[i] != t; i++)
		assert(i+1 < m->n);
	r = m->r[i];
	bsclr(m->b, t);
	bsclr(m->b, r);
	m->n--;
	/* compact the parallel arrays */
	memmove(&m->t[i], &m->t[i+1], (m->n-i) * sizeof m->t[0]);
	memmove(&m->r[i], &m->r[i+1], (m->n-i) * sizeof m->r[0]);
	assert(t >= Tmp0 || t == r);
	return r;
}
/* debug dump of the non-register mappings in m to stderr */
static void
mdump(RMap *m)
{
	int i;

	for (i=0; i<m->n; i++) {
		if (m->t[i] < Tmp0)
			continue;
		fprintf(stderr, " (%s, R%d)", tmp[m->t[i]].name, m->r[i]);
	}
	fprintf(stderr, "\n");
}
/* append a (src -> dst) move of class k to the parallel move */
static void
pmadd(Ref src, Ref dst, int k)
{
	int i;

	if (npm == Tmp0)
		die("cannot have more moves than registers");
	i = npm++;
	pm[i].src = src;
	pm[i].dst = dst;
	pm[i].cls = k;
}
enum PMStat { ToMove, Moving, Moved };

/* Emit code for the chain of parallel moves starting at pm[i].
 * Chains that form a cycle are broken with swap instructions;
 * returns the index where a cycle starts, or -1. */
static int
pmrec(enum PMStat *status, int i, int *k)
{
	int j, c;

	/* note, this routine might emit
	 * too many large instructions
	 */
	if (req(pm[i].src, pm[i].dst)) {
		status[i] = Moved;
		return -1;
	}
	assert(KBASE(pm[i].cls) == KBASE(*k));
	assert((Kw|Kl) == Kl && (Ks|Kd) == Kd);
	*k |= pm[i].cls;	/* widen to the widest class in the chain */
	/* find the move whose destination is our source */
	for (j=0; j<npm; j++)
		if (req(pm[j].dst, pm[i].src))
			break;
	switch (j == npm ? Moved : status[j]) {
	case Moving:
		c = j; /* start of cycle */
		emit(Oswap, *k, R, pm[i].src, pm[i].dst);
		break;
	case ToMove:
		status[i] = Moving;
		c = pmrec(status, j, k);
		if (c == i) {
			c = -1; /* end of cycle */
			break;
		}
		if (c != -1) {
			emit(Oswap, *k, R, pm[i].src, pm[i].dst);
			break;
		}
		/* fall through */
	case Moved:
		c = -1;
		emit(Ocopy, pm[i].cls, pm[i].dst, pm[i].src, R);
		break;
	default:
		die("unreachable");
	}
	status[i] = Moved;
	return c;
}
/* emit instructions for the parallel move in pm[0..npm) */
static void
pmgen()
{
	int i;
	enum PMStat *status;

	status = alloc(npm * sizeof status[0]);
	/* relies on alloc() zero-filling: ToMove == 0
	 * (the assert spot-checks the last element) */
	assert(!npm || status[npm-1] == ToMove);
	for (i=0; i<npm; i++)
		if (status[i] == ToMove)
			pmrec(status, i, (int[]){pm[i].cls});
}
/* Force register r to hold `to` (or just to be r's own mapping
 * when to is R), relocating any temporary currently in r. */
static void
move(int r, Ref to, RMap *m)
{
	int n, t, r1;

	r1 = req(to, R) ? -1 : rfree(m, to.val);
	if (bshas(m->b, r)) {
		/* r is used and not by to */
		assert(r1 != r);
		for (n=0; m->r[n] != r; n++)
			assert(n+1 < m->n);
		t = m->t[n];
		/* reallocate t elsewhere with r temporarily masked */
		rfree(m, t);
		bsset(m->b, r);
		ralloc(m, t);
		bsclr(m->b, r);
	}
	t = req(to, R) ? r : to.val;
	radd(m, t, r);
}
/* is i a copy whose source is a machine register? */
static int
regcpy(Ins *i)
{
	if (i->op != Ocopy)
		return 0;
	return isreg(i->arg[0]);
}
/* Process (backwards) a run of register-to-temporary copies
 * ending at i, building a parallel move that preserves the
 * temporaries displaced by those registers.  Returns the first
 * instruction of the run. */
static Ins *
dopm(Blk *b, Ins *i, RMap *m)
{
	RMap m0;
	int n, r, r1, t, s;
	Ins *i1, *ip;
	bits def;

	m0 = *m; /* okay since we don't use m0.b */
	m0.b->t = 0;
	i1 = ++i;
	do {
		i--;
		move(i->arg[0].val, i->to, m);
	} while (i != b->ins && regcpy(i-1));
	assert(m0.n <= m->n);
	if (i != b->ins && (i-1)->op == Ocall) {
		/* the copies follow a call: also clear caller-save
		 * registers that do not carry return values */
		def = T.retregs((i-1)->arg[1], 0) | T.rglob;
		for (r=0; T.rsave[r]>=0; r++)
			if (!(BIT(T.rsave[r]) & def))
				move(T.rsave[r], R, m);
	}
	/* build the parallel move restoring displaced temps */
	for (npm=0, n=0; n<m->n; n++) {
		t = m->t[n];
		s = tmp[t].slot;
		r1 = m->r[n];
		r = rfind(&m0, t);
		if (r != -1)
			pmadd(TMP(r1), TMP(r), tmp[t].cls);
		else if (s != -1)
			pmadd(TMP(r1), SLOT(s), tmp[t].cls);
	}
	for (ip=i; ip<i1; ip++) {
		if (!req(ip->to, R))
			rfree(m, ip->to.val);
		r = ip->arg[0].val;
		if (rfind(m, r) == -1)
			radd(m, r, r);
	}
	pmgen();
	return i;
}
/* does r1 have priority over r2 for allocation ordering? */
static int
prio1(Ref r1, Ref r2)
{
	/* trivial heuristic to begin with,
	 * later we can use the distance to
	 * the definition instruction
	 */
	(void) r2;
	if (*hint(r1.val) == -1)
		return 0;
	return 1;
}
/* insert r at position p of rs[], bubbling it up past
 * lower-priority entries (insertion sort step) */
static void
insert(Ref *r, Ref **rs, int p)
{
	int i;

	rs[i = p] = r;
	while (i-- > 0 && prio1(*r, *rs[i])) {
		rs[i+1] = rs[i];
		rs[i] = r;
	}
}
/* Allocate registers inside block b, walking its instructions
 * bottom-up and rewriting them into insb[]; cur holds the
 * register map at the current point (initially: end of block). */
static void
doblk(Blk *b, RMap *cur)
{
	int t, x, r, rf, rt, nr;
	bits rs;
	Ins *i, *i1;
	Mem *m;
	Ref *ra[4];

	if (rtype(b->jmp.arg) == RTmp)
		b->jmp.arg = ralloc(cur, b->jmp.arg.val);
	curi = &insb[NIns];
	for (i1=&b->ins[b->nins]; i1!=b->ins;) {
		emiti(*--i1);
		i = curi;
		rf = -1;
		switch (i->op) {
		case Ocall:
			/* free caller-save registers that do not
			 * carry call arguments */
			rs = T.argregs(i->arg[1], 0) | T.rglob;
			for (r=0; T.rsave[r]>=0; r++)
				if (!(BIT(T.rsave[r]) & rs))
					rfree(cur, T.rsave[r]);
			break;
		case Ocopy:
			if (regcpy(i)) {
				curi++;
				i1 = dopm(b, i1, cur);
				stmov += i+1 - curi;
				continue;
			}
			if (isreg(i->to))
				if (rtype(i->arg[0]) == RTmp)
					sethint(i->arg[0].val, i->to.val);
			/* fall through */
		default:
			if (!req(i->to, R)) {
				assert(rtype(i->to) == RTmp);
				r = i->to.val;
				if (r < Tmp0 && (BIT(r) & T.rglob))
					break;
				rf = rfree(cur, r);
				if (rf == -1) {
					/* result is dead; drop instruction */
					assert(!isreg(i->to));
					curi++;
					continue;
				}
				i->to = TMP(rf);
			}
			break;
		}
		/* collect temporary uses (args and mem operands) */
		for (x=0, nr=0; x<2; x++)
			switch (rtype(i->arg[x])) {
			case RMem:
				m = &mem[i->arg[x].val];
				if (rtype(m->base) == RTmp)
					insert(&m->base, ra, nr++);
				if (rtype(m->index) == RTmp)
					insert(&m->index, ra, nr++);
				break;
			case RTmp:
				insert(&i->arg[x], ra, nr++);
				break;
			}
		for (r=0; r<nr; r++)
			*ra[r] = ralloc(cur, ra[r]->val);
		if (i->op == Ocopy && req(i->to, i->arg[0]))
			curi++;	/* no-op copy; drop it */

		/* try to change the register of a hinted
		 * temporary if rf is available */
		if (rf != -1 && (t = cur->w[rf]) != 0)
			if (!bshas(cur->b, rf) && *hint(t) == rf
			&& (rt = rfree(cur, t)) != -1) {
				tmp[t].visit = -1;
				ralloc(cur, t);
				assert(bshas(cur->b, rf));
				emit(Ocopy, tmp[t].cls, TMP(rt), TMP(rf), R);
				stmov += 1;
				cur->w[rf] = 0;
				for (r=0; r<nr; r++)
					if (req(*ra[r], TMP(rt)))
						*ra[r] = TMP(rf);
				/* one could iterate this logic with
				 * the newly freed rt, but in this case
				 * the above loop must be changed */
			}
	}
	idup(b, curi, &insb[NIns]-curi);
}
/* qsort() comparison function to peel
 * loop nests from inside out */
static int
carve(const void *a, const void *b)
{
	Blk *ba, *bb;

	/* todo, evaluate if this order is really
	 * better than the simple postorder */
	ba = *(Blk**)a;
	bb = *(Blk**)b;
	if (ba->loop != bb->loop)
		return ba->loop > bb->loop ? -1 : +1;
	if (ba->id > bb->id)
		return -1;
	return ba->id < bb->id ? 1 : 0;
}
/* comparison function to order temporaries
 * for allocation at the end of blocks */
static int
prio2(int t1, int t2)
{
	/* the xor of two ints is negative iff exactly one is
	 * negative; -1 marks "unset" for visit and hint */
	if ((tmp[t1].visit ^ tmp[t2].visit) < 0) /* != signs */
		return tmp[t1].visit != -1 ? +1 : -1;
	if ((*hint(t1) ^ *hint(t2)) < 0)
		return *hint(t1) != -1 ? +1 : -1;
	return tmp[t1].cost - tmp[t2].cost;
}
/* register allocation
 * depends on rpo, phi, cost, (and obviously spill)
 */
void
rega(Fn *fn)
{
	int j, t, r, x, rl[Tmp0];
	Blk *b, *b1, *s, ***ps, *blist, **blk, **bp;
	RMap *end, *beg, cur, old, *m;
	Ins *i;
	Phi *p;
	uint u, n;
	Ref src, dst;

	/* 1. setup */
	stmov = 0;
	stblk = 0;
	regu = 0;
	tmp = fn->tmp;
	mem = fn->mem;
	blk = alloc(fn->nblk * sizeof blk[0]);
	end = alloc(fn->nblk * sizeof end[0]);
	beg = alloc(fn->nblk * sizeof beg[0]);
	for (n=0; n<fn->nblk; n++) {
		bsinit(end[n].b, fn->ntmp);
		bsinit(beg[n].b, fn->ntmp);
	}
	bsinit(cur.b, fn->ntmp);
	bsinit(old.b, fn->ntmp);

	loop = INT_MAX;
	for (t=0; t<fn->ntmp; t++) {
		tmp[t].hint.r = t < Tmp0 ? t : -1;
		tmp[t].hint.w = loop;
		tmp[t].visit = -1;
	}
	/* order blocks innermost loops first (see carve) */
	for (bp=blk, b=fn->start; b; b=b->link)
		*bp++ = b;
	qsort(blk, fn->nblk, sizeof blk[0], carve);
	/* seed hints from the leading register copies
	 * (incoming arguments) of the start block */
	for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++)
		if (i->op != Ocopy || !isreg(i->arg[0]))
			break;
		else {
			assert(rtype(i->to) == RTmp);
			sethint(i->to.val, i->arg[0].val);
		}

	/* 2. assign registers */
	for (bp=blk; bp<&blk[fn->nblk]; bp++) {
		b = *bp;
		n = b->id;
		loop = b->loop;
		cur.n = 0;
		bszero(cur.b);
		memset(cur.w, 0, sizeof cur.w);
		/* insertion-sort the live-out temps by prio2 */
		for (x=0, t=Tmp0; bsiter(b->out, &t); t++) {
			j = x++;
			rl[j] = t;
			while (j-- > 0 && prio2(t, rl[j]) > 0) {
				rl[j+1] = rl[j];
				rl[j] = t;
			}
		}
		for (r=0; bsiter(b->out, &r) && r<Tmp0; r++)
			radd(&cur, r, r);
		/* first pass only takes preferred registers;
		 * second pass fills in the rest */
		for (j=0; j<x; j++)
			ralloctry(&cur, rl[j], 1);
		for (j=0; j<x; j++)
			ralloc(&cur, rl[j]);
		rcopy(&end[n], &cur);
		doblk(b, &cur);
		bscopy(b->in, cur.b);
		for (p=b->phi; p; p=p->link)
			if (rtype(p->to) == RTmp)
				bsclr(b->in, p->to.val);
		rcopy(&beg[n], &cur);
	}

	/* 3. emit copies shared by multiple edges
	 * to the same block */
	for (s=fn->start; s; s=s->link) {
		if (s->npred <= 1)
			continue;
		m = &beg[s->id];

		/* rl maps a register that is live at the
		 * beginning of s to the one used in all
		 * predecessors (if any, -1 otherwise) */
		memset(rl, 0, sizeof rl);

		/* to find the register of a phi in a
		 * predecessor, we have to find the
		 * corresponding argument */
		for (p=s->phi; p; p=p->link) {
			if (rtype(p->to) != RTmp
			|| (r=rfind(m, p->to.val)) == -1)
				continue;
			for (u=0; u<p->narg; u++) {
				b = p->blk[u];
				src = p->arg[u];
				if (rtype(src) != RTmp)
					continue;
				x = rfind(&end[b->id], src.val);
				if (x == -1) /* spilled */
					continue;
				rl[r] = (!rl[r] || rl[r] == x) ? x : -1;
			}
			if (rl[r] == 0)
				rl[r] = -1;
		}

		/* process non-phis temporaries */
		for (j=0; j<m->n; j++) {
			t = m->t[j];
			r = m->r[j];
			if (rl[r] || t < Tmp0 /* todo, remove this */)
				continue;
			for (bp=s->pred; bp<&s->pred[s->npred]; bp++) {
				x = rfind(&end[(*bp)->id], t);
				if (x == -1) /* spilled */
					continue;
				rl[r] = (!rl[r] || rl[r] == x) ? x : -1;
			}
			if (rl[r] == 0)
				rl[r] = -1;
		}

		/* rewrite beg[s] to use the agreed registers,
		 * inserting the needed copies at the top of s */
		npm = 0;
		for (j=0; j<m->n; j++) {
			t = m->t[j];
			r = m->r[j];
			x = rl[r];
			assert(x != 0 || t < Tmp0 /* todo, ditto */);
			if (x > 0 && !bshas(m->b, x)) {
				pmadd(TMP(x), TMP(r), tmp[t].cls);
				m->r[j] = x;
				bsset(m->b, x);
			}
		}
		curi = &insb[NIns];
		pmgen();
		j = &insb[NIns] - curi;
		if (j == 0)
			continue;
		stmov += j;
		s->nins += j;
		i = alloc(s->nins * sizeof(Ins));
		icpy(icpy(i, curi, j), s->ins, s->nins-j);
		s->ins = i;
	}

	if (debug['R']) {
		fprintf(stderr, "\n> Register mappings:\n");
		for (n=0; n<fn->nblk; n++) {
			b = fn->rpo[n];
			fprintf(stderr, "\t%-10s beg", b->name);
			mdump(&beg[n]);
			fprintf(stderr, "\t end");
			mdump(&end[n]);
		}
		fprintf(stderr, "\n");
	}

	/* 4. emit remaining copies in new blocks */
	blist = 0;
	for (b=fn->start;; b=b->link) {
		ps = (Blk**[3]){&b->s1, &b->s2, (Blk*[1]){0}};
		for (; (s=**ps); ps++) {
			npm = 0;
			for (p=s->phi; p; p=p->link) {
				dst = p->to;
				assert(rtype(dst)==RSlot || rtype(dst)==RTmp);
				if (rtype(dst) == RTmp) {
					r = rfind(&beg[s->id], dst.val);
					if (r == -1)
						continue;
					dst = TMP(r);
				}
				for (u=0; p->blk[u]!=b; u++)
					assert(u+1 < p->narg);
				src = p->arg[u];
				if (rtype(src) == RTmp)
					src = rref(&end[b->id], src.val);
				pmadd(src, dst, p->cls);
			}
			for (t=Tmp0; bsiter(s->in, &t); t++) {
				src = rref(&end[b->id], t);
				dst = rref(&beg[s->id], t);
				pmadd(src, dst, tmp[t].cls);
			}
			curi = &insb[NIns];
			pmgen();
			if (curi == &insb[NIns])
				continue;
			/* splice a fresh block on the (b, s) edge */
			b1 = newblk();
			b1->loop = (b->loop+s->loop) / 2;
			b1->link = blist;
			blist = b1;
			fn->nblk++;
			strf(b1->name, "%s_%s", b->name, s->name);
			stmov += &insb[NIns]-curi;
			stblk += 1;
			idup(b1, curi, &insb[NIns]-curi);
			b1->jmp.type = Jjmp;
			b1->s1 = s;
			**ps = b1;
		}
		if (!b->link) {
			b->link = blist;
			break;
		}
	}
	for (b=fn->start; b; b=b->link)
		b->phi = 0;
	fn->reg = regu;

	if (debug['R']) {
		fprintf(stderr, "\n> Register allocation statistics:\n");
		fprintf(stderr, "\tnew moves: %d\n", stmov);
		fprintf(stderr, "\tnew blocks: %d\n", stblk);
		fprintf(stderr, "\n> After register allocation:\n");
		printfn(fn, stderr);
	}
}

653
src/qbe/rv64/abi.c Normal file
View File

@@ -0,0 +1,653 @@
#include "all.h"

/* the risc-v lp64d abi */

typedef struct Class Class;
typedef struct Insl Insl;
typedef struct Params Params;

enum {
	Cptr = 1, /* replaced by a pointer */
	Cstk1 = 2, /* pass first XLEN on the stack */
	Cstk2 = 4, /* pass second XLEN on the stack */
	Cstk = Cstk1 | Cstk2,
	Cfpint = 8, /* float passed like integer */
};

/* classification of one argument or return value */
struct Class {
	char class;	/* bitmask of the C* flags above */
	Typ *type;
	int reg[2];	/* registers assigned */
	int cls[2];	/* class (Kw/Kl/Ks/Kd) per register */
	int off[2];	/* byte offset of each piece */
	char ngp; /* only valid after typclass() */
	char nfp; /* ditto */
	char nreg;
};

/* list of (alloc) instructions queued for the entry block */
struct Insl {
	Ins i;
	Insl *link;
};

struct Params {
	int ngp;	/* gp registers consumed by parameters */
	int nfp;	/* fp registers consumed by parameters */
	int stk; /* stack offset for varargs */
};

static int gpreg[10] = {A0, A1, A2, A3, A4, A5, A6, A7};
static int fpreg[10] = {FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7};

/* layout of call's second argument (RCall)
 *
 *          29     12   8    4  2  0
 *  |0...0|x|xxxx|xxxx|xx|xx|  range
 *         |   |    |   |  ` gp regs returned (0..2)
 *         |   |    |   ` fp regs returned (0..2)
 *         |   |    ` gp regs passed (0..8)
 *         |   ` fp regs passed (0..8)
 *         ` env pointer passed in t5 (0..1)
 */
/* set of registers carrying the return value described by the
 * RCall ref r; counts are optionally stored in p[0]/p[1] */
bits
rv64_retregs(Ref r, int p[2])
{
	bits b;
	int ngp, nfp, i;

	assert(rtype(r) == RCall);
	ngp = r.val & 3;
	nfp = (r.val >> 2) & 3;
	if (p) {
		p[0] = ngp;
		p[1] = nfp;
	}
	b = 0;
	for (i=0; i<ngp; i++)
		b |= BIT(A0+i);
	for (i=0; i<nfp; i++)
		b |= BIT(FA0+i);
	return b;
}
/* set of registers carrying the arguments described by the
 * RCall ref r (including t5 for the env pointer) */
bits
rv64_argregs(Ref r, int p[2])
{
	bits b;
	int ngp, nfp, t5, i;

	assert(rtype(r) == RCall);
	ngp = (r.val >> 4) & 15;
	nfp = (r.val >> 8) & 15;
	t5 = (r.val >> 12) & 1;
	if (p) {
		p[0] = ngp + t5;
		p[1] = nfp;
	}
	b = 0;
	for (i=0; i<ngp; i++)
		b |= BIT(A0+i);
	for (i=0; i<nfp; i++)
		b |= BIT(FA0+i);
	return b | ((bits)t5 << T5);
}
/* Recursively classify the scalar fields of struct t starting
 * at byte offset off, filling c->cls/off/ngp/nfp.  Returns the
 * fp field count, or -1 when t cannot use the hardware float
 * convention (union, or more than two fields). */
static int
fpstruct(Typ *t, int off, Class *c)
{
	Field *f;
	int n;

	if (t->isunion)
		return -1;

	for (f=*t->fields; f->type != FEnd; f++)
		if (f->type == FPad)
			off += f->len;
		else if (f->type == FTyp) {
			if (fpstruct(&typ[f->len], off, c) == -1)
				return -1;
		}
		else {
			n = c->nfp + c->ngp;
			if (n == 2)
				return -1;
			switch (f->type) {
			default: die("unreachable");
			case Fb:
			case Fh:
			case Fw: c->cls[n] = Kw; c->ngp++; break;
			case Fl: c->cls[n] = Kl; c->ngp++; break;
			case Fs: c->cls[n] = Ks; c->nfp++; break;
			case Fd: c->cls[n] = Kd; c->nfp++; break;
			}
			c->off[n] = off;
			off += f->len;
		}

	return c->nfp;
}
/* Classify type t for argument/return passing and assign its
 * registers from the gp/fp cursors.  fpabi selects the hardware
 * floating-point convention when the type qualifies. */
static void
typclass(Class *c, Typ *t, int fpabi, int *gp, int *fp)
{
	uint n;
	int i;

	c->type = t;
	c->class = 0;
	c->ngp = 0;
	c->nfp = 0;

	/* align is a log2 value (4 means 16 bytes) */
	if (t->align > 4)
		err("alignments larger than 16 are not supported");

	if (t->isdark || t->size > 16 || t->size == 0) {
		/* large structs are replaced by a
		 * pointer to some caller-allocated
		 * memory
		 */
		c->class |= Cptr;
		*c->cls = Kl;
		*c->off = 0;
		c->ngp = 1;
	}
	else if (!fpabi || fpstruct(t, 0, c) <= 0) {
		/* integer convention: one Kl slot per 8 bytes */
		for (n=0; 8*n<t->size; n++) {
			c->cls[n] = Kl;
			c->off[n] = 8*n;
		}
		c->nfp = 0;
		c->ngp = n;
	}

	c->nreg = c->nfp + c->ngp;
	for (i=0; i<c->nreg; i++)
		if (KBASE(c->cls[i]) == 0)
			c->reg[i] = *gp++;
		else
			c->reg[i] = *fp++;
}
/* Emit stores of ntmp fresh temporaries into memory at mem, at
 * the offsets recorded in c; the temporaries are returned in
 * tmp[].  (Instructions are emitted bottom-up.) */
static void
sttmps(Ref tmp[], int ntmp, Class *c, Ref mem, Fn *fn)
{
	static int st[] = {
		[Kw] = Ostorew, [Kl] = Ostorel,
		[Ks] = Ostores, [Kd] = Ostored
	};
	int i;
	Ref r;

	assert(ntmp > 0);
	assert(ntmp <= 2);
	for (i=0; i<ntmp; i++) {
		tmp[i] = newtmp("abi", c->cls[i], fn);
		r = newtmp("abi", Kl, fn);
		emit(st[c->cls[i]], 0, R, tmp[i], r);
		emit(Oadd, Kl, r, mem, getcon(c->off[i], fn));
	}
}
/* load the classified pieces of the aggregate at mem into the
 * registers recorded in c */
static void
ldregs(Class *c, Ref mem, Fn *fn)
{
	int i;
	Ref r;

	for (i=0; i<c->nreg; i++) {
		r = newtmp("abi", Kl, fn);
		emit(Oload, c->cls[i], TMP(c->reg[i]), r, R);
		emit(Oadd, Kl, r, mem, getcon(c->off[i], fn));
	}
}
/* lower the return of block b: copy the value into the ABI
 * registers (or blit through the hidden pointer) and encode
 * the returned registers in the jump's RCall descriptor */
static void
selret(Blk *b, Fn *fn)
{
	int j, k, cty;
	Ref r;
	Class cr;

	j = b->jmp.type;

	if (!isret(j) || j == Jret0)
		return;

	r = b->jmp.arg;
	b->jmp.type = Jret0;

	if (j == Jretc) {
		typclass(&cr, &typ[fn->retty], 1, gpreg, fpreg);
		if (cr.class & Cptr) {
			/* copy the aggregate into the caller's buffer */
			assert(rtype(fn->retr) == RTmp);
			emit(Oblit1, 0, R, INT(cr.type->size), R);
			emit(Oblit0, 0, R, r, fn->retr);
			cty = 0;
		} else {
			ldregs(&cr, r, fn);
			cty = (cr.nfp << 2) | cr.ngp;
		}
	} else {
		k = j - Jretw;
		if (KBASE(k) == 0) {
			emit(Ocopy, k, TMP(A0), r, R);
			cty = 1;
		} else {
			emit(Ocopy, k, TMP(FA0), r, R);
			cty = 1 << 2;
		}
	}

	b->jmp.arg = CALL(cty);
}
/* Classify all argument/parameter instructions in [i0, i1) into
 * carg[]; retptr reserves the first gp register for the hidden
 * return pointer.  Returns the packed RCall field (see the
 * layout comment above). */
static int
argsclass(Ins *i0, Ins *i1, Class *carg, int retptr)
{
	int ngp, nfp, *gp, *fp, vararg, envc;
	Class *c;
	Typ *t;
	Ins *i;

	gp = gpreg;
	fp = fpreg;
	ngp = 8;
	nfp = 8;
	vararg = 0;
	envc = 0;
	if (retptr) {
		gp++;
		ngp--;
	}
	for (i=i0, c=carg; i<i1; i++, c++) {
		switch (i->op) {
		case Opar:
		case Oarg:
			*c->cls = i->cls;
			if (!vararg && KBASE(i->cls) == 1 && nfp > 0) {
				nfp--;
				*c->reg = *fp++;
			} else if (ngp > 0) {
				/* float in a gp register (vararg or
				 * fp registers exhausted) */
				if (KBASE(i->cls) == 1)
					c->class |= Cfpint;
				ngp--;
				*c->reg = *gp++;
			} else
				c->class |= Cstk1;
			break;
		case Oargv:
			vararg = 1;
			break;
		case Oparc:
		case Oargc:
			t = &typ[i->arg[0].val];
			typclass(c, t, 1, gp, fp);
			if (c->nfp > 0)
				if (c->nfp >= nfp || c->ngp >= ngp)
					/* not enough registers for the
					 * float convention; reclassify
					 * with the integer one */
					typclass(c, t, 0, gp, fp);
			assert(c->nfp <= nfp);
			if (c->ngp <= ngp) {
				ngp -= c->ngp;
				nfp -= c->nfp;
				gp += c->ngp;
				fp += c->nfp;
			} else if (ngp > 0) {
				/* first half in a register,
				 * second half on the stack */
				assert(c->ngp == 2);
				assert(c->class == 0);
				c->class |= Cstk2;
				c->nreg = 1;
				ngp--;
				gp++;
			} else {
				c->class |= Cstk1;
				if (c->nreg > 1)
					c->class |= Cstk2;
				c->nreg = 0;
			}
			break;
		case Opare:
		case Oarge:
			*c->reg = T5;
			*c->cls = Kl;
			envc = 1;
			break;
		}
	}
	return envc << 12 | (gp-gpreg) << 4 | (fp-fpreg) << 8;
}
/* queue an alloc instruction reserving stack space for an
 * aggregate of type t; the list is later emitted in the
 * function's entry block */
static void
stkblob(Ref r, Typ *t, Fn *fn, Insl **ilp)
{
	Insl *il;
	int al;
	uint64_t sz;

	il = alloc(sizeof *il);
	al = t->align - 2; /* specific to NAlign == 3 */
	if (al < 0)
		al = 0;
	sz = (t->size + 7) & ~7;	/* round up to 8 bytes */
	il->i = (Ins){Oalloc+al, Kl, r, {getcon(sz, fn)}};
	il->link = *ilp;
	*ilp = il;
}
/* Lower the call at i1 with its argument instructions [i0, i1):
 * classify arguments, move them to registers or the stack, and
 * retrieve the return value.  Emission is bottom-up, so code is
 * generated in reverse execution order. */
static void
selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
{
	Ins *i;
	Class *ca, *c, cr;
	int j, k, cty;
	uint64_t stk, off;
	Ref r, r1, r2, tmp[2];

	ca = alloc((i1-i0) * sizeof ca[0]);
	cr.class = 0;

	if (!req(i1->arg[1], R))
		typclass(&cr, &typ[i1->arg[1].val], 1, gpreg, fpreg);

	cty = argsclass(i0, i1, ca, cr.class & Cptr);
	stk = 0;
	for (i=i0, c=ca; i<i1; i++, c++) {
		if (i->op == Oargv)
			continue;
		if (c->class & Cptr) {
			/* pass large aggregates by reference to a
			 * fresh stack copy */
			i->arg[0] = newtmp("abi", Kl, fn);
			stkblob(i->arg[0], c->type, fn, ilp);
			i->op = Oarg;
		}
		if (c->class & Cstk1)
			stk += 8;
		if (c->class & Cstk2)
			stk += 8;
	}
	stk += stk & 15;	/* keep the stack 16-byte aligned */
	if (stk)
		emit(Osalloc, Kl, R, getcon(-stk, fn), R);

	if (!req(i1->arg[1], R)) {
		stkblob(i1->to, cr.type, fn, ilp);
		cty |= (cr.nfp << 2) | cr.ngp;
		if (cr.class & Cptr)
			/* spill & rega expect calls to be
			 * followed by copies from regs,
			 * so we emit a dummy
			 */
			emit(Ocopy, Kw, R, TMP(A0), R);
		else {
			sttmps(tmp, cr.nreg, &cr, i1->to, fn);
			for (j=0; j<cr.nreg; j++) {
				r = TMP(cr.reg[j]);
				emit(Ocopy, cr.cls[j], tmp[j], r, R);
			}
		}
	} else if (KBASE(i1->cls) == 0) {
		emit(Ocopy, i1->cls, i1->to, TMP(A0), R);
		cty |= 1;
	} else {
		emit(Ocopy, i1->cls, i1->to, TMP(FA0), R);
		cty |= 1 << 2;
	}

	emit(Ocall, 0, R, i1->arg[0], CALL(cty));

	if (cr.class & Cptr)
		/* struct return argument */
		emit(Ocopy, Kl, TMP(A0), i1->to, R);

	/* move arguments into registers */
	for (i=i0, c=ca; i<i1; i++, c++) {
		if (i->op == Oargv || c->class & Cstk1)
			continue;
		if (i->op == Oargc) {
			ldregs(c, i->arg[1], fn);
		} else if (c->class & Cfpint) {
			k = KWIDE(*c->cls) ? Kl : Kw;
			r = newtmp("abi", k, fn);
			emit(Ocopy, k, TMP(*c->reg), r, R);
			*c->reg = r.val;	/* reused by the Ocast below */
		} else {
			emit(Ocopy, *c->cls, TMP(*c->reg), i->arg[0], R);
		}
	}

	for (i=i0, c=ca; i<i1; i++, c++) {
		if (c->class & Cfpint) {
			/* bit-cast the float into an integer temp */
			k = KWIDE(*c->cls) ? Kl : Kw;
			emit(Ocast, k, TMP(*c->reg), i->arg[0], R);
		}
		if (c->class & Cptr) {
			emit(Oblit1, 0, R, INT(c->type->size), R);
			emit(Oblit0, 0, R, i->arg[1], i->arg[0]);
		}
	}

	if (!stk)
		return;

	/* populate the stack */
	off = 0;
	r = newtmp("abi", Kl, fn);
	for (i=i0, c=ca; i<i1; i++, c++) {
		if (i->op == Oargv || !(c->class & Cstk))
			continue;
		if (i->op == Oarg) {
			r1 = newtmp("abi", Kl, fn);
			emit(Ostorew+i->cls, Kw, R, i->arg[0], r1);
			if (i->cls == Kw) {
				/* TODO: we only need this sign
				 * extension for l temps passed
				 * as w arguments
				 * (see rv64/isel.c:fixarg)
				 */
				curi->op = Ostorel;
				curi->arg[0] = newtmp("abi", Kl, fn);
				emit(Oextsw, Kl, curi->arg[0], i->arg[0], R);
			}
			emit(Oadd, Kl, r1, r, getcon(off, fn));
			off += 8;
		}
		if (i->op == Oargc) {
			if (c->class & Cstk1) {
				r1 = newtmp("abi", Kl, fn);
				r2 = newtmp("abi", Kl, fn);
				emit(Ostorel, 0, R, r2, r1);
				emit(Oadd, Kl, r1, r, getcon(off, fn));
				emit(Oload, Kl, r2, i->arg[1], R);
				off += 8;
			}
			if (c->class & Cstk2) {
				r1 = newtmp("abi", Kl, fn);
				r2 = newtmp("abi", Kl, fn);
				emit(Ostorel, 0, R, r2, r1);
				emit(Oadd, Kl, r1, r, getcon(off, fn));
				r1 = newtmp("abi", Kl, fn);
				emit(Oload, Kl, r2, r1, R);
				emit(Oadd, Kl, r1, i->arg[1], getcon(8, fn));
				off += 8;
			}
		}
	}
	emit(Osalloc, Kl, r, getcon(stk, fn), R);
}
/* Lower the parameter instructions [i0, i1) of fn: copy incoming
 * registers to temporaries, spill register-passed aggregates to
 * fresh stack blobs, and bind stack-passed parameters to slots.
 * Returns the register/stack usage for vararg handling. */
static Params
selpar(Fn *fn, Ins *i0, Ins *i1)
{
	Class *ca, *c, cr;
	Insl *il;
	Ins *i;
	int j, k, s, cty, nt;
	Ref r, tmp[17], *t;

	ca = alloc((i1-i0) * sizeof ca[0]);
	cr.class = 0;
	curi = &insb[NIns];

	if (fn->retty >= 0) {
		typclass(&cr, &typ[fn->retty], 1, gpreg, fpreg);
		if (cr.class & Cptr) {
			/* hidden return pointer arrives in a0 */
			fn->retr = newtmp("abi", Kl, fn);
			emit(Ocopy, Kl, fn->retr, TMP(A0), R);
		}
	}

	cty = argsclass(i0, i1, ca, cr.class & Cptr);
	fn->reg = rv64_argregs(CALL(cty), 0);

	il = 0;
	t = tmp;
	for (i=i0, c=ca; i<i1; i++, c++) {
		if (c->class & Cfpint) {
			/* float passed in a gp register: cast back */
			r = i->to;
			k = *c->cls;
			*c->cls = KWIDE(k) ? Kl : Kw;
			i->to = newtmp("abi", k, fn);
			emit(Ocast, k, r, i->to, R);
		}
		if (i->op == Oparc)
		if (!(c->class & Cptr))
		if (c->nreg != 0) {
			/* spill register-passed aggregate to memory */
			nt = c->nreg;
			if (c->class & Cstk2) {
				c->cls[1] = Kl;
				c->off[1] = 8;
				assert(nt == 1);
				nt = 2;
			}
			sttmps(t, nt, c, i->to, fn);
			stkblob(i->to, c->type, fn, &il);
			t += nt;
		}
	}
	for (; il; il=il->link)
		emiti(il->i);

	t = tmp;
	s = 2 + 8*fn->vararg;
	for (i=i0, c=ca; i<i1; i++, c++)
		if (i->op == Oparc && !(c->class & Cptr)) {
			if (c->nreg == 0) {
				/* fully stack-passed aggregate */
				fn->tmp[i->to.val].slot = -s;
				s += (c->class & Cstk2) ? 2 : 1;
				continue;
			}
			for (j=0; j<c->nreg; j++) {
				r = TMP(c->reg[j]);
				emit(Ocopy, c->cls[j], *t++, r, R);
			}
			if (c->class & Cstk2) {
				emit(Oload, Kl, *t, SLOT(-s), R);
				t++, s++;
			}
		} else if (c->class & Cstk1) {
			emit(Oload, *c->cls, i->to, SLOT(-s), R);
			s++;
		} else {
			emit(Ocopy, *c->cls, i->to, TMP(*c->reg), R);
		}

	return (Params){
		.stk = s,
		.ngp = (cty >> 4) & 15,
		.nfp = (cty >> 8) & 15,
	};
}
/* lower vaarg: load the arg pointer from the va_list, load the
 * value, and store the pointer bumped by 8 back */
static void
selvaarg(Fn *fn, Ins *i)
{
	Ref loc, newloc;

	loc = newtmp("abi", Kl, fn);
	newloc = newtmp("abi", Kl, fn);
	emit(Ostorel, Kw, R, newloc, i->arg[0]);
	emit(Oadd, Kl, newloc, loc, getcon(8, fn));
	emit(Oload, i->cls, i->to, loc, R);
	emit(Oload, Kl, loc, i->arg[0], R);
}

/* lower vastart: store the address of the first stack-passed
 * vararg into the va_list at ap */
static void
selvastart(Fn *fn, Params p, Ref ap)
{
	Ref rsave;
	int s;

	rsave = newtmp("abi", Kl, fn);
	emit(Ostorel, Kw, R, rsave, ap);
	s = p.stk > 2 + 8 * fn->vararg ? p.stk : 2 + p.ngp;
	emit(Oaddr, Kl, rsave, SLOT(-s), R);
}
/* ABI lowering entry point: rewrite parameters, calls, returns
 * and vararg instructions of fn to the rv64 lp64d convention */
void
rv64_abi(Fn *fn)
{
	Blk *b;
	Ins *i, *i0;
	Insl *il;
	int n0, n1, ioff;
	Params p;

	for (b=fn->start; b; b=b->link)
		b->visit = 0;

	/* lower parameters */
	for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++)
		if (!ispar(i->op))
			break;
	p = selpar(fn, b->ins, i);
	/* splice the generated prologue before the remaining
	 * instructions of the start block */
	n0 = &insb[NIns] - curi;
	ioff = i - b->ins;
	n1 = b->nins - ioff;
	vgrow(&b->ins, n0+n1);
	icpy(b->ins+n0, b->ins+ioff, n1);
	icpy(b->ins, curi, n0);
	b->nins = n0+n1;

	/* lower calls, returns, and vararg instructions */
	il = 0;
	b = fn->start;
	do {
		if (!(b = b->link))
			b = fn->start; /* do it last */
		if (b->visit)
			continue;
		curi = &insb[NIns];
		selret(b, fn);
		for (i=&b->ins[b->nins]; i!=b->ins;)
			switch ((--i)->op) {
			default:
				emiti(*i);
				break;
			case Ocall:
				/* back up over the argument run */
				for (i0=i; i0>b->ins; i0--)
					if (!isarg((i0-1)->op))
						break;
				selcall(fn, i0, i, &il);
				i = i0;
				break;
			case Ovastart:
				selvastart(fn, p, i->arg[0]);
				break;
			case Ovaarg:
				selvaarg(fn, i);
				break;
			case Oarg:
			case Oargc:
				die("unreachable");
			}
		if (b == fn->start)
			for (; il; il=il->link)
				emiti(il->i);
		idup(b, curi, &insb[NIns]-curi);
	} while (b != fn->start);

	if (debug['A']) {
		fprintf(stderr, "\n> After ABI lowering:\n");
		printfn(fn, stderr);
	}
}

52
src/qbe/rv64/all.h Normal file
View File

@@ -0,0 +1,52 @@
#include "../all.h"

typedef struct Rv64Op Rv64Op;

/* rv64 register numbering; starts just after RXX so registers
 * never collide with temporaries (checked by MAKESURE below) */
enum Rv64Reg {
	/* caller-save */
	T0 = RXX + 1, T1, T2, T3, T4, T5,
	A0, A1, A2, A3, A4, A5, A6, A7,

	/* callee-save */
	S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11,

	/* globally live */
	FP, SP, GP, TP, RA,

	/* FP caller-save */
	FT0, FT1, FT2, FT3, FT4, FT5, FT6, FT7, FT8, FT9, FT10,
	FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7,

	/* FP callee-save */
	FS0, FS1, FS2, FS3, FS4, FS5, FS6, FS7, FS8, FS9, FS10, FS11,

	/* reserved (see rv64/emit.c) */
	T6, FT11,

	NFPR = FS11 - FT0 + 1,
	NGPR = RA - T0 + 1,
	NGPS = A7 - T0 + 1,
	NFPS = FA7 - FT0 + 1,
	NCLR = (S11 - S1 + 1) + (FS11 - FS0 + 1),
};
MAKESURE(reg_not_tmp, FT11 < (int)Tmp0);

/* NOTE(review): presumably imm marks ops accepting an immediate
 * second operand — confirm against rv64/isel.c */
struct Rv64Op {
	char imm;
};

/* targ.c */
extern int rv64_rsave[];
extern int rv64_rclob[];
extern Rv64Op rv64_op[];

/* abi.c */
bits rv64_retregs(Ref, int[2]);
bits rv64_argregs(Ref, int[2]);
void rv64_abi(Fn *);

/* isel.c */
void rv64_isel(Fn *);

/* emit.c */
void rv64_emitfn(Fn *, FILE *);

569
src/qbe/rv64/emit.c Normal file
View File

@@ -0,0 +1,569 @@
#include "all.h"

enum {
	Ki = -1, /* matches Kw and Kl */
	Ka = -2, /* matches all classes */
};

/* op -> assembly template table; searched linearly by emitins(),
 * first matching (op, cls) row wins.  %= result, %0/%1 operands,
 * %k class suffix, %M0/%M1 memory operand, %? scratch register */
static struct {
	short op;
	short cls;
	char *fmt;
} omap[] = {
	{ Oadd, Ki, "add%k %=, %0, %1" },
	{ Oadd, Ka, "fadd.%k %=, %0, %1" },
	{ Osub, Ki, "sub%k %=, %0, %1" },
	{ Osub, Ka, "fsub.%k %=, %0, %1" },
	{ Oneg, Ki, "neg%k %=, %0" },
	{ Oneg, Ka, "fneg.%k %=, %0" },
	{ Odiv, Ki, "div%k %=, %0, %1" },
	{ Odiv, Ka, "fdiv.%k %=, %0, %1" },
	{ Orem, Ki, "rem%k %=, %0, %1" },
	/* NOTE(review): the row below is unreachable — the Ki row
	 * above already matches Kl in emitins()'s linear search */
	{ Orem, Kl, "rem %=, %0, %1" },
	{ Oudiv, Ki, "divu%k %=, %0, %1" },
	{ Ourem, Ki, "remu%k %=, %0, %1" },
	{ Omul, Ki, "mul%k %=, %0, %1" },
	{ Omul, Ka, "fmul.%k %=, %0, %1" },
	{ Oand, Ki, "and %=, %0, %1" },
	{ Oor, Ki, "or %=, %0, %1" },
	{ Oxor, Ki, "xor %=, %0, %1" },
	{ Osar, Ki, "sra%k %=, %0, %1" },
	{ Oshr, Ki, "srl%k %=, %0, %1" },
	{ Oshl, Ki, "sll%k %=, %0, %1" },
	{ Ocsltl, Ki, "slt %=, %0, %1" },
	{ Ocultl, Ki, "sltu %=, %0, %1" },
	{ Oceqs, Ki, "feq.s %=, %0, %1" },
	{ Ocges, Ki, "fge.s %=, %0, %1" },
	{ Ocgts, Ki, "fgt.s %=, %0, %1" },
	{ Ocles, Ki, "fle.s %=, %0, %1" },
	{ Oclts, Ki, "flt.s %=, %0, %1" },
	{ Oceqd, Ki, "feq.d %=, %0, %1" },
	{ Ocged, Ki, "fge.d %=, %0, %1" },
	{ Ocgtd, Ki, "fgt.d %=, %0, %1" },
	{ Ocled, Ki, "fle.d %=, %0, %1" },
	{ Ocltd, Ki, "flt.d %=, %0, %1" },
	{ Ostoreb, Kw, "sb %0, %M1" },
	{ Ostoreh, Kw, "sh %0, %M1" },
	{ Ostorew, Kw, "sw %0, %M1" },
	{ Ostorel, Ki, "sd %0, %M1" },
	{ Ostores, Kw, "fsw %0, %M1" },
	{ Ostored, Kw, "fsd %0, %M1" },
	{ Oloadsb, Ki, "lb %=, %M0" },
	{ Oloadub, Ki, "lbu %=, %M0" },
	{ Oloadsh, Ki, "lh %=, %M0" },
	{ Oloaduh, Ki, "lhu %=, %M0" },
	{ Oloadsw, Ki, "lw %=, %M0" },
	/* riscv64 always sign-extends 32-bit
	 * values stored in 64-bit registers
	 */
	{ Oloaduw, Kw, "lw %=, %M0" },
	{ Oloaduw, Kl, "lwu %=, %M0" },
	{ Oload, Kw, "lw %=, %M0" },
	{ Oload, Kl, "ld %=, %M0" },
	{ Oload, Ks, "flw %=, %M0" },
	{ Oload, Kd, "fld %=, %M0" },
	{ Oextsb, Ki, "sext.b %=, %0" },
	{ Oextub, Ki, "zext.b %=, %0" },
	{ Oextsh, Ki, "sext.h %=, %0" },
	{ Oextuh, Ki, "zext.h %=, %0" },
	{ Oextsw, Kl, "sext.w %=, %0" },
	{ Oextuw, Kl, "zext.w %=, %0" },
	{ Otruncd, Ks, "fcvt.s.d %=, %0" },
	{ Oexts, Kd, "fcvt.d.s %=, %0" },
	{ Ostosi, Kw, "fcvt.w.s %=, %0, rtz" },
	{ Ostosi, Kl, "fcvt.l.s %=, %0, rtz" },
	{ Ostoui, Kw, "fcvt.wu.s %=, %0, rtz" },
	{ Ostoui, Kl, "fcvt.lu.s %=, %0, rtz" },
	{ Odtosi, Kw, "fcvt.w.d %=, %0, rtz" },
	{ Odtosi, Kl, "fcvt.l.d %=, %0, rtz" },
	{ Odtoui, Kw, "fcvt.wu.d %=, %0, rtz" },
	{ Odtoui, Kl, "fcvt.lu.d %=, %0, rtz" },
	{ Oswtof, Ka, "fcvt.%k.w %=, %0" },
	{ Ouwtof, Ka, "fcvt.%k.wu %=, %0" },
	{ Osltof, Ka, "fcvt.%k.l %=, %0" },
	{ Oultof, Ka, "fcvt.%k.lu %=, %0" },
	{ Ocast, Kw, "fmv.x.w %=, %0" },
	{ Ocast, Kl, "fmv.x.d %=, %0" },
	{ Ocast, Ks, "fmv.w.x %=, %0" },
	{ Ocast, Kd, "fmv.d.x %=, %0" },
	{ Ocopy, Ki, "mv %=, %0" },
	{ Ocopy, Ka, "fmv.%k %=, %0" },
	{ Oswap, Ki, "mv %?, %0\n\tmv %0, %1\n\tmv %1, %?" },
	{ Oswap, Ka, "fmv.%k %?, %0\n\tfmv.%k %0, %1\n\tfmv.%k %1, %?" },
	{ Oreqz, Ki, "seqz %=, %0" },
	{ Ornez, Ki, "snez %=, %0" },
	{ Ocall, Kw, "jalr %0" },
	{ NOp, 0, 0 }
};
/* assembly names of the registers, indexed by enum Rv64Reg */
static char *rname[] = {
	[FP] = "fp",
	[SP] = "sp",
	[GP] = "gp",
	[TP] = "tp",
	[RA] = "ra",
	[T0] = "t0", "t1", "t2", "t3", "t4", "t5",
	[A0] = "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
	[S1] = "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8",
	"s9", "s10", "s11",
	[FT0] = "ft0", "ft1", "ft2", "ft3", "ft4", "ft5", "ft6", "ft7",
	"ft8", "ft9", "ft10",
	[FA0] = "fa0", "fa1", "fa2", "fa3", "fa4", "fa5", "fa6", "fa7",
	[FS0] = "fs0", "fs1", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7",
	"fs8", "fs9", "fs10", "fs11",
	[T6] = "t6",
	[FT11] = "ft11",
};
/* byte offset of stack slot r relative to the frame pointer;
 * negative slot numbers (stack-passed parameters, see selpar)
 * sit above fp, spill slots below it */
static int64_t
slot(Ref r, Fn *fn)
{
	int s;

	s = rsval(r);
	assert(s <= fn->slot);
	if (s < 0)
		return 8 * -s;
	else
		return -4 * (fn->slot - s);
}
/* print the symbol (plus optional byte offset) of constant c */
static void
emitaddr(Con *c, FILE *f)
{
	assert(c->sym.type == SGlo);
	fputs(str(c->sym.id), f);
	if (!c->bits.i)
		return;
	fprintf(f, "+%"PRIi64, c->bits.i);
}
/* Print instruction i using template s.  Escapes: %= result
 * register, %0/%1 operands, %k class suffix (omitted for Kl),
 * %M0/%M1 memory operand, %? reserved scratch register. */
static void
emitf(char *s, Ins *i, Fn *fn, FILE *f)
{
	static char clschr[] = {'w', 'l', 's', 'd'};
	Ref r;
	int k, c;
	Con *pc;
	int64_t offset;

	fputc('\t', f);
	for (;;) {
		k = i->cls;
		/* copy literal text up to the next escape */
		while ((c = *s++) != '%')
			if (!c) {
				fputc('\n', f);
				return;
			} else
				fputc(c, f);
		switch ((c = *s++)) {
		default:
			die("invalid escape");
		case '?':
			/* t6/ft11 are reserved as scratch */
			if (KBASE(k) == 0)
				fputs("t6", f);
			else
				fputs("ft11", f);
			break;
		case 'k':
			if (i->cls != Kl)
				fputc(clschr[i->cls], f);
			break;
		case '=':
		case '0':
			r = c == '=' ? i->to : i->arg[0];
			assert(isreg(r));
			fputs(rname[r.val], f);
			break;
		case '1':
			r = i->arg[1];
			switch (rtype(r)) {
			default:
				die("invalid second argument");
			case RTmp:
				assert(isreg(r));
				fputs(rname[r.val], f);
				break;
			case RCon:
				/* 12-bit signed immediate */
				pc = &fn->con[r.val];
				assert(pc->type == CBits);
				assert(pc->bits.i >= -2048 && pc->bits.i < 2048);
				fprintf(f, "%d", (int)pc->bits.i);
				break;
			}
			break;
		case 'M':
			c = *s++;
			assert(c == '0' || c == '1');
			r = i->arg[c - '0'];
			switch (rtype(r)) {
			default:
				die("invalid address argument");
			case RTmp:
				fprintf(f, "0(%s)", rname[r.val]);
				break;
			case RCon:
				pc = &fn->con[r.val];
				assert(pc->type == CAddr);
				emitaddr(pc, f);
				if (isstore(i->op)
				|| (isload(i->op) && KBASE(i->cls) == 1)) {
					/* store (and float load)
					 * pseudo-instructions need a
					 * temporary register in which to
					 * load the address
					 */
					fprintf(f, ", t6");
				}
				break;
			case RSlot:
				offset = slot(r, fn);
				assert(offset >= -2048 && offset <= 2047);
				fprintf(f, "%d(fp)", (int)offset);
				break;
			}
			break;
		}
	}
}
/* emit code loading the address of constant c into register rn;
 * thread-local symbols use the %tprel_* relocation sequence */
static void
loadaddr(Con *c, char *rn, FILE *f)
{
	char off[32];

	if (c->sym.type == SThr) {
		if (c->bits.i)
			sprintf(off, "+%"PRIi64, c->bits.i);
		else
			off[0] = 0;
		fprintf(f, "\tlui %s, %%tprel_hi(%s)%s\n",
			rn, str(c->sym.id), off);
		fprintf(f, "\tadd %s, %s, tp, %%tprel_add(%s)%s\n",
			rn, rn, str(c->sym.id), off);
		fprintf(f, "\taddi %s, %s, %%tprel_lo(%s)%s\n",
			rn, rn, str(c->sym.id), off);
	} else {
		fprintf(f, "\tla %s, ", rn);
		emitaddr(c, f);
		fputc('\n', f);
	}
}
/* Materialize constant c into register r of class k:
 * addresses go through loadaddr(), bit patterns use `li`. */
static void
loadcon(Con *c, int r, int k, FILE *f)
{
    char *reg;
    int64_t val;

    reg = rname[r];
    if (c->type == CAddr) {
        loadaddr(c, reg, f);
        return;
    }
    if (c->type != CBits)
        die("invalid constant");
    val = c->bits.i;
    if (!KWIDE(k))
        /* narrow classes keep only the sign-extended low 32 bits */
        val = (int32_t)val;
    fprintf(f, "\tli %s, %"PRIi64"\n", reg, val);
}
/* Rewrite the memory reference *pr when it cannot be encoded
 * directly: thread-local addresses are first loaded into t6,
 * and stack slots whose offset does not fit the 12-bit signed
 * displacement are computed into t6 as well. */
static void
fixmem(Ref *pr, Fn *fn, FILE *f)
{
    Ref r;
    int64_t s;
    Con *c;

    r = *pr;
    if (rtype(r) == RCon) {
        c = &fn->con[r.val];
        if (c->type == CAddr)
        if (c->sym.type == SThr) {
            loadcon(c, T6, Kl, f);
            *pr = TMP(T6);
        }
    }
    if (rtype(r) == RSlot) {
        s = slot(r, fn);
        if (s < -2048 || s > 2047) {
            fprintf(f, "\tli t6, %"PRId64"\n", s);
            fprintf(f, "\tadd t6, fp, t6\n");
            *pr = TMP(T6);
        }
    }
}
/* Emit one instruction.  Most opcodes are formatted straight
 * from the omap[] template table (the Table label); the cases
 * below rewrite i in place first (copies to/from slots turn
 * into stores/loads) or emit custom sequences. */
static void
emitins(Ins *i, Fn *fn, FILE *f)
{
    int o;
    char *rn;
    int64_t s;
    Con *con;

    switch (i->op) {
    default:
        if (isload(i->op))
            fixmem(&i->arg[0], fn, f);
        else if (isstore(i->op))
            fixmem(&i->arg[1], fn, f);
    Table:
        /* most instructions are just pulled out of
         * the table omap[], some special cases are
         * detailed below */
        for (o=0;; o++) {
            /* this linear search should really be a binary
             * search */
            if (omap[o].op == NOp)
                die("no match for %s(%c)",
                    optab[i->op].name, "wlsd"[i->cls]);
            if (omap[o].op == i->op)
            if (omap[o].cls == i->cls || omap[o].cls == Ka
            || (omap[o].cls == Ki && KBASE(i->cls) == 0))
                break;
        }
        emitf(omap[o].fmt, i, fn, f);
        break;
    case Ocopy:
        if (req(i->to, i->arg[0]))
            break;
        if (rtype(i->to) == RSlot) {
            /* copy into a slot becomes a store */
            switch (rtype(i->arg[0])) {
            case RSlot:
            case RCon:
                die("unimplemented");
                break;
            default:
                assert(isreg(i->arg[0]));
                i->arg[1] = i->to;
                i->to = R;
                switch (i->cls) {
                case Kw: i->op = Ostorew; break;
                case Kl: i->op = Ostorel; break;
                case Ks: i->op = Ostores; break;
                case Kd: i->op = Ostored; break;
                }
                fixmem(&i->arg[1], fn, f);
                goto Table;
            }
            break;
        }
        assert(isreg(i->to));
        switch (rtype(i->arg[0])) {
        case RCon:
            loadcon(&fn->con[i->arg[0].val], i->to.val, i->cls, f);
            break;
        case RSlot:
            /* copy from a slot becomes a load */
            i->op = Oload;
            fixmem(&i->arg[0], fn, f);
            goto Table;
        default:
            assert(isreg(i->arg[0]));
            goto Table;
        }
        break;
    case Onop:
        break;
    case Oaddr:
        assert(rtype(i->arg[0]) == RSlot);
        rn = rname[i->to.val];
        s = slot(i->arg[0], fn);
        if (-s < 2048) {
            /* offset fits the 12-bit immediate */
            fprintf(f, "\tadd %s, fp, %"PRId64"\n", rn, s);
        } else {
            fprintf(f,
                "\tli %s, %"PRId64"\n"
                "\tadd %s, fp, %s\n",
                rn, s, rn, rn
            );
        }
        break;
    case Ocall:
        switch (rtype(i->arg[0])) {
        case RCon:
            con = &fn->con[i->arg[0].val];
            if (con->type != CAddr
            || con->sym.type != SGlo
            || con->bits.i)
                goto Invalid;
            fprintf(f, "\tcall %s\n", str(con->sym.id));
            break;
        case RTmp:
            emitf("jalr %0", i, fn, f);
            break;
        default:
        Invalid:
            die("invalid call argument");
        }
        break;
    case Osalloc:
        emitf("sub sp, sp, %0", i, fn, f);
        if (!req(i->to, R))
            emitf("mv %=, sp", i, fn, f);
        break;
    case Odbgloc:
        emitdbgloc(i->arg[0].val, i->arg[1].val, f);
        break;
    }
}
/*
Stack-frame layout:
+=============+
| varargs |
| save area |
+-------------+
| saved ra |
| saved fp |
+-------------+ <- fp
| ... |
| spill slots |
| ... |
+-------------+
| ... |
| locals |
| ... |
+-------------+
| padding |
+-------------+
| callee-save |
| registers |
+=============+
*/
/* Emit the assembly for one function: prologue (vararg save
 * area, ra/fp save, frame allocation, callee-saved spills),
 * then every block's instructions and terminating jump, then
 * the matching epilogue at each return.  The layout produced
 * is pictured in the comment above. */
void
rv64_emitfn(Fn *fn, FILE *f)
{
    static int id0; /* label base, makes block ids unique across functions */
    int lbl, neg, off, frame, *pr, r;
    Blk *b, *s;
    Ins *i, ii;

    emitfnlnk(fn->name, &fn->lnk, f);

    if (fn->vararg) {
        /* TODO: only need space for registers
         * unused by named arguments
         */
        fprintf(f, "\tadd sp, sp, -64\n");
        for (r=A0; r<=A7; r++)
            fprintf(f,
                "\tsd %s, %d(sp)\n",
                rname[r], 8 * (r - A0)
            );
    }
    fprintf(f, "\tsd fp, -16(sp)\n");
    fprintf(f, "\tsd ra, -8(sp)\n");
    fprintf(f, "\tadd fp, sp, -16\n");

    /* 16 bytes for ra/fp, plus the spill/local slots (4 bytes
     * each), rounded to the 16-byte stack alignment */
    frame = (16 + 4 * fn->slot + 15) & ~15;
    for (pr=rv64_rclob; *pr>=0; pr++) {
        if (fn->reg & BIT(*pr))
            frame += 8;
    }
    frame = (frame + 15) & ~15;

    if (frame <= 2048)
        fprintf(f,
            "\tadd sp, sp, -%d\n",
            frame
        );
    else
        fprintf(f,
            "\tli t6, %d\n"
            "\tsub sp, sp, t6\n",
            frame
        );
    /* save used callee-saved registers at the stack bottom */
    for (pr=rv64_rclob, off=0; *pr>=0; pr++) {
        if (fn->reg & BIT(*pr)) {
            fprintf(f,
                "\t%s %s, %d(sp)\n",
                *pr < FT0 ? "sd" : "fsd",
                rname[*pr], off
            );
            off += 8;
        }
    }

    for (lbl=0, b=fn->start; b; b=b->link) {
        /* the label is skipped when the only predecessor is the
         * fall-through block (lbl cleared by the Jmp case) */
        if (lbl || b->npred > 1)
            fprintf(f, ".L%d:\n", id0+b->id);
        for (i=b->ins; i!=&b->ins[b->nins]; i++)
            emitins(i, fn, f);
        lbl = 1;
        switch (b->jmp.type) {
        case Jhlt:
            fprintf(f, "\tebreak\n");
            break;
        case Jret0:
            if (fn->dynalloc) {
                /* sp may have moved (salloc); recompute it from fp */
                if (frame - 16 <= 2048)
                    fprintf(f,
                        "\tadd sp, fp, -%d\n",
                        frame - 16
                    );
                else
                    fprintf(f,
                        "\tli t6, %d\n"
                        "\tsub sp, fp, t6\n",
                        frame - 16
                    );
            }
            for (pr=rv64_rclob, off=0; *pr>=0; pr++) {
                if (fn->reg & BIT(*pr)) {
                    fprintf(f,
                        "\t%s %s, %d(sp)\n",
                        *pr < FT0 ? "ld" : "fld",
                        rname[*pr], off
                    );
                    off += 8;
                }
            }
            fprintf(f,
                "\tadd sp, fp, %d\n"
                "\tld ra, 8(fp)\n"
                "\tld fp, 0(fp)\n"
                "\tret\n",
                16 + fn->vararg * 64
            );
            break;
        case Jjmp:
        Jmp:
            if (b->s1 != b->link)
                fprintf(f, "\tj .L%d\n", id0+b->s1->id);
            else
                lbl = 0;
            break;
        case Jjnz:
            /* prefer falling through to the next block */
            neg = 0;
            if (b->link == b->s2) {
                s = b->s1;
                b->s1 = b->s2;
                b->s2 = s;
                neg = 1;
            }
            if (rtype(b->jmp.arg) == RSlot) {
                ii.arg[0] = b->jmp.arg;
                emitf("lw t6, %M0", &ii, fn, f);
                b->jmp.arg = TMP(T6);
            }
            assert(isreg(b->jmp.arg));
            fprintf(f,
                "\tb%sz %s, .L%d\n",
                neg ? "ne" : "eq",
                rname[b->jmp.arg.val],
                id0+b->s2->id
            );
            goto Jmp;
        }
    }
    id0 += fn->nblk;
    elf_emitfnfin(fn->name, f);
}

255
src/qbe/rv64/isel.c Normal file
View File

@@ -0,0 +1,255 @@
#include "all.h"
/* Report whether *r is an argument position of i that may hold
 * a memory reference: the address argument of loads and calls
 * (arg[0]) and of stores (arg[1]). */
static int
memarg(Ref *r, int op, Ins *i)
{
    int pos;

    if (isload(op) || op == Ocall)
        pos = 0;
    else if (isstore(op))
        pos = 1;
    else
        return 0;
    return r == &i->arg[pos];
}
/* Report whether *r may stay an immediate: only the second
 * argument of operations flagged .imm in rv64_op[]. */
static int
immarg(Ref *r, int op, Ins *i)
{
    if (!rv64_op[op].imm)
        return 0;
    return r == &i->arg[1];
}
/* Legalize the argument *r of instruction i (i may be null for
 * phi/jump arguments, then treated as a plain copy).  Constants
 * that cannot stay immediates are copied into fresh temporaries;
 * float constants are spilled to memory and loaded back; slot
 * temporaries are replaced by the slot or its computed address.
 * Emits in reverse order, as the isel pass runs bottom-up. */
static void
fixarg(Ref *r, int k, Ins *i, Fn *fn)
{
    char buf[32];
    Ref r0, r1;
    int s, n, op;
    Con *c;

    r0 = r1 = *r;
    op = i ? i->op : Ocopy;
    switch (rtype(r0)) {
    case RCon:
        c = &fn->con[r0.val];
        if (c->type == CAddr && memarg(r, op, i))
            break;
        /* small integer immediates can be encoded directly */
        if (KBASE(k) == 0)
        if (c->type == CBits && immarg(r, op, i))
        if (-2048 <= c->bits.i && c->bits.i < 2048)
            break;
        r1 = newtmp("isel", k, fn);
        if (KBASE(k) == 1) {
            /* load floating points from memory
             * slots, they can't be used as
             * immediates
             */
            assert(c->type == CBits);
            n = stashbits(c->bits.i, KWIDE(k) ? 8 : 4);
            vgrow(&fn->con, ++fn->ncon);
            c = &fn->con[fn->ncon-1];
            sprintf(buf, "\"%sfp%d\"", T.asloc, n);
            *c = (Con){.type = CAddr};
            c->sym.id = intern(buf);
            emit(Oload, k, r1, CON(c-fn->con), R);
            break;
        }
        emit(Ocopy, k, r1, r0, R);
        break;
    case RTmp:
        if (isreg(r0))
            break;
        s = fn->tmp[r0.val].slot;
        if (s != -1) {
            /* aggregate passed by value on
             * stack, or fast local address,
             * replace with slot if we can
             */
            if (memarg(r, op, i)) {
                r1 = SLOT(s);
                break;
            }
            r1 = newtmp("isel", k, fn);
            emit(Oaddr, k, r1, SLOT(s), R);
            break;
        }
        if (k == Kw && fn->tmp[r0.val].cls == Kl) {
            /* TODO: this sign extension isn't needed
             * for 32-bit arithmetic instructions
             */
            r1 = newtmp("isel", k, fn);
            emit(Oextsw, Kl, r1, r0, R);
        } else {
            assert(k == fn->tmp[r0.val].cls);
        }
        break;
    }
    *r = r1;
}
/* Logically negate the boolean destination *pr by emitting an
 * `xor *pr, r, 1` (remember, emission is bottom-up) and making
 * r the new destination for the preceding computation. */
static void
negate(Ref *pr, Fn *fn)
{
    Ref r;

    r = newtmp("isel", Kw, fn);
    emit(Oxor, Kw, *pr, r, getcon(1, fn));
    *pr = r;
}
/* Lower a comparison instruction i (class k, comparison code op)
 * to rv64: eq/ne go through xor + reqz/rnez; the signed/unsigned
 * orderings map onto slt/sltu with optional operand swap (swap)
 * and result negation (neg); float ordered/unordered checks use
 * self-equality tests combined with and.  Emission is bottom-up,
 * so the last emit() is the first executed instruction. */
static void
selcmp(Ins i, int k, int op, Fn *fn)
{
    Ins *icmp;
    Ref r, r0, r1;
    int sign, swap, neg;

    switch (op) {
    case Cieq:
        r = newtmp("isel", k, fn);
        emit(Oreqz, i.cls, i.to, r, R);
        emit(Oxor, k, r, i.arg[0], i.arg[1]);
        icmp = curi;
        fixarg(&icmp->arg[0], k, icmp, fn);
        fixarg(&icmp->arg[1], k, icmp, fn);
        return;
    case Cine:
        r = newtmp("isel", k, fn);
        emit(Ornez, i.cls, i.to, r, R);
        emit(Oxor, k, r, i.arg[0], i.arg[1]);
        icmp = curi;
        fixarg(&icmp->arg[0], k, icmp, fn);
        fixarg(&icmp->arg[1], k, icmp, fn);
        return;
    case Cisge: sign = 1, swap = 0, neg = 1; break;
    case Cisgt: sign = 1, swap = 1, neg = 0; break;
    case Cisle: sign = 1, swap = 1, neg = 1; break;
    case Cislt: sign = 1, swap = 0, neg = 0; break;
    case Ciuge: sign = 0, swap = 0, neg = 1; break;
    case Ciugt: sign = 0, swap = 1, neg = 0; break;
    case Ciule: sign = 0, swap = 1, neg = 1; break;
    case Ciult: sign = 0, swap = 0, neg = 0; break;
    case NCmpI+Cfeq:
    case NCmpI+Cfge:
    case NCmpI+Cfgt:
    case NCmpI+Cfle:
    case NCmpI+Cflt:
        /* direct float comparisons have hardware support */
        swap = 0, neg = 0;
        break;
    case NCmpI+Cfuo:
        negate(&i.to, fn);
        /* fall through */
    case NCmpI+Cfo:
        /* ordered = both operands equal themselves (not NaN) */
        r0 = newtmp("isel", i.cls, fn);
        r1 = newtmp("isel", i.cls, fn);
        emit(Oand, i.cls, i.to, r0, r1);
        op = KWIDE(k) ? Oceqd : Oceqs;
        emit(op, i.cls, r0, i.arg[0], i.arg[0]);
        icmp = curi;
        fixarg(&icmp->arg[0], k, icmp, fn);
        fixarg(&icmp->arg[1], k, icmp, fn);
        emit(op, i.cls, r1, i.arg[1], i.arg[1]);
        icmp = curi;
        fixarg(&icmp->arg[0], k, icmp, fn);
        fixarg(&icmp->arg[1], k, icmp, fn);
        return;
    case NCmpI+Cfne:
        swap = 0, neg = 1;
        i.op = KWIDE(k) ? Oceqd : Oceqs;
        break;
    default:
        assert(0 && "unknown comparison");
    }
    if (op < NCmpI)
        i.op = sign ? Ocsltl : Ocultl;
    if (swap) {
        r = i.arg[0];
        i.arg[0] = i.arg[1];
        i.arg[1] = r;
    }
    if (neg)
        negate(&i.to, fn);
    emiti(i);
    icmp = curi;
    fixarg(&icmp->arg[0], k, icmp, fn);
    fixarg(&icmp->arg[1], k, icmp, fn);
}
/* Select instructions for one IR instruction i: allocations go
 * through salloc(), comparisons through selcmp(), everything
 * else is re-emitted with its arguments legalized by fixarg(). */
static void
sel(Ins i, Fn *fn)
{
    Ins *i0;
    int ck, cc;

    if (INRANGE(i.op, Oalloc, Oalloc1)) {
        i0 = curi - 1;
        salloc(i.to, i.arg[0], fn);
        fixarg(&i0->arg[0], Kl, i0, fn);
        return;
    }
    if (iscmp(i.op, &ck, &cc)) {
        selcmp(i, ck, cc, fn);
        return;
    }
    if (i.op != Onop) {
        emiti(i);
        i0 = curi; /* fixarg() can change curi */
        fixarg(&i0->arg[0], argcls(&i, 0), i0, fn);
        fixarg(&i0->arg[1], argcls(&i, 1), i0, fn);
    }
}
/* Legalize a block's jump argument (conditional jumps only). */
static void
seljmp(Blk *b, Fn *fn)
{
    /* TODO: replace cmp+jnz with beq/bne/blt[u]/bge[u] */
    if (b->jmp.type == Jjnz)
        fixarg(&b->jmp.arg, Kw, 0, fn);
}
/* Instruction selection entry point for rv64: first turn
 * constant-sized allocs in the entry block into fixed stack
 * slots, then rewrite every block bottom-up through sel(),
 * also fixing phi and jump arguments of the successors. */
void
rv64_isel(Fn *fn)
{
    Blk *b, **sb;
    Ins *i;
    Phi *p;
    uint n;
    int al;
    int64_t sz;

    /* assign slots to fast allocs */
    b = fn->start;
    /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
    for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2)
        for (i=b->ins; i<&b->ins[b->nins]; i++)
            if (i->op == al) {
                if (rtype(i->arg[0]) != RCon)
                    break;
                sz = fn->con[i->arg[0].val].bits.i;
                if (sz < 0 || sz >= INT_MAX-15)
                    err("invalid alloc size %"PRId64, sz);
                /* round up to the alignment, count in 4-byte units */
                sz = (sz + n-1) & -n;
                sz /= 4;
                if (sz > INT_MAX - fn->slot)
                    die("alloc too large");
                fn->tmp[i->to.val].slot = fn->slot;
                fn->slot += sz;
                *i = (Ins){.op = Onop};
            }

    for (b=fn->start; b; b=b->link) {
        curi = &insb[NIns];
        for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
            for (p=(*sb)->phi; p; p=p->link) {
                /* fix the phi argument flowing out of b */
                for (n=0; p->blk[n] != b; n++)
                    assert(n+1 < p->narg);
                fixarg(&p->arg[n], p->cls, 0, fn);
            }
        seljmp(b, fn);
        for (i=&b->ins[b->nins]; i!=b->ins;)
            sel(*--i, fn);
        idup(b, curi, &insb[NIns]-curi);
    }

    if (debug['I']) {
        fprintf(stderr, "\n> After instruction selection:\n");
        printfn(fn, stderr);
    }
}

57
src/qbe/rv64/targ.c Normal file
View File

@@ -0,0 +1,57 @@
#include "all.h"
/* per-opcode rv64 flags generated from ops.h
 * (imm: second argument may be a 12-bit immediate) */
Rv64Op rv64_op[NOp] = {
#define O(op, t, x) [O##op] =
#define V(imm) { imm },
#include "../ops.h"
};

/* caller-saved registers, -1 terminated */
int rv64_rsave[] = {
    T0, T1, T2, T3, T4, T5,
    A0, A1, A2, A3, A4, A5, A6, A7,
    FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7,
    FT0, FT1, FT2, FT3, FT4, FT5, FT6, FT7,
    FT8, FT9, FT10,
    -1
};

/* callee-saved registers, -1 terminated */
int rv64_rclob[] = {
    S1, S2, S3, S4, S5, S6, S7,
    S8, S9, S10, S11,
    FS0, FS1, FS2, FS3, FS4, FS5, FS6, FS7,
    FS8, FS9, FS10, FS11,
    -1
};

/* registers reserved for the ABI, never allocated */
#define RGLOB (BIT(FP) | BIT(SP) | BIT(GP) | BIT(TP) | BIT(RA))

/* rv64 instructions never take in-memory operands */
static int
rv64_memargs(int op)
{
    (void)op;
    return 0;
}
/* rv64 target description wired into the generic passes */
Target T_rv64 = {
    .name = "rv64",
    .gpr0 = T0,
    .ngpr = NGPR,
    .fpr0 = FT0,
    .nfpr = NFPR,
    .rglob = RGLOB,
    .nrglob = 5,
    .rsave = rv64_rsave,
    .nrsave = {NGPS, NFPS},
    .retregs = rv64_retregs,
    .argregs = rv64_argregs,
    .memargs = rv64_memargs,
    .abi0 = elimsb,
    .abi1 = rv64_abi,
    .isel = rv64_isel,
    .emitfn = rv64_emitfn,
    .emitfin = elf_emitfin,
    .asloc = ".L",
    .cansel = 0,
};

/* compile-time checks that the register arrays match the counts */
MAKESURE(rsave_size_ok, sizeof rv64_rsave == (NGPS+NFPS+1) * sizeof(int));
MAKESURE(rclob_size_ok, sizeof rv64_rclob == (NCLR+1) * sizeof(int));

124
src/qbe/simpl.c Normal file
View File

@@ -0,0 +1,124 @@
#include "all.h"
/* Expand a blit of |sz| bytes from sd[0] to sd[1] into a
 * sequence of load/store pairs, using the widest unit that
 * still fits the remaining size.  The sign of sz selects the
 * copy direction: sz >= 0 copies backwards from the end,
 * sz < 0 forwards from the start (for overlapping ranges).
 * Instructions are emitted bottom-up via emit(). */
static void
blit(Ref sd[2], int sz, Fn *fn)
{
    struct { int st, ld, cls, size; } *p, tbl[] = {
        { Ostorel, Oload,   Kl, 8 },
        { Ostorew, Oload,   Kw, 4 },
        { Ostoreh, Oloaduh, Kw, 2 },
        { Ostoreb, Oloadub, Kw, 1 }
    };
    Ref r, r1, ro;
    int off, fwd, n;

    fwd = sz >= 0;
    sz = abs(sz);
    off = fwd ? sz : 0;
    for (p=tbl; sz; p++)
        for (n=p->size; sz>=n; sz-=n) {
            off -= fwd ? n : 0;
            r = newtmp("blt", Kl, fn);
            r1 = newtmp("blt", Kl, fn);
            ro = getcon(off, fn);
            emit(p->st, 0, R, r, r1);
            emit(Oadd, Kl, r1, sd[1], ro);
            r1 = newtmp("blt", Kl, fn);
            emit(p->ld, p->cls, r, r1, R);
            emit(Oadd, Kl, r1, sd[0], ro);
            off += fwd ? 0 : n;
        }
}
/* lookup table for ulog2(); indexed by the top 6 bits of the
 * product of a power of two with the multiplier below */
static int
ulog2_tab64[64] = {
    63,  0,  1, 41, 37,  2, 16, 42,
    38, 29, 32,  3, 12, 17, 43, 55,
    39, 35, 30, 53, 33, 21,  4, 23,
    13,  9, 18,  6, 25, 44, 48, 56,
    62, 40, 36, 15, 28, 31, 11, 54,
    34, 52, 20, 22,  8,  5, 24, 47,
    61, 14, 27, 10, 51, 19,  7, 46,
    60, 26, 50, 45, 59, 49, 58, 57,
};

/* Return log2 of pow2; pow2 must be a power of two
 * (multiply-shift perfect-hash into the table above). */
static int
ulog2(uint64_t pow2)
{
    return ulog2_tab64[(pow2 * 0x5b31ab928877a7e) >> 58];
}
/* Return nonzero when v is a power of two; zero is rejected. */
static int
ispow2(uint64_t v)
{
    if (v == 0)
        return 0;
    return (v & (v - 1)) == 0;
}
/* Simplify one instruction *pi of block b (iterated bottom-up).
 * Oblit1 triggers lazy re-emission: the tail of the block after
 * the blit is copied into the emission buffer and *new set, then
 * the blit pair is expanded in place; unsigned div/rem by a
 * power of two become shifts/masks. */
static void
ins(Ins **pi, int *new, Blk *b, Fn *fn)
{
    ulong ni;
    Con *c;
    Ins *i;
    Ref r;
    int n;

    i = *pi;
    /* simplify more instructions here;
     * copy 0 into xor, bit rotations,
     * etc. */
    switch (i->op) {
    case Oblit1:
        assert(i > b->ins);
        assert((i-1)->op == Oblit0);
        if (!*new) {
            /* start re-emitting from the tail of the block */
            curi = &insb[NIns];
            ni = &b->ins[b->nins] - (i+1);
            curi -= ni;
            icpy(curi, i+1, ni);
            *new = 1;
        }
        blit((i-1)->arg, rsval(i->arg[0]), fn);
        *pi = i-1; /* skip the Oblit0 half as well */
        return;
    case Oudiv:
    case Ourem:
        r = i->arg[1];
        if (KBASE(i->cls) == 0)
        if (rtype(r) == RCon) {
            c = &fn->con[r.val];
            if (c->type == CBits)
            if (ispow2(c->bits.i)) {
                n = ulog2(c->bits.i);
                if (i->op == Ourem) {
                    /* x % 2^n  ->  x & (2^n - 1) */
                    i->op = Oand;
                    i->arg[1] = getcon((1ull<<n) - 1, fn);
                } else {
                    /* x / 2^n  ->  x >> n */
                    i->op = Oshr;
                    i->arg[1] = getcon(n, fn);
                }
            }
        }
        break;
    }
    if (*new)
        emiti(*i);
}
/* Instruction simplification pass: walk each block bottom-up
 * through ins(); blocks are only rewritten (idup) when some
 * instruction actually changed the block's layout. */
void
simpl(Fn *fn)
{
    Blk *b;
    Ins *i;
    int new;

    for (b=fn->start; b; b=b->link) {
        new = 0;
        for (i=&b->ins[b->nins]; i!=b->ins;) {
            --i;
            ins(&i, &new, b, fn);
        }
        if (new)
            idup(b, curi, &insb[NIns]-curi);
    }
}

531
src/qbe/spill.c Normal file
View File

@@ -0,0 +1,531 @@
#include "all.h"
/* Fold block b's liveness data into its loop header hd:
 * union of live temporaries and per-class maximum pressure. */
static void
aggreg(Blk *hd, Blk *b)
{
    int k;

    /* aggregate looping information at
     * loop headers */
    bsunion(hd->gen, b->gen);
    for (k=0; k<2; k++)
        if (b->nlive[k] > hd->nlive[k])
            hd->nlive[k] = b->nlive[k];
}
/* Account one occurrence of reference r: bump use/def counters
 * and add the loop weight to the spill cost.  RMem references
 * recurse into their base and index temporaries. */
static void
tmpuse(Ref r, int use, int loop, Fn *fn)
{
    Mem *m;
    Tmp *t;

    if (rtype(r) == RMem) {
        m = &fn->mem[r.val];
        tmpuse(m->base, 1, loop, fn);
        tmpuse(m->index, 1, loop, fn);
    }
    else if (rtype(r) == RTmp && r.val >= Tmp0) {
        t = &fn->tmp[r.val];
        t->nuse += use;
        t->ndef += !use;
        t->cost += loop;
    }
}
/* evaluate spill costs of temporaries,
* this also fills usage information
* requires rpo, preds
*/
void
fillcost(Fn *fn)
{
int n;
uint a;
Blk *b;
Ins *i;
Tmp *t;
Phi *p;
loopiter(fn, aggreg);
if (debug['S']) {
fprintf(stderr, "\n> Loop information:\n");
for (b=fn->start; b; b=b->link) {
for (a=0; a<b->npred; ++a)
if (b->id <= b->pred[a]->id)
break;
if (a != b->npred) {
fprintf(stderr, "\t%-10s", b->name);
fprintf(stderr, " (% 3d ", b->nlive[0]);
fprintf(stderr, "% 3d) ", b->nlive[1]);
dumpts(b->gen, fn->tmp, stderr);
}
}
}
for (t=fn->tmp; t-fn->tmp < fn->ntmp; t++) {
t->cost = t-fn->tmp < Tmp0 ? UINT_MAX : 0;
t->nuse = 0;
t->ndef = 0;
}
for (b=fn->start; b; b=b->link) {
for (p=b->phi; p; p=p->link) {
t = &fn->tmp[p->to.val];
tmpuse(p->to, 0, 0, fn);
for (a=0; a<p->narg; a++) {
n = p->blk[a]->loop;
t->cost += n;
tmpuse(p->arg[a], 1, n, fn);
}
}
n = b->loop;
for (i=b->ins; i<&b->ins[b->nins]; i++) {
tmpuse(i->to, 0, n, fn);
tmpuse(i->arg[0], 1, n, fn);
tmpuse(i->arg[1], 1, n, fn);
}
tmpuse(b->jmp.arg, 1, n, fn);
}
if (debug['S']) {
fprintf(stderr, "\n> Spill costs:\n");
for (n=Tmp0; n<fn->ntmp; n++)
fprintf(stderr, "\t%-10s %d\n",
fn->tmp[n].name,
fn->tmp[n].cost);
fprintf(stderr, "\n");
}
}
/* spiller state, valid for the duration of one spill() call */
static BSet *fst; /* temps to prioritize in registers (for tcmp1) */
static Tmp *tmp; /* current temporaries (for tcmpX) */
static int ntmp; /* current # of temps (for limit) */
static int locs; /* stack size used by locals */
static int slot4; /* next slot of 4 bytes */
static int slot8; /* ditto, 8 bytes */
static BSet mask[2][1]; /* class masks */
/* qsort comparator: temporaries ordered by decreasing spill cost */
static int
tcmp0(const void *pa, const void *pb)
{
    uint ca, cb;

    ca = tmp[*(int *)pa].cost;
    cb = tmp[*(int *)pb].cost;
    return (cb < ca) ? -1 : (cb > ca);
}

/* qsort comparator: members of fst first, then by cost */
static int
tcmp1(const void *pa, const void *pb)
{
    int c;

    c = bshas(fst, *(int *)pb) - bshas(fst, *(int *)pa);
    return c ? c : tcmp0(pa, pb);
}
/* Return the stack slot of temporary t, assigning one on first
 * use.  Slots are counted in 4-byte units; wide classes take
 * two consecutive units aligned to 8 bytes. */
static Ref
slot(int t)
{
    int s;

    assert(t >= Tmp0 && "cannot spill register");
    s = tmp[t].slot;
    if (s == -1) {
        /* specific to NAlign == 3 */
        /* nice logic to pack stack slots
         * on demand, there can be only
         * one hole and slot4 points to it
         *
         * invariant: slot4 <= slot8
         */
        if (KWIDE(tmp[t].cls)) {
            s = slot8;
            if (slot4 == slot8)
                slot4 += 2;
            slot8 += 2;
        } else {
            s = slot4;
            if (slot4 == slot8) {
                slot8 += 2;
                slot4 += 1;
            } else
                slot4 = slot8;
        }
        s += locs;
        tmp[t].slot = s;
    }
    return SLOT(s);
}
/* restricts b to hold at most k
* temporaries, preferring those
* present in f (if given), then
* those with the largest spill
* cost
*/
static void
limit(BSet *b, int k, BSet *f)
{
static int *tarr, maxt;
int i, t, nt;
nt = bscount(b);
if (nt <= k)
return;
if (nt > maxt) {
free(tarr);
tarr = emalloc(nt * sizeof tarr[0]);
maxt = nt;
}
for (i=0, t=0; bsiter(b, &t); t++) {
bsclr(b, t);
tarr[i++] = t;
}
if (nt > 1) {
if (!f)
qsort(tarr, nt, sizeof tarr[0], tcmp0);
else {
fst = f;
qsort(tarr, nt, sizeof tarr[0], tcmp1);
}
}
for (i=0; i<k && i<nt; i++)
bsset(b, tarr[i]);
for (; i<nt; i++)
slot(tarr[i]);
}
/* spills temporaries to fit the
* target limits using the same
* preferences as limit(); assumes
* that k1 gprs and k2 fprs are
* currently in use
*/
static void
limit2(BSet *b1, int k1, int k2, BSet *f)
{
BSet b2[1];
bsinit(b2, ntmp); /* todo, free those */
bscopy(b2, b1);
bsinter(b1, mask[0]);
bsinter(b2, mask[1]);
limit(b1, T.ngpr - k1, f);
limit(b2, T.nfpr - k2, f);
bsunion(b1, b2);
}
/* Record the register set r as an allocation hint for every
 * temporary in u (shared through its phi class). */
static void
sethint(BSet *u, bits r)
{
    int t;

    for (t=Tmp0; bsiter(u, &t); t++)
        tmp[phicls(t, tmp)].hint.m |= r;
}
/* reloads temporaries in u that are
* not in v from their slots
*/
static void
reloads(BSet *u, BSet *v)
{
int t;
for (t=Tmp0; bsiter(u, &t); t++)
if (!bshas(v, t))
emit(Oload, tmp[t].cls, TMP(t), slot(t), R);
}
/* Emit a store of r into stack slot s (s == -1 means the
 * temporary was never spilled, so no store is needed).
 * Ostorew + cls relies on the store opcodes being declared
 * in class order w/l/s/d. */
static void
store(Ref r, int s)
{
    if (s != -1)
        emit(Ostorew + tmp[r.val].cls, 0, R, r, SLOT(s));
}

/* Is i a copy of a machine register into a temporary? */
static int
regcpy(Ins *i)
{
    return i->op == Ocopy && isreg(i->arg[0]);
}
/* Process (bottom-up) a run of consecutive register-to-temp
 * copies ending at i, treating the run — and the call that may
 * precede it — as one parallel move.  Returns the first
 * instruction of the run so the caller resumes above it.
 * v is the in-register set and is updated in place. */
static Ins *
dopm(Blk *b, Ins *i, BSet *v)
{
    int n, t;
    BSet u[1];
    Ins *i1;
    bits r;

    bsinit(u, ntmp); /* todo, free those */
    /* consecutive copies from
     * registers need to be handled
     * as one large instruction
     *
     * fixme: there is an assumption
     * that calls are always followed
     * by copy instructions here, this
     * might not be true if previous
     * passes change
     */
    i1 = ++i;
    do {
        i--;
        t = i->to.val;
        if (!req(i->to, R))
        if (bshas(v, t)) {
            bsclr(v, t);
            store(i->to, tmp[t].slot);
        }
        bsset(v, i->arg[0].val);
    } while (i != b->ins && regcpy(i-1));
    bscopy(u, v);
    if (i != b->ins && (i-1)->op == Ocall) {
        /* account for the call's clobbers and argument registers */
        v->t[0] &= ~T.retregs((i-1)->arg[1], 0);
        limit2(v, T.nrsave[0], T.nrsave[1], 0);
        for (n=0, r=0; T.rsave[n]>=0; n++)
            r |= BIT(T.rsave[n]);
        v->t[0] |= T.argregs((i-1)->arg[1], 0);
    } else {
        limit2(v, 0, 0, 0);
        r = v->t[0];
    }
    sethint(v, r);
    reloads(u, v);
    /* re-emit the copies below the reloads */
    do
        emiti(*--i1);
    while (i1 != i);
    return i;
}
/* Merge successor liveness v (of block bv) into u (of bu):
 * take everything when the successor is at least as deep in
 * loops, otherwise only temporaries not already spilled. */
static void
merge(BSet *u, Blk *bu, BSet *v, Blk *bv)
{
    int t;

    if (bu->loop <= bv->loop)
        bsunion(u, v);
    else
        for (t=0; bsiter(v, &t); t++)
            if (tmp[t].slot == -1)
                bsset(u, t);
}
/* spill code insertion
* requires spill costs, rpo, liveness
*
* Note: this will replace liveness
* information (in, out) with temporaries
* that must be in registers at block
* borders
*
* Be careful with:
* - Ocopy instructions to ensure register
* constraints
*/
void
spill(Fn *fn)
{
Blk *b, *s1, *s2, *hd, **bp;
int j, l, t, k, lvarg[2];
uint n;
BSet u[1], v[1], w[1];
Ins *i;
Phi *p;
Mem *m;
bits r;
tmp = fn->tmp;
ntmp = fn->ntmp;
bsinit(u, ntmp);
bsinit(v, ntmp);
bsinit(w, ntmp);
bsinit(mask[0], ntmp);
bsinit(mask[1], ntmp);
locs = fn->slot;
slot4 = 0;
slot8 = 0;
for (t=0; t<ntmp; t++) {
k = 0;
if (t >= T.fpr0 && t < T.fpr0 + T.nfpr)
k = 1;
if (t >= Tmp0)
k = KBASE(tmp[t].cls);
bsset(mask[k], t);
}
for (bp=&fn->rpo[fn->nblk]; bp!=fn->rpo;) {
b = *--bp;
/* invariant: all blocks with bigger rpo got
* their in,out updated. */
/* 1. find temporaries in registers at
* the end of the block (put them in v) */
curi = 0;
s1 = b->s1;
s2 = b->s2;
hd = 0;
if (s1 && s1->id <= b->id)
hd = s1;
if (s2 && s2->id <= b->id)
if (!hd || s2->id >= hd->id)
hd = s2;
if (hd) {
/* back-edge */
bszero(v);
hd->gen->t[0] |= T.rglob; /* don't spill registers */
for (k=0; k<2; k++) {
n = k == 0 ? T.ngpr : T.nfpr;
bscopy(u, b->out);
bsinter(u, mask[k]);
bscopy(w, u);
bsinter(u, hd->gen);
bsdiff(w, hd->gen);
if (bscount(u) < n) {
j = bscount(w); /* live through */
l = hd->nlive[k];
limit(w, n - (l - j), 0);
bsunion(u, w);
} else
limit(u, n, 0);
bsunion(v, u);
}
} else if (s1) {
/* avoid reloading temporaries
* in the middle of loops */
bszero(v);
liveon(w, b, s1);
merge(v, b, w, s1);
if (s2) {
liveon(u, b, s2);
merge(v, b, u, s2);
bsinter(w, u);
}
limit2(v, 0, 0, w);
} else {
bscopy(v, b->out);
if (rtype(b->jmp.arg) == RCall)
v->t[0] |= T.retregs(b->jmp.arg, 0);
}
if (rtype(b->jmp.arg) == RTmp) {
t = b->jmp.arg.val;
assert(KBASE(tmp[t].cls) == 0);
bsset(v, t);
limit2(v, 0, 0, NULL);
if (!bshas(v, t))
b->jmp.arg = slot(t);
}
for (t=Tmp0; bsiter(b->out, &t); t++)
if (!bshas(v, t))
slot(t);
bscopy(b->out, v);
/* 2. process the block instructions */
curi = &insb[NIns];
for (i=&b->ins[b->nins]; i!=b->ins;) {
i--;
if (regcpy(i)) {
i = dopm(b, i, v);
continue;
}
bszero(w);
if (!req(i->to, R)) {
assert(rtype(i->to) == RTmp);
t = i->to.val;
if (bshas(v, t))
bsclr(v, t);
else {
/* make sure we have a reg
* for the result */
assert(t >= Tmp0 && "dead reg");
bsset(v, t);
bsset(w, t);
}
}
j = T.memargs(i->op);
for (n=0; n<2; n++)
if (rtype(i->arg[n]) == RMem)
j--;
for (n=0; n<2; n++)
switch (rtype(i->arg[n])) {
case RMem:
t = i->arg[n].val;
m = &fn->mem[t];
if (rtype(m->base) == RTmp) {
bsset(v, m->base.val);
bsset(w, m->base.val);
}
if (rtype(m->index) == RTmp) {
bsset(v, m->index.val);
bsset(w, m->index.val);
}
break;
case RTmp:
t = i->arg[n].val;
lvarg[n] = bshas(v, t);
bsset(v, t);
if (j-- <= 0)
bsset(w, t);
break;
}
bscopy(u, v);
limit2(v, 0, 0, w);
for (n=0; n<2; n++)
if (rtype(i->arg[n]) == RTmp) {
t = i->arg[n].val;
if (!bshas(v, t)) {
/* do not reload if the
* argument is dead
*/
if (!lvarg[n])
bsclr(u, t);
i->arg[n] = slot(t);
}
}
reloads(u, v);
if (!req(i->to, R)) {
t = i->to.val;
store(i->to, tmp[t].slot);
if (t >= Tmp0)
/* in case i->to was a
* dead temporary */
bsclr(v, t);
}
emiti(*i);
r = v->t[0]; /* Tmp0 is NBit */
if (r)
sethint(v, r);
}
if (b == fn->start)
assert(v->t[0] == (T.rglob | fn->reg));
else
assert(v->t[0] == T.rglob);
for (p=b->phi; p; p=p->link) {
assert(rtype(p->to) == RTmp);
t = p->to.val;
if (bshas(v, t)) {
bsclr(v, t);
store(p->to, tmp[t].slot);
} else if (bshas(b->in, t))
/* only if the phi is live */
p->to = slot(p->to.val);
}
bscopy(b->in, v);
idup(b, curi, &insb[NIns]-curi);
}
/* align the locals to a 16 byte boundary */
/* specific to NAlign == 3 */
slot8 += slot8 & 3;
fn->slot += slot8;
if (debug['S']) {
fprintf(stderr, "\n> Block information:\n");
for (b=fn->start; b; b=b->link) {
fprintf(stderr, "\t%-10s (% 5d) ", b->name, b->loop);
dumpts(b->out, fn->tmp, stderr);
}
fprintf(stderr, "\n> After spilling:\n");
printfn(fn, stderr);
}
}

433
src/qbe/ssa.c Normal file
View File

@@ -0,0 +1,433 @@
#include "all.h"
#include <stdarg.h>
/* Append a use record to tmp: ty selects the variadic payload
 * (UPhi takes a Phi*, UIns an Ins*, UJmp nothing).  No-op when
 * the use vector has not been allocated yet. */
void
adduse(Tmp *tmp, int ty, Blk *b, ...)
{
    Use *u;
    int n;
    va_list ap;

    if (!tmp->use)
        return;
    va_start(ap, b);
    n = tmp->nuse;
    vgrow(&tmp->use, ++tmp->nuse);
    u = &tmp->use[n];
    u->type = ty;
    u->bid = b->id;
    switch (ty) {
    case UPhi:
        u->u.phi = va_arg(ap, Phi *);
        break;
    case UIns:
        u->u.ins = va_arg(ap, Ins *);
        break;
    case UJmp:
        break;
    default:
        die("unreachable");
    }
    va_end(ap);
}
/* fill usage, width, phi, and class information
* must not change .visit fields
*/
void
filluse(Fn *fn)
{
Blk *b;
Phi *p;
Ins *i;
int m, t, tp, w, x;
uint a;
Tmp *tmp;
tmp = fn->tmp;
for (t=Tmp0; t<fn->ntmp; t++) {
tmp[t].def = 0;
tmp[t].bid = -1u;
tmp[t].ndef = 0;
tmp[t].nuse = 0;
tmp[t].cls = 0;
tmp[t].phi = 0;
tmp[t].width = WFull;
if (tmp[t].use == 0)
tmp[t].use = vnew(0, sizeof(Use), PFn);
}
for (b=fn->start; b; b=b->link) {
for (p=b->phi; p; p=p->link) {
assert(rtype(p->to) == RTmp);
tp = p->to.val;
tmp[tp].bid = b->id;
tmp[tp].ndef++;
tmp[tp].cls = p->cls;
tp = phicls(tp, fn->tmp);
for (a=0; a<p->narg; a++)
if (rtype(p->arg[a]) == RTmp) {
t = p->arg[a].val;
adduse(&tmp[t], UPhi, b, p);
t = phicls(t, fn->tmp);
if (t != tp)
tmp[t].phi = tp;
}
}
for (i=b->ins; i<&b->ins[b->nins]; i++) {
if (!req(i->to, R)) {
assert(rtype(i->to) == RTmp);
w = WFull;
if (isparbh(i->op))
w = Wsb + (i->op - Oparsb);
if (isload(i->op) && i->op != Oload)
w = Wsb + (i->op - Oloadsb);
if (isext(i->op))
w = Wsb + (i->op - Oextsb);
if (iscmp(i->op, &x, &x))
w = Wub;
if (w == Wsw || w == Wuw)
if (i->cls == Kw)
w = WFull;
t = i->to.val;
tmp[t].width = w;
tmp[t].def = i;
tmp[t].bid = b->id;
tmp[t].ndef++;
tmp[t].cls = i->cls;
}
for (m=0; m<2; m++)
if (rtype(i->arg[m]) == RTmp) {
t = i->arg[m].val;
adduse(&tmp[t], UIns, b, i);
}
}
if (rtype(b->jmp.arg) == RTmp)
adduse(&tmp[b->jmp.arg.val], UJmp, b);
}
}
/* Create a fresh version of temporary t, keeping its
 * name prefix and class. */
static Ref
refindex(int t, Fn *fn)
{
    Tmp *old;

    old = &fn->tmp[t];
    return newtmp(old->name, old->cls, fn);
}
/* Insert phi nodes for every temporary with multiple uses across
 * blocks, using the iterated dominance frontier.  Temporaries
 * defined and used within a single block (or only in the entry
 * block) are renamed on the fly instead.  Marks processed temps
 * in .visit for the renaming pass. */
static void
phiins(Fn *fn)
{
    BSet u[1], defs[1];
    Blk *a, *b, **blist, **be, **bp;
    Ins *i;
    Phi *p;
    Use *use;
    Ref r;
    int t, nt, ok;
    uint n, defb;
    short k;

    bsinit(u, fn->nblk);
    bsinit(defs, fn->nblk);
    blist = emalloc(fn->nblk * sizeof blist[0]);
    be = &blist[fn->nblk];
    nt = fn->ntmp;
    for (t=Tmp0; t<nt; t++) {
        fn->tmp[t].visit = 0;
        if (fn->tmp[t].phi != 0)
            continue;
        if (fn->tmp[t].ndef == 1) {
            /* single definition: no phi needed when all
             * uses are in the defining block */
            ok = 1;
            defb = fn->tmp[t].bid;
            use = fn->tmp[t].use;
            for (n=fn->tmp[t].nuse; n--; use++)
                ok &= use->bid == defb;
            if (ok || defb == fn->start->id)
                continue;
        }
        bszero(u);
        k = Kx;
        bp = be;
        for (b=fn->start; b; b=b->link) {
            b->visit = 0;
            r = R;
            /* rename locally within each block; blocks where
             * t is live-out are queued for phi insertion */
            for (i=b->ins; i<&b->ins[b->nins]; i++) {
                if (!req(r, R)) {
                    if (req(i->arg[0], TMP(t)))
                        i->arg[0] = r;
                    if (req(i->arg[1], TMP(t)))
                        i->arg[1] = r;
                }
                if (req(i->to, TMP(t))) {
                    if (!bshas(b->out, t)) {
                        r = refindex(t, fn);
                        i->to = r;
                    } else {
                        if (!bshas(u, b->id)) {
                            bsset(u, b->id);
                            *--bp = b;
                        }
                        if (clsmerge(&k, i->cls))
                            die("invalid input");
                    }
                }
            }
            if (!req(r, R) && req(b->jmp.arg, TMP(t)))
                b->jmp.arg = r;
        }
        bscopy(defs, u);
        /* iterated dominance frontier worklist */
        while (bp != be) {
            fn->tmp[t].visit = t;
            b = *bp++;
            bsclr(u, b->id);
            for (n=0; n<b->nfron; n++) {
                a = b->fron[n];
                if (a->visit++ == 0)
                if (bshas(a->in, t)) {
                    /* t is live-in: a needs a phi */
                    p = alloc(sizeof *p);
                    p->cls = k;
                    p->to = TMP(t);
                    p->link = a->phi;
                    p->arg = vnew(0, sizeof p->arg[0], PFn);
                    p->blk = vnew(0, sizeof p->blk[0], PFn);
                    a->phi = p;
                    if (!bshas(defs, a->id))
                    if (!bshas(u, a->id)) {
                        bsset(u, a->id);
                        *--bp = a;
                    }
                }
            }
        }
    }
    free(blist);
}
typedef struct Name Name;

/* stack node of SSA versions for one original temporary
 * during renaming; b is the block holding the definition */
struct Name {
    Ref r;
    Blk *b;
    Name *up;
};

static Name *namel; /* free list of recycled Name nodes */

/* Allocate (or recycle) a Name node. */
static Name *
nnew(Ref r, Blk *b, Name *up)
{
    Name *n;

    if (namel) {
        n = namel;
        namel = n->up;
    } else
        /* could use alloc, here
         * but namel should be reset
         */
        n = emalloc(sizeof *n);
    n->r = r;
    n->b = b;
    n->up = up;
    return n;
}

/* Return a Name node to the free list. */
static void
nfree(Name *n)
{
    n->up = namel;
    namel = n;
}
/* Rename a definition *r in block b: create a fresh version,
 * push it on the temporary's name stack, and record (in .visit)
 * which original temp the new one stands for.  Skips untracked
 * temporaries (those with .visit == 0). */
static void
rendef(Ref *r, Blk *b, Name **stk, Fn *fn)
{
    Ref r1;
    int t;

    t = r->val;
    if (req(*r, R) || !fn->tmp[t].visit)
        return;
    r1 = refindex(t, fn);
    fn->tmp[r1.val].visit = t;
    stk[t] = nnew(r1, b, stk[t]);
    *r = r1;
}
/* Return the version of temporary t reaching block b: pop stack
 * entries whose defining block does not dominate b.  When no
 * definition reaches b the use is undefined and UNDEF is
 * returned. */
static Ref
getstk(int t, Blk *b, Name **stk)
{
    Name *n, *n1;

    n = stk[t];
    while (n && !dom(n->b, b)) {
        n1 = n;
        n = n->up;
        nfree(n1);
    }
    stk[t] = n;
    if (!n) {
        /* uh, oh, warn */
        return UNDEF;
    } else
        return n->r;
}
/* Rename all definitions and uses in block b, fill in the phi
 * arguments of its successors, then recurse over the dominator
 * tree children. */
static void
renblk(Blk *b, Name **stk, Fn *fn)
{
    Phi *p;
    Ins *i;
    Blk *s, **ps, *succ[3];
    int t, m;

    for (p=b->phi; p; p=p->link)
        rendef(&p->to, b, stk, fn);
    for (i=b->ins; i<&b->ins[b->nins]; i++) {
        for (m=0; m<2; m++) {
            t = i->arg[m].val;
            if (rtype(i->arg[m]) == RTmp)
            if (fn->tmp[t].visit)
                i->arg[m] = getstk(t, b, stk);
        }
        rendef(&i->to, b, stk, fn);
    }
    t = b->jmp.arg.val;
    if (rtype(b->jmp.arg) == RTmp)
    if (fn->tmp[t].visit)
        b->jmp.arg = getstk(t, b, stk);
    succ[0] = b->s1;
    succ[1] = b->s2 == b->s1 ? 0 : b->s2;
    succ[2] = 0;
    for (ps=succ; (s=*ps); ps++)
        for (p=s->phi; p; p=p->link) {
            t = p->to.val;
            /* .visit maps the renamed phi back to its
             * original temporary */
            if ((t=fn->tmp[t].visit)) {
                m = p->narg++;
                vgrow(&p->arg, p->narg);
                vgrow(&p->blk, p->narg);
                p->arg[m] = getstk(t, b, stk);
                p->blk[m] = b;
            }
        }
    for (s=b->dom; s; s=s->dlink)
        renblk(s, stk, fn);
}
/* require rpo and use */
void
ssa(Fn *fn)
{
Name **stk, *n;
int d, nt;
Blk *b, *b1;
nt = fn->ntmp;
stk = emalloc(nt * sizeof stk[0]);
d = debug['L'];
debug['L'] = 0;
filldom(fn);
if (debug['N']) {
fprintf(stderr, "\n> Dominators:\n");
for (b1=fn->start; b1; b1=b1->link) {
if (!b1->dom)
continue;
fprintf(stderr, "%10s:", b1->name);
for (b=b1->dom; b; b=b->dlink)
fprintf(stderr, " %s", b->name);
fprintf(stderr, "\n");
}
}
fillfron(fn);
filllive(fn);
phiins(fn);
renblk(fn->start, stk, fn);
while (nt--)
while ((n=stk[nt])) {
stk[nt] = n->up;
nfree(n);
}
debug['L'] = d;
free(stk);
if (debug['N']) {
fprintf(stderr, "\n> After SSA construction:\n");
printfn(fn, stderr);
}
}
/* Check one phi use of t: the definition block b must strictly
 * dominate each predecessor block supplying t (or be that
 * block).  Returns nonzero on violation. */
static int
phicheck(Phi *p, Blk *b, Ref t)
{
    Blk *b1;
    uint n;

    for (n=0; n<p->narg; n++)
        if (req(p->arg[n], t)) {
            b1 = p->blk[n];
            if (b1 != b && !sdom(b, b1))
                return 1;
        }
    return 0;
}
/* require use and ssa */
void
ssacheck(Fn *fn)
{
Tmp *t;
Ins *i;
Phi *p;
Use *u;
Blk *b, *bu;
Ref r;
for (t=&fn->tmp[Tmp0]; t-fn->tmp < fn->ntmp; t++) {
if (t->ndef > 1)
err("ssa temporary %%%s defined more than once",
t->name);
if (t->nuse > 0 && t->ndef == 0) {
bu = fn->rpo[t->use[0].bid];
goto Err;
}
}
for (b=fn->start; b; b=b->link) {
for (p=b->phi; p; p=p->link) {
r = p->to;
t = &fn->tmp[r.val];
for (u=t->use; u<&t->use[t->nuse]; u++) {
bu = fn->rpo[u->bid];
if (u->type == UPhi) {
if (phicheck(u->u.phi, b, r))
goto Err;
} else
if (bu != b && !sdom(b, bu))
goto Err;
}
}
for (i=b->ins; i<&b->ins[b->nins]; i++) {
if (rtype(i->to) != RTmp)
continue;
r = i->to;
t = &fn->tmp[r.val];
for (u=t->use; u<&t->use[t->nuse]; u++) {
bu = fn->rpo[u->bid];
if (u->type == UPhi) {
if (phicheck(u->u.phi, b, r))
goto Err;
} else {
if (bu == b) {
if (u->type == UIns)
if (u->u.ins <= i)
goto Err;
} else
if (!sdom(b, bu))
goto Err;
}
}
}
}
return;
Err:
if (t->visit)
die("%%%s violates ssa invariant", t->name);
else
err("ssa temporary %%%s is used undefined in @%s",
t->name, bu->name);
}

25
src/qbe/test/_alt.ssa Normal file
View File

@@ -0,0 +1,25 @@
# an example with reducible control
# flow graph that exposes poor
# handling of looping constructs
function $test() {
@start
%ten =w copy 10
%dum =w copy 0 # dummy live-through temporary
@loop
%alt =w phi @start 0, @left %alt1, @right %alt1
%cnt =w phi @start 100, @left %cnt, @right %cnt1
%alt1 =w sub 1, %alt
jnz %alt1, @right, @left
@left
%x =w phi @loop 10, @left %x1
%x1 =w sub %x, 1
%z =w copy %x
jnz %z, @left, @loop
@right
%cnt1 =w sub %cnt, %ten
jnz %cnt1, @loop, @end
@end
%ret =w add %cnt, %dum
ret
}

2687
src/qbe/test/_bf99.ssa Normal file

File diff suppressed because it is too large Load Diff

9079
src/qbe/test/_bfmandel.ssa Normal file

File diff suppressed because it is too large Load Diff

233
src/qbe/test/_chacha20.ssa Normal file
View File

@@ -0,0 +1,233 @@
export function $chacha20_rounds_qbe(l %out, l %in) {
@start
%t0 =w loadw %in
%in =l add %in, 4
%t1 =w loadw %in
%in =l add %in, 4
%t2 =w loadw %in
%in =l add %in, 4
%t3 =w loadw %in
%in =l add %in, 4
%t4 =w loadw %in
%in =l add %in, 4
%t5 =w loadw %in
%in =l add %in, 4
%t6 =w loadw %in
%in =l add %in, 4
%t7 =w loadw %in
%in =l add %in, 4
%t8 =w loadw %in
%in =l add %in, 4
%t9 =w loadw %in
%in =l add %in, 4
%t10 =w loadw %in
%in =l add %in, 4
%t11 =w loadw %in
%in =l add %in, 4
%t12 =w loadw %in
%in =l add %in, 4
%t13 =w loadw %in
%in =l add %in, 4
%t14 =w loadw %in
%in =l add %in, 4
%t15 =w loadw %in
%in =l add %in, 4
%counter =w copy 10
@loop
%t0 =w add %t0, %t4
%t12 =w xor %t12, %t0
%rotl32_a =w shl %t12, 16
%rotl32_b =w shr %t12, 16
%t12 =w xor %rotl32_a, %rotl32_b
%t8 =w add %t8, %t12
%t4 =w xor %t4, %t8
%rotl32_a =w shl %t4, 12
%rotl32_b =w shr %t4, 20
%t4 =w xor %rotl32_a, %rotl32_b
%t0 =w add %t0, %t4
%t12 =w xor %t12, %t0
%rotl32_a =w shl %t12, 8
%rotl32_b =w shr %t12, 24
%t12 =w xor %rotl32_a, %rotl32_b
%t8 =w add %t8, %t12
%t4 =w xor %t4, %t8
%rotl32_a =w shl %t4, 7
%rotl32_b =w shr %t4, 25
%t4 =w xor %rotl32_a, %rotl32_b
%t1 =w add %t1, %t5
%t13 =w xor %t13, %t1
%rotl32_a =w shl %t13, 16
%rotl32_b =w shr %t13, 16
%t13 =w xor %rotl32_a, %rotl32_b
%t9 =w add %t9, %t13
%t5 =w xor %t5, %t9
%rotl32_a =w shl %t5, 12
%rotl32_b =w shr %t5, 20
%t5 =w xor %rotl32_a, %rotl32_b
%t1 =w add %t1, %t5
%t13 =w xor %t13, %t1
%rotl32_a =w shl %t13, 8
%rotl32_b =w shr %t13, 24
%t13 =w xor %rotl32_a, %rotl32_b
%t9 =w add %t9, %t13
%t5 =w xor %t5, %t9
%rotl32_a =w shl %t5, 7
%rotl32_b =w shr %t5, 25
%t5 =w xor %rotl32_a, %rotl32_b
%t2 =w add %t2, %t6
%t14 =w xor %t14, %t2
%rotl32_a =w shl %t14, 16
%rotl32_b =w shr %t14, 16
%t14 =w xor %rotl32_a, %rotl32_b
%t10 =w add %t10, %t14
%t6 =w xor %t6, %t10
%rotl32_a =w shl %t6, 12
%rotl32_b =w shr %t6, 20
%t6 =w xor %rotl32_a, %rotl32_b
%t2 =w add %t2, %t6
%t14 =w xor %t14, %t2
%rotl32_a =w shl %t14, 8
%rotl32_b =w shr %t14, 24
%t14 =w xor %rotl32_a, %rotl32_b
%t10 =w add %t10, %t14
%t6 =w xor %t6, %t10
%rotl32_a =w shl %t6, 7
%rotl32_b =w shr %t6, 25
%t6 =w xor %rotl32_a, %rotl32_b
%t3 =w add %t3, %t7
%t15 =w xor %t15, %t3
%rotl32_a =w shl %t15, 16
%rotl32_b =w shr %t15, 16
%t15 =w xor %rotl32_a, %rotl32_b
%t11 =w add %t11, %t15
%t7 =w xor %t7, %t11
%rotl32_a =w shl %t7, 12
%rotl32_b =w shr %t7, 20
%t7 =w xor %rotl32_a, %rotl32_b
%t3 =w add %t3, %t7
%t15 =w xor %t15, %t3
%rotl32_a =w shl %t15, 8
%rotl32_b =w shr %t15, 24
%t15 =w xor %rotl32_a, %rotl32_b
%t11 =w add %t11, %t15
%t7 =w xor %t7, %t11
%rotl32_a =w shl %t7, 7
%rotl32_b =w shr %t7, 25
%t7 =w xor %rotl32_a, %rotl32_b
%t0 =w add %t0, %t5
%t15 =w xor %t15, %t0
%rotl32_a =w shl %t15, 16
%rotl32_b =w shr %t15, 16
%t15 =w xor %rotl32_a, %rotl32_b
%t10 =w add %t10, %t15
%t5 =w xor %t5, %t10
%rotl32_a =w shl %t5, 12
%rotl32_b =w shr %t5, 20
%t5 =w xor %rotl32_a, %rotl32_b
%t0 =w add %t0, %t5
%t15 =w xor %t15, %t0
%rotl32_a =w shl %t15, 8
%rotl32_b =w shr %t15, 24
%t15 =w xor %rotl32_a, %rotl32_b
%t10 =w add %t10, %t15
%t5 =w xor %t5, %t10
%rotl32_a =w shl %t5, 7
%rotl32_b =w shr %t5, 25
%t5 =w xor %rotl32_a, %rotl32_b
%t1 =w add %t1, %t6
%t12 =w xor %t12, %t1
%rotl32_a =w shl %t12, 16
%rotl32_b =w shr %t12, 16
%t12 =w xor %rotl32_a, %rotl32_b
%t11 =w add %t11, %t12
%t6 =w xor %t6, %t11
%rotl32_a =w shl %t6, 12
%rotl32_b =w shr %t6, 20
%t6 =w xor %rotl32_a, %rotl32_b
%t1 =w add %t1, %t6
%t12 =w xor %t12, %t1
%rotl32_a =w shl %t12, 8
%rotl32_b =w shr %t12, 24
%t12 =w xor %rotl32_a, %rotl32_b
%t11 =w add %t11, %t12
%t6 =w xor %t6, %t11
%rotl32_a =w shl %t6, 7
%rotl32_b =w shr %t6, 25
%t6 =w xor %rotl32_a, %rotl32_b
%t2 =w add %t2, %t7
%t13 =w xor %t13, %t2
%rotl32_a =w shl %t13, 16
%rotl32_b =w shr %t13, 16
%t13 =w xor %rotl32_a, %rotl32_b
%t8 =w add %t8, %t13
%t7 =w xor %t7, %t8
%rotl32_a =w shl %t7, 12
%rotl32_b =w shr %t7, 20
%t7 =w xor %rotl32_a, %rotl32_b
%t2 =w add %t2, %t7
%t13 =w xor %t13, %t2
%rotl32_a =w shl %t13, 8
%rotl32_b =w shr %t13, 24
%t13 =w xor %rotl32_a, %rotl32_b
%t8 =w add %t8, %t13
%t7 =w xor %t7, %t8
%rotl32_a =w shl %t7, 7
%rotl32_b =w shr %t7, 25
%t7 =w xor %rotl32_a, %rotl32_b
%t3 =w add %t3, %t4
%t14 =w xor %t14, %t3
%rotl32_a =w shl %t14, 16
%rotl32_b =w shr %t14, 16
%t14 =w xor %rotl32_a, %rotl32_b
%t9 =w add %t9, %t14
%t4 =w xor %t4, %t9
%rotl32_a =w shl %t4, 12
%rotl32_b =w shr %t4, 20
%t4 =w xor %rotl32_a, %rotl32_b
%t3 =w add %t3, %t4
%t14 =w xor %t14, %t3
%rotl32_a =w shl %t14, 8
%rotl32_b =w shr %t14, 24
%t14 =w xor %rotl32_a, %rotl32_b
%t9 =w add %t9, %t14
%t4 =w xor %t4, %t9
%rotl32_a =w shl %t4, 7
%rotl32_b =w shr %t4, 25
%t4 =w xor %rotl32_a, %rotl32_b
%counter =w sub %counter, 10
jnz %counter, @loop, @done
@done
storew %t0, %out
%out =l add %out, 4
storew %t1, %out
%out =l add %out, 4
storew %t2, %out
%out =l add %out, 4
storew %t3, %out
%out =l add %out, 4
storew %t4, %out
%out =l add %out, 4
storew %t5, %out
%out =l add %out, 4
storew %t6, %out
%out =l add %out, 4
storew %t7, %out
%out =l add %out, 4
storew %t8, %out
%out =l add %out, 4
storew %t9, %out
%out =l add %out, 4
storew %t10, %out
%out =l add %out, 4
storew %t11, %out
%out =l add %out, 4
storew %t12, %out
%out =l add %out, 4
storew %t13, %out
%out =l add %out, 4
storew %t14, %out
%out =l add %out, 4
storew %t15, %out
%out =l add %out, 4
ret
}

33
src/qbe/test/_dragon.ssa Normal file
View File

@@ -0,0 +1,33 @@
# a moderately complex test for
# dominators computation from
# the dragon book
# because branching is limited to
# two, I had to split some blocks
function $dragon() {
@start
@b1
jnz 0, @b2, @b3
@b2
jmp @b3
@b3
jmp @b4.1
@b4.1
jnz 0, @b3, @b4.2
@b4.2
jnz 0, @b5, @b6
@b5
jmp @b7
@b6
jmp @b7
@b7
jnz 0, @b8.1, @b4.1
@b8.1
jnz 0, @b3, @b8.2
@b8.2
jnz 0, @b9, @b10
@b9
jmp @b1
@b10
jmp @b7
}

15
src/qbe/test/_fix1.ssa Normal file
View File

@@ -0,0 +1,15 @@
function $test() {
@start
%x =w copy 1
@loop
jnz %x, @noz, @isz
@noz
%x =w copy 0
jmp @end
@isz
%x =w copy 1
jmp @loop
@end
%z =w add 10, %x
ret
}

15
src/qbe/test/_fix2.ssa Normal file
View File

@@ -0,0 +1,15 @@
function $test() {
@start
%x =w copy 1
@loop
jnz %x, @noz, @isz
@noz
%x =w copy 0
jnz %x, @loop, @end
@isz
%x =w copy 1
jmp @loop
@end
%z =w add 10, %x
ret
}

20
src/qbe/test/_fix3.ssa Normal file
View File

@@ -0,0 +1,20 @@
function w $test() {
@start
%x =w copy 100
%s =w copy 0
@l
%c =w cslew %x, 10
jnz %c, @a, @b
@a
%s =w add %s, %x
%x =w sub %x, 1
jmp @c
@b
%s =w sub %s, %x
jmp @c
@c
%x =w sub %x, 1
jnz %x, @l, @end
@end
ret %s
}

27
src/qbe/test/_fix4.ssa Normal file
View File

@@ -0,0 +1,27 @@
function $test() {
@start
%x =w copy 3
%n =w copy 2
@loop
%c =w ceqw %n, 10000
jnz %c, @end, @next
@next
%t =w copy 3
%x =w add %x, 2
@tloop
%s =w mul %t, %t
%c =w csgtw %s, %x
jnz %c, @prime, @test
@test
%r =w rem %x, %t
jnz %r, @tnext, @loop
@tnext
%t =w add %t, 2
jmp @tloop
@prime
%n =w add %n, 1
jmp @loop
@end
storew %x, $a
ret
}

48
src/qbe/test/_gcm1.ssa Normal file
View File

@@ -0,0 +1,48 @@
export
function w $ifmv(w %p1, w %p2, w %p3) {
@start
@entry
%rt =w add %p2, %p3 # gcm moves to @true
%rf =w sub %p2, %p3 # gcm moves to @false
jnz %p1, @true, @false
@true
%r =w copy %rt
jmp @exit
@false
%r =w copy %rf
jmp @exit
@exit
ret %r
}
export
function w $hoist1(w %p1, w %p2, w %p3) {
@start
@entry
%n =w copy 0
%i =w copy %p1
@loop
%base =w add %p2, %p3 # gcm moves to @exit
%i =w sub %i, 1
%n =w add %n, 1
jnz %i, @loop, @exit
@exit
%r =w add %base, %n
ret %r
}
export
function w $hoist2(w %p1, w %p2, w %p3) {
@start
@entry
%n =w copy 0
%i =w copy %p1
@loop
%base =w add %p2, %p3 # gcm moves to @entry
%i =w sub %i, 1
%n =w add %n, %base
jnz %i, @loop, @exit
@exit
%r =w add %base, %n
ret %r
}

43
src/qbe/test/_gcm2.ssa Normal file
View File

@@ -0,0 +1,43 @@
# Programs from "Global Code Motion Global Value Numbering" by Cliff Click
# https://courses.cs.washington.edu/courses/cse501/06wi/reading/click-pldi95.pdf
# GCM program in Figure 1
function w $gcm_test(w %a){
@start
%i.0 =w copy 0
@loop
%i.1 =w phi @start %i.0, @loop %i.2
%b =w add %a, 1 # early schedule moves to @start
%i.2 =w add %i.1, %b
%c =w mul %i.2, 2 # late schedule moves to @end
%x =w csltw %i.2, 10
jnz %x, @loop, @end
@end
ret %c
}
# GCM program in "Figure 3 x's definition does not dominate its use"
#
# SSA construction will insert phi instruction for "x" in @if_false
# preventing the "add" in @if_false from being moved to @if_true
function $gcm_test2 (w %a){
@start
%f =w copy 1
%x =w copy 0
%s.0 =w copy 0
@loop
%s.1 = w phi @start %s.0, @if_false %s.2
jnz %a, @if, @end
@if
jnz %f, @if_true, @if_false
@if_true
%f =w copy 0
%x =w add %x, 1
@if_false
%s.2 =w add %s.1, %x
jmp @loop
@end
ret
}

21
src/qbe/test/_live.ssa Normal file
View File

@@ -0,0 +1,21 @@
# this control flow graph is irreducible
# yet, we expect the liveness analysis
# to work properly and make %x live in
# the block @left
#
# nothing should ever be live at the entry
function $test() {
@start
%b =w copy 0
%x =w copy 10
jnz 0, @loop, @left
@left
jmp @inloop
@loop
%x1 =w add %x, 1
@inloop
%b1 =w add %b, 1
@endloop
jmp @loop
}

View File

@@ -0,0 +1,17 @@
# GCM can eliminate unused add/load instructions
export
function w $f(l %p, w %c) {
@start
jnz %c, @true, @false
@true
%p1 =l add %p, 4
%v1 =w loaduw %p1
jmp @end
@false
%p2 =l add %p, 4
%v2 =w loaduw %p2
jmp @end
@end
ret 0
}

12
src/qbe/test/_rpo.ssa Normal file
View File

@@ -0,0 +1,12 @@
function $test() {
@start
jmp @foo
@baz
jnz 1, @end, @foo
@bar
jmp @end
@foo
jnz 0, @bar, @baz
@end
ret
}

35762
src/qbe/test/_slow.qbe Normal file

File diff suppressed because it is too large Load Diff

22
src/qbe/test/_spill1.ssa Normal file
View File

@@ -0,0 +1,22 @@
# test with NReg == 3
# there must be a spill
# happening on %c
#
# if you replace the sub
# by an add or comment
# the two marked lines
# there should be no
# spill
#
function $test() {
@start
%f =w copy 0 # here
%b =w copy 1
%c =w copy 2
%a =w sub %b, %c
%d =w copy %b
%e =w copy %f # and there
%g =w copy %a
ret
}

22
src/qbe/test/_spill2.ssa Normal file
View File

@@ -0,0 +1,22 @@
# stupid spilling test
function $test() {
@start
%x1 =w copy 10
%x2 =w add %x1, %x1
%x3 =w sub %x2, %x1
%x4 =w add %x3, %x1
%x5 =w sub %x4, %x1
%x6 =w add %x5, %x1
%x7 =w sub %x6, %x1
%x8 =w add %x7, %x1
%x9 =w sub %x8, %x8
%x10 =w add %x9, %x7
%x11 =w sub %x10, %x6
%x12 =w add %x11, %x5
%x13 =w sub %x12, %x4
%x14 =w add %x13, %x3
%x15 =w sub %x14, %x2
%x16 =w add %x15, %x1
ret
}

24
src/qbe/test/_spill3.ssa Normal file
View File

@@ -0,0 +1,24 @@
# make sure comparisons
# never get their two
# operands in memory
# run with NReg == 3, or
# adapt it!
function $test() {
@start
%a =w loadw $a
%b =w loadw $a
@loop
%c =w phi @start 0, @loop %f
%d =w phi @start 0, @loop %g
%e =w phi @start 0, @loop %h
%f =w add %c, %d
%g =w add %c, %e
%h =w add %e, %d
%x =w cslew %a, %b
jnz %x, @loop, @end
@end
ret
}

60
src/qbe/test/abi1.ssa Normal file
View File

@@ -0,0 +1,60 @@
# test calling into C with two
# large struct arguments (passed
# on the stack)
type :mem = { b 17 }
function $alpha(l %p, w %l, l %n) {
@ini
%pe =l add %p, %n
@lop
%p1 =l phi @ini %p, @lop %p2
%l1 =w phi @ini %l, @lop %l2
storeb %l1, %p1
%p2 =l add %p1, 1
%l2 =w add %l1, 1
%c1 =w ceql %p1, %pe
jnz %c1, @end, @lop
@end
storeb 0, %pe
ret
}
export
function $test() {
@start
%p =l alloc4 17
%q =l alloc4 17
%r0 =w call $alpha(l %p, w 65, l 16)
%r1 =w call $alpha(l %q, w 97, l 16)
%r2 =w call $fcb(:mem %p, w 1, w 2, w 3, w 4, w 5, w 6, w 7, w 8, w 9, :mem %q)
ret
}
# >>> driver
# #include <stdio.h>
# typedef struct { char t[17]; } mem;
# extern void test();
# void fcb(mem m, int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, mem n) {
# printf("fcb: m = (mem){ t = \"%s\" }\n", m.t);
# printf(" n = (mem){ t = \"%s\" }\n", n.t);
# #define T(n) printf(" i%d = %d\n", n, i##n);
# T(1) T(2) T(3) T(4) T(5) T(6) T(7) T(8) T(9)
# }
# int main() { test(); return 0; }
# <<<
# >>> output
# fcb: m = (mem){ t = "ABCDEFGHIJKLMNOP" }
# n = (mem){ t = "abcdefghijklmnop" }
# i1 = 1
# i2 = 2
# i3 = 3
# i4 = 4
# i5 = 5
# i6 = 6
# i7 = 7
# i8 = 8
# i9 = 9
# <<<

19
src/qbe/test/abi2.ssa Normal file
View File

@@ -0,0 +1,19 @@
type :fps = { s, b, s }
export
function s $sum(:fps %p) {
@start
%f1 =s load %p
%p8 =l add 8, %p
%f2 =s load %p8
%s =s add %f1, %f2
ret %s
}
# >>> driver
# typedef struct { float f1; char b; float f2; } fps;
# extern float sum(fps);
# int main() { fps x = { 1.23, -1, 2.34 }; return !(sum(x) == 1.23f+2.34f); }
# /* Note the f suffixes above are important
# * otherwise C does double operations. */
# <<<

45
src/qbe/test/abi3.ssa Normal file
View File

@@ -0,0 +1,45 @@
type :four = {l, b, w}
data $z = { w 0 }
export
function $test() {
@start
%a =w loadw $z
%y =w add %a, %a
%yl =l extsw %y
%s =l alloc8 16 # allocate a :four struct
%s1 =l add %s, 12 # get address of the w
storel 4, %s # set the l
storew 5, %s1 # set the w
# only the last argument should be on the stack
%f =l add $F, %yl
%x =w call %f(w %y, w 1, w 2, w 3, :four %s, w 6)
# store the result in the
# global variable a
%x1 =w add %y, %x
storew %x1, $a
ret
}
# >>> driver
# #include <stdio.h>
# struct four { long long l; char c; int i; };
# extern void test(void);
# int F(int a0, int a1, int a2, int a3, struct four s, int a6) {
# printf("%d %d %d %d %d %d %d\n",
# a0, a1, a2, a3, (int)s.l, s.i, a6);
# return 42;
# }
# int a;
# int main() { test(); printf("%d\n", a); return 0; }
# <<<
# >>> output
# 0 1 2 3 4 5 6
# 42
# <<<

39
src/qbe/test/abi4.ssa Normal file
View File

@@ -0,0 +1,39 @@
# return a large struct to C
type :mem = { b 17 }
function $alpha(l %p, w %l, l %n) {
@ini
%pe =l add %p, %n
@lop
%p1 =l phi @ini %p, @lop %p2
%l1 =w phi @ini %l, @lop %l2
storeb %l1, %p1
%p2 =l add %p1, 1
%l2 =w add %l1, 1
%c1 =w ceql %p1, %pe
jnz %c1, @end, @lop
@end
storeb 0, %pe
ret
}
export
function :mem $test() {
@ini
%p =l alloc4 17
%r0 =w call $alpha(l %p, w 65, l 16)
ret %p
}
# >>> driver
# #include <stdio.h>
# typedef struct { char t[17]; } mem;
# extern mem test(void);
# int main() { mem m = test(); printf("%s\n", m.t); return 0; }
# <<<
# >>> output
# ABCDEFGHIJKLMNOP
# <<<

144
src/qbe/test/abi5.ssa Normal file
View File

@@ -0,0 +1,144 @@
# returning structs from C
type :st1 = { b 17 }
type :st2 = { w }
type :st3 = { s, w }
type :st4 = { w, d }
type :st5 = { s, l }
type :st6 = { b 16 }
type :st7 = { s, d }
type :st8 = { w 4 }
type :un9 = { { b } { s } }
type :st9 = { w, :un9 }
type :sta = { b, s }
type :stb = { b, b, s }
data $fmt1 = { b "t1: %s\n", b 0 }
data $fmt2 = { b "t2: %d\n", b 0 }
data $fmt3 = { b "t3: %f %d\n", b 0 }
data $fmt4 = { b "t4: %d %f\n", b 0 }
data $fmt5 = { b "t5: %f %lld\n", b 0 }
data $fmt6 = { b "t6: %s\n", b 0 }
data $fmt7 = { b "t7: %f %f\n", b 0 }
data $fmt8 = { b "t8: %d %d %d %d\n", b 0 }
data $fmt9 = { b "t9: %d %f\n", b 0 }
data $fmta = { b "ta: %d %f\n", b 0 }
data $fmtb = { b "tb: %d %d %f\n", b 0 }
export
function $test() {
@start
%r1 =:st1 call $t1()
%i1 =w call $printf(l $fmt1, ..., l %r1)
%r2 =:st2 call $t2()
%w2 =w loadw %r2
%i2 =w call $printf(l $fmt2, ..., w %w2)
%r3 =:st3 call $t3()
%s3 =s loads %r3
%r34 =l add %r3, 4
%w3 =w loadw %r34
%p3 =d exts %s3
%i3 =w call $printf(l $fmt3, ..., d %p3, w %w3)
%r4 =:st4 call $t4()
%w4 =w loadw %r4
%r48 =l add 8, %r4
%d4 =d loadd %r48
%i4 =w call $printf(l $fmt4, ..., w %w4, d %d4)
%r5 =:st5 call $t5()
%s5 =s loads %r5
%d5 =d exts %s5
%r58 =l add %r5, 8
%l5 =l loadl %r58
%i5 =w call $printf(l $fmt5, ..., d %d5, l %l5)
%r6 =:st6 call $t6()
%i6 =w call $printf(l $fmt6, ..., l %r6)
%r7 =:st7 call $t7()
%s7 =s loads %r7
%d71 =d exts %s7
%r78 =l add %r7, 8
%d72 =d loadd %r78
%i7 =w call $printf(l $fmt7, ..., d %d71, d %d72)
%r8 =:st8 call $t8()
%r84 =l add 4, %r8
%r88 =l add 4, %r84
%r812 =l add 4, %r88
%w81 =w loadw %r8
%w82 =w loadw %r84
%w83 =w loadw %r88
%w84 =w loadw %r812
%i8 =w call $printf(l $fmt8, ..., w %w81, w %w82, w %w83, w %w84)
%r9 =:st9 call $t9()
%r94 =l add 4, %r9
%w9 =w loadw %r9
%s9 =s loads %r94
%d9 =d exts %s9
%i9 =w call $printf(l $fmt9, ..., w %w9, d %d9)
%ra =:sta call $ta()
%ra4 =l add 4, %ra
%wa =w loadsb %ra
%sa =s loads %ra4
%da =d exts %sa
%ia =w call $printf(l $fmta, ..., w %wa, d %da)
%rb =:stb call $tb()
%rb1 =l add 1, %rb
%rb4 =l add 4, %rb
%w0b =w loadsb %rb
%w1b =w loadsb %rb1
%sb =s loads %rb4
%db =d exts %sb
%ib =w call $printf(l $fmtb, ..., w %w0b, w %w1b, d %db)
ret
}
# >>> driver
# typedef struct { char t[17]; } st1;
# typedef struct { int i; } st2;
# typedef struct { float f; int i; } st3;
# typedef struct { int i; double d; } st4;
# typedef struct { float f; long long l; } st5;
# typedef struct { char t[16]; } st6;
# typedef struct { float f; double d; } st7;
# typedef struct { int i[4]; } st8;
# typedef struct { int i; union { char c; float f; } u; } st9;
# typedef struct { char c; float f; } sta;
# typedef struct { char c0, c1; float f; } stb;
# extern void test(void);
# st1 t1() { return (st1){"abcdefghijklmnop"}; }
# st2 t2() { return (st2){2}; }
# st3 t3() { return (st3){3.0,30}; }
# st4 t4() { return (st4){4,-40}; }
# st5 t5() { return (st5){5.5,-55}; }
# st6 t6() { return (st6){"abcdefghijklmno"}; }
# st7 t7() { return (st7){7.77,77.7}; }
# st8 t8() { return (st8){-8,88,-888,8888}; }
# st9 t9() { return (st9){9,{.f=9.9}}; }
# sta ta() { return (sta){-10,10.1}; }
# stb tb() { return (stb){-1,11,11.11}; }
# int main() { test(); return 0; }
# <<<
# >>> output
# t1: abcdefghijklmnop
# t2: 2
# t3: 3.000000 30
# t4: 4 -40.000000
# t5: 5.500000 -55
# t6: abcdefghijklmno
# t7: 7.770000 77.700000
# t8: -8 88 -888 8888
# t9: 9 9.900000
# ta: -10 10.100000
# tb: -1 11 11.110000
# <<<

38
src/qbe/test/abi6.ssa Normal file
View File

@@ -0,0 +1,38 @@
# test arm64's hfa
data $dfmt = { b "double: %g\n", b 0 }
type :hfa3 = { s, s, s }
export
function $f(:hfa3 %h1, :hfa3 %h2, d %d1, :hfa3 %h3, d %d2) {
# the first three parameters should be in 7 registers
# the last two should be on the stack
@start
call $phfa3(:hfa3 %h1)
call $phfa3(:hfa3 %h2)
call $phfa3(:hfa3 %h3)
call $printf(l $dfmt, ..., d %d1)
call $printf(l $dfmt, ..., d %d2)
ret
}
# >>> driver
# #include <stdio.h>
# typedef struct { float f1, f2, f3; } hfa3;
# void f(hfa3, hfa3, double, hfa3, double);
# void phfa3(hfa3 h) { printf("{ %g, %g, %g }\n", h.f1, h.f2, h.f3); }
# int main() {
# hfa3 h1={1,2,3}, h2={2,3,4}, h3={3,4,5};
# f(h1, h2, 1, h3, 2);
# }
# <<<
# >>> output
# { 1, 2, 3 }
# { 2, 3, 4 }
# { 3, 4, 5 }
# double: 1
# double: 2
# <<<

21
src/qbe/test/abi7.ssa Normal file
View File

@@ -0,0 +1,21 @@
# test padding calculation with
# embedded struct
type :s1 = align 4 { w 3 }
type :s2 = align 4 { b 1, :s1 1 }
export function :s2 $test() {
@start
ret $s
}
# >>> driver
# struct s2 {
# char x;
# struct { int a[3]; } s1;
# } s = { .x = 123 };
# extern struct s2 test(void);
# int main(void) {
# return !(test().x == 123);
# }
# <<<

278
src/qbe/test/abi8.ssa Normal file
View File

@@ -0,0 +1,278 @@
# riscv64 ABI stress
# see tools/abi8.py
type :fi1 = { h, s } # in a gp & fp pair
type :fi2 = { s, w } # ditto
type :uw = { { w } }
type :fi3 = { s, :uw } # in a single gp reg
type :ss = { s, s } # in two fp regs
type :sd = { s, d } # ditto
type :ww = { w, w } # in a single gp reg
type :lb = { l, b } # in two gp regs
type :big = { b 17 } # by reference
type :ddd = { d, d, d} # big hfa on arm64
data $ctoqbestr = { b "c->qbe(%d)", b 0 }
data $emptystr = { b 0 }
export
function $qfn0(s %p0, s %p1, s %p2, s %p3, s %p4, s %p5, s %p6, s %p7, s %p8) {
@start
%r0 =w call $printf(l $ctoqbestr, ..., w 0)
call $ps(s %p8)
%r1 =w call $puts(l $emptystr)
ret
}
export
function $qfn1(w %p0, s %p1, :fi1 %p2) {
@start
%r0 =w call $printf(l $ctoqbestr, ..., w 1)
call $pw(w %p0)
call $ps(s %p1)
call $pfi1(l %p2)
%r1 =w call $puts(l $emptystr)
ret
}
export
function $qfn2(w %p0, :fi2 %p1, s %p2) {
@start
%r0 =w call $printf(l $ctoqbestr, ..., w 2)
call $pw(w %p0)
call $pfi2(l %p1)
call $ps(s %p2)
%r1 =w call $puts(l $emptystr)
ret
}
export
function $qfn3(w %p0, s %p1, :fi3 %p2) {
@start
%r0 =w call $printf(l $ctoqbestr, ..., w 3)
call $pw(w %p0)
call $ps(s %p1)
call $pfi3(l %p2)
%r1 =w call $puts(l $emptystr)
ret
}
export
function $qfn4(:ss %p0) {
@start
%r0 =w call $printf(l $ctoqbestr, ..., w 4)
call $pss(l %p0)
%r1 =w call $puts(l $emptystr)
ret
}
export
function $qfn5(d %p0, d %p1, d %p2, d %p3, d %p4, d %p5, d %p6, :ss %p7, s %p8, l %p9) {
@start
%r0 =w call $printf(l $ctoqbestr, ..., w 5)
call $pss(l %p7)
call $ps(s %p8)
call $pl(l %p9)
%r1 =w call $puts(l $emptystr)
ret
}
export
function $qfn6(:lb %p0) {
@start
%r0 =w call $printf(l $ctoqbestr, ..., w 6)
call $plb(l %p0)
%r1 =w call $puts(l $emptystr)
ret
}
export
function $qfn7(w %p0, w %p1, w %p2, w %p3, w %p4, w %p5, w %p6, :lb %p7) {
@start
%r0 =w call $printf(l $ctoqbestr, ..., w 7)
call $plb(l %p7)
%r1 =w call $puts(l $emptystr)
ret
}
export
function $qfn8(w %p0, w %p1, w %p2, w %p3, w %p4, w %p5, w %p6, w %p7, :lb %p8) {
@start
%r0 =w call $printf(l $ctoqbestr, ..., w 8)
call $plb(l %p8)
%r1 =w call $puts(l $emptystr)
ret
}
export
function $qfn9(:big %p0) {
@start
%r0 =w call $printf(l $ctoqbestr, ..., w 9)
call $pbig(l %p0)
%r1 =w call $puts(l $emptystr)
ret
}
export
function $qfn10(w %p0, w %p1, w %p2, w %p3, w %p4, w %p5, w %p6, w %p7, :big %p8, s %p9, l %p10) {
@start
%r0 =w call $printf(l $ctoqbestr, ..., w 10)
call $pbig(l %p8)
call $ps(s %p9)
call $pl(l %p10)
%r1 =w call $puts(l $emptystr)
ret
}
export
function $qfn11(:ddd %p0) {
@start
%r0 =w call $printf(l $ctoqbestr, ..., w 11)
call $pddd(l %p0)
%r1 =w call $puts(l $emptystr)
ret
}
export
function w $main() {
@start
call $cfn0(s 0, s 0, s 0, s 0, s 0, s 0, s 0, s 0, s s_9.9)
call $cfn1(w 1, s s_2.2, :fi1 $fi1)
call $cfn2(w 1, :fi2 $fi2, s s_3.3)
call $cfn3(w 1, s s_2.2, :fi3 $fi3)
call $cfn4(:ss $ss)
call $cfn5(d 0, d 0, d 0, d 0, d 0, d 0, d 0, :ss $ss, s s_9.9, l 10)
call $cfn6(:lb $lb)
call $cfn7(w 0, w 0, w 0, w 0, w 0, w 0, w 0, :lb $lb)
call $cfn8(w 0, w 0, w 0, w 0, w 0, w 0, w 0, w 0, :lb $lb)
call $cfn9(:big $big)
call $cfn10(w 0, w 0, w 0, w 0, w 0, w 0, w 0, w 0, :big $big, s s_10.10, l 11)
call $cfn11(:ddd $ddd)
ret 0
}
# >>> driver
# #include <stdio.h>
# typedef struct { short h; float s; } Sfi1;
# typedef struct { float s; int w; } Sfi2;
# typedef struct { float s; union { int w; } u; } Sfi3;
# typedef struct { float s0, s1; } Sss;
# typedef struct { float s; double d; } Ssd;
# typedef struct { int w0, w1; } Sww;
# typedef struct { long long l; char b; } Slb;
# typedef struct { char b[17]; } Sbig;
# typedef struct { double d0, d1, d2; } Sddd;
# Sfi1 zfi1, fi1 = { -123, 4.56 };
# Sfi2 zfi2, fi2 = { 1.23, 456 };
# Sfi3 zfi3, fi3 = { 3.45, 567 };
# Sss zss, ss = { 1.23, 45.6 };
# Ssd zsd, sd = { 2.34, 5.67 };
# Sww zww, ww = { -123, -456 };
# Slb zlb, lb = { 123, 'z' };
# Sbig zbig, big = { "abcdefhijklmnopqr" };
# Sddd zddd, ddd = { 1.23, 45.6, 7.89 };
# void pfi1(Sfi1 *s) { printf(" { %d, %g }", s->h, s->s); }
# void pfi2(Sfi2 *s) { printf(" { %g, %d }", s->s, s->w); }
# void pfi3(Sfi3 *s) { printf(" { %g, %d }", s->s, s->u.w); }
# void pss(Sss *s) { printf(" { %g, %g }", s->s0, s->s1); }
# void psd(Ssd *s) { printf(" { %g, %g }", s->s, s->d); }
# void pww(Sww *s) { printf(" { %d, %d }", s->w0, s->w1); }
# void plb(Slb *s) { printf(" { %lld, '%c' }", s->l, s->b); }
# void pbig(Sbig *s) { printf(" \"%.17s\"", s->b); }
# void pddd(Sddd *s) { printf(" { %g, %g, %g }", s->d0, s->d1, s->d2); }
# void pw(int w) { printf(" %d", w); }
# void pl(long long l) { printf(" %lld", l); }
# void ps(float s) { printf(" %g", s); }
# void pd(double d) { printf(" %g", d); }
# /* --------------------------- */
# extern void qfn0(float, float, float, float, float, float, float, float, float);
# void cfn0(float p0, float p1, float p2, float p3, float p4, float p5, float p6, float p7, float p8) {
# printf("qbe->c(%d)", 0);
# ps(p8); puts("");
# qfn0(p0, p1, p2, p3, p4, p5, p6, p7, p8);
# }
# extern void qfn1(int, float, Sfi1);
# void cfn1(int p0, float p1, Sfi1 p2) {
# printf("qbe->c(%d)", 1);
# pw(p0); ps(p1); pfi1(&p2); puts("");
# qfn1(p0, p1, p2);
# }
# extern void qfn2(int, Sfi2, float);
# void cfn2(int p0, Sfi2 p1, float p2) {
# printf("qbe->c(%d)", 2);
# pw(p0); pfi2(&p1); ps(p2); puts("");
# qfn2(p0, p1, p2);
# }
# extern void qfn3(int, float, Sfi3);
# void cfn3(int p0, float p1, Sfi3 p2) {
# printf("qbe->c(%d)", 3);
# pw(p0); ps(p1); pfi3(&p2); puts("");
# qfn3(p0, p1, p2);
# }
# extern void qfn4(Sss);
# void cfn4(Sss p0) {
# printf("qbe->c(%d)", 4);
# pss(&p0); puts("");
# qfn4(p0);
# }
# extern void qfn5(double, double, double, double, double, double, double, Sss, float, long long);
# void cfn5(double p0, double p1, double p2, double p3, double p4, double p5, double p6, Sss p7, float p8, long long p9) {
# printf("qbe->c(%d)", 5);
# pss(&p7); ps(p8); pl(p9); puts("");
# qfn5(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9);
# }
# extern void qfn6(Slb);
# void cfn6(Slb p0) {
# printf("qbe->c(%d)", 6);
# plb(&p0); puts("");
# qfn6(p0);
# }
# extern void qfn7(int, int, int, int, int, int, int, Slb);
# void cfn7(int p0, int p1, int p2, int p3, int p4, int p5, int p6, Slb p7) {
# printf("qbe->c(%d)", 7);
# plb(&p7); puts("");
# qfn7(p0, p1, p2, p3, p4, p5, p6, p7);
# }
# extern void qfn8(int, int, int, int, int, int, int, int, Slb);
# void cfn8(int p0, int p1, int p2, int p3, int p4, int p5, int p6, int p7, Slb p8) {
# printf("qbe->c(%d)", 8);
# plb(&p8); puts("");
# qfn8(p0, p1, p2, p3, p4, p5, p6, p7, p8);
# }
# extern void qfn9(Sbig);
# void cfn9(Sbig p0) {
# printf("qbe->c(%d)", 9);
# pbig(&p0); puts("");
# qfn9(p0);
# }
# extern void qfn10(int, int, int, int, int, int, int, int, Sbig, float, long long);
# void cfn10(int p0, int p1, int p2, int p3, int p4, int p5, int p6, int p7, Sbig p8, float p9, long long p10) {
# printf("qbe->c(%d)", 10);
# pbig(&p8); ps(p9); pl(p10); puts("");
# qfn10(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10);
# }
# extern void qfn11(Sddd);
# void cfn11(Sddd p0) {
# printf("qbe->c(%d)", 11);
# pddd(&p0); puts("");
# qfn11(p0);
# }
# <<<
# >>> output
# qbe->c(0) 9.9
# c->qbe(0) 9.9
# qbe->c(1) 1 2.2 { -123, 4.56 }
# c->qbe(1) 1 2.2 { -123, 4.56 }
# qbe->c(2) 1 { 1.23, 456 } 3.3
# c->qbe(2) 1 { 1.23, 456 } 3.3
# qbe->c(3) 1 2.2 { 3.45, 567 }
# c->qbe(3) 1 2.2 { 3.45, 567 }
# qbe->c(4) { 1.23, 45.6 }
# c->qbe(4) { 1.23, 45.6 }
# qbe->c(5) { 1.23, 45.6 } 9.9 10
# c->qbe(5) { 1.23, 45.6 } 9.9 10
# qbe->c(6) { 123, 'z' }
# c->qbe(6) { 123, 'z' }
# qbe->c(7) { 123, 'z' }
# c->qbe(7) { 123, 'z' }
# qbe->c(8) { 123, 'z' }
# c->qbe(8) { 123, 'z' }
# qbe->c(9) "abcdefhijklmnopqr"
# c->qbe(9) "abcdefhijklmnopqr"
# qbe->c(10) "abcdefhijklmnopqr" 10.1 11
# c->qbe(10) "abcdefhijklmnopqr" 10.1 11
# qbe->c(11) { 1.23, 45.6, 7.89 }
# c->qbe(11) { 1.23, 45.6, 7.89 }
# <<<

20
src/qbe/test/abi9.ssa Normal file
View File

@@ -0,0 +1,20 @@
type :obj = { l, l, l, l }
export
function :obj $f(l %self) {
@_0
%_1 =l alloc8 16
storel 77, %_1
ret %_1
}
# >>> driver
# #include <stdio.h>
# typedef struct { long long a, b, c, d; } obj;
# extern obj f();
# int main() { obj ret = f(); printf("%lld\n", ret.a); return 0; }
# <<<
# >>> output
# 77
# <<<

21
src/qbe/test/alias1.ssa Normal file
View File

@@ -0,0 +1,21 @@
export function w $main() {
@start
%a =l alloc4 4
%b =l alloc4 4
storew 4, %a
storew 5, %b
@loop
# %mem will be %a and %b successively,
# but we do not know it when processing
# the phi because %b goes through a cpy
%mem =l phi @start %a, @loop %bcpy
%w =w load %mem
%eq5 =w ceqw %w, 5
%bcpy =l copy %b
jnz %eq5, @exit, @loop
@exit
ret 0
}

17
src/qbe/test/align.ssa Normal file
View File

@@ -0,0 +1,17 @@
export
function $test() {
@start
%x =l alloc16 16
%y =l add %x, 8
%m =w rem %y, 16
storew %m, %y
%n =w loadw %y
storew %n, $a
ret
}
# >>> driver
# extern void test(void);
# int a;
# int main() { test(); return !(a == 8 || a == -8); }
# <<<

17
src/qbe/test/cmp1.ssa Normal file
View File

@@ -0,0 +1,17 @@
# test cmp used in jnz as well as its result value
export
function w $test(w %c) {
@start
%cmp =w cultw 1, %c
jnz %cmp, @yes, @no
@yes
%cmp =w copy 1
@no
ret %cmp
}
# >>> driver
# int test(int);
# int main(void) { return test(0); }
# <<<

62
src/qbe/test/collatz.ssa Normal file
View File

@@ -0,0 +1,62 @@
# a solution for N=1000 to
# https://projecteuler.net/problem=14
# we use a fast local array to
# memoize small collatz numbers
export
function $test() {
@start
%mem =l alloc4 4000
@loop
%n =w phi @start 1, @newm %n9, @oldm %n9
%cmax =w phi @start 0, @newm %c, @oldm %cmax
%fin =w csltw %n, 1000
jnz %fin, @cloop, @end
@cloop
%n0 =w phi @loop %n, @odd %n2, @even %n3
%c0 =w phi @loop 0, @odd %c1, @even %c1
%no1 =w cnew %n0, 1
jnz %no1, @iter0, @endcl
@iter0
%ism =w csltw %n0, %n
jnz %ism, @getmemo, @iter1
@iter1
%c1 =w add %c0, 1
%p =w and %n0, 1
jnz %p, @odd, @even
@odd
%n1 =w mul 3, %n0
%n2 =w add %n1, 1
jmp @cloop
@even
%n3 =w shr %n0, 1
jmp @cloop
@getmemo # get the count for n0 in mem
%n0l =l extsw %n0
%idx0 =l mul %n0l, 4
%loc0 =l add %idx0, %mem
%cn0 =w loadw %loc0
%c2 =w add %c0, %cn0
@endcl # store the count for n in mem
%c =w phi @getmemo %c2, @cloop %c0
%nl =l extsw %n
%idx1 =l mul %nl, 4
%loc1 =l add %idx1, %mem
storew %c, %loc1
%n9 =w add 1, %n
%big =w cslew %cmax, %c
jnz %big, @newm, @oldm
@newm
jmp @loop
@oldm
jmp @loop
@end
storew %cmax, $a
ret
}
# >>> driver
# extern void test(void);
# int a;
# int main() { test(); return !(a == 178); }
# <<<

76
src/qbe/test/conaddr.ssa Normal file
View File

@@ -0,0 +1,76 @@
# skip amd64_win (no signals on win32)
# test amd64 addressing modes
export
function w $f0(l %o) {
@start
%addr =l add $a, %o
%char =w loadub %addr
ret %char
}
export
function w $f1(l %o) {
@start
%o1 =l mul %o, 1
%addr =l add 10, %o1
%char =w loadub %addr
ret %char
}
export
function w $f2(l %o1, l %o2) {
@start
%o22 =l mul %o2, 2
%o =l add %o1, %o22
%addr =l add $a, %o
%char =w loadub %addr
ret %char
}
export
function l $f3(l %o) {
@start
%addr =l add %o, $a
ret %addr
}
export
function $f4() {
@start
storel $p, $p
ret
}
export
function $writeto0() {
@start
storel 0, 0
ret
}
# >>> driver
# #include <stdlib.h>
# #include <signal.h>
# char a[] = "qbe rocks";
# void *p;
# int ok;
# extern unsigned f0(long), f1(long), f2(long, long);
# extern char *f3(long);
# extern void f4(), writeto0();
# void h(int sig, siginfo_t *si, void *unused) {
# ok += si->si_addr == 0;
# exit(!(ok == 6));
# }
# int main() {
# struct sigaction sa = {.sa_flags=SA_SIGINFO, .sa_sigaction=h};
# sigemptyset(&sa.sa_mask); sigaction(SIGSEGV, &sa, 0);
# ok += f0(2) == 'e';
# ok += f1((long)a-5) == 'o';
# ok += f2(4, 2) == 's';
# ok += *f3(0) == 'q';
# f4();
# ok += p == &p;
# writeto0(); /* will segfault */
# }
# <<<

15
src/qbe/test/copy.ssa Normal file
View File

@@ -0,0 +1,15 @@
export function w $f() {
@start
%x0 =w loadsb $a
# the extension must not be eliminated
# even though the load already extended
%x1 =l extsb %x0
%c =w ceql %x1, -1
ret %c
}
# >>> driver
# char a = -1;
# extern int f();
# int main() { return !(f() == 1); }
# <<<

Some files were not shown because too many files have changed in this diff Show More