Merge branch 'fix_native_suite'
This commit is contained in:
128
build.cm
128
build.cm
@@ -490,6 +490,69 @@ Build.build_static = function(packages, target, output, buildtype) {
|
||||
// Native .cm compilation (source → mcode → QBE IL → .o → .dylib)
|
||||
// ============================================================================
|
||||
|
||||
// Batched native compilation: split functions into batches, run QBE on each,
// assemble all batches in parallel, return array of .o paths.
//   il_parts:   {data: text, functions: [text, ...], helpers: [text, ...]}
//               (helpers is optional; when present it is emitted into batch 0 only)
//   cc:         C compiler path
//   tmp_prefix: prefix for temp files (e.g. /tmp/cell_native_<hash>)
// Writes <tmp_prefix>_b<N>.s / .o for each batch N.
// Disrupts if any batch fails to assemble.
function compile_native_batched(il_parts, cc, tmp_prefix) {
  var nfuncs = length(il_parts.functions)
  var nbatch = 8
  var o_paths = []
  var s_paths = []
  var asm_cmds = []
  var wait_cmds = []
  var batch_fns = null
  var batch_il = null
  var asm_text = null
  var s_path = null
  var o_path = null
  var end = 0
  var bi = 0
  var fi = 0
  var ai = 0
  var rc = null
  var parallel_cmd = null
  var helpers_il = (il_parts.helpers && length(il_parts.helpers) > 0)
    ? text(il_parts.helpers, "\n") : ""
  var prefix = null

  // Never use more batches than functions, but always emit at least one
  // batch so the data section (and helpers) end up in some object file.
  if (nfuncs < nbatch) nbatch = nfuncs
  if (nbatch < 1) nbatch = 1

  // Generate .s files: run QBE on each batch
  while (bi < nbatch) {
    batch_fns = []
    // Upper bound (exclusive) of this batch; the last batch ends at nfuncs.
    end = nfuncs * (bi + 1) / nbatch
    while (fi < end) {
      batch_fns[] = il_parts.functions[fi]
      fi = fi + 1
    }
    // Batch 0 includes helper functions; others reference them as external symbols
    prefix = (bi == 0 && helpers_il != "") ? helpers_il + "\n\n" : ""
    batch_il = il_parts.data + "\n\n" + prefix + text(batch_fns, "\n")
    asm_text = os.qbe(batch_il)
    s_path = tmp_prefix + '_b' + text(bi) + '.s'
    o_path = tmp_prefix + '_b' + text(bi) + '.o'
    fd.slurpwrite(s_path, stone(blob(asm_text)))
    s_paths[] = s_path
    o_paths[] = o_path
    bi = bi + 1
  }

  // Assemble all batches in parallel.
  // A bare `wait` (no operands) always exits 0, which would hide assembler
  // failures. Record each background job's pid and wait on every pid
  // individually so that any failing job makes the whole command exit non-zero.
  while (ai < length(s_paths)) {
    asm_cmds[] = cc + ' -c ' + s_paths[ai] + ' -o ' + o_paths[ai]
      + ' & p' + text(ai) + '=$!'
    wait_cmds[] = 'wait $p' + text(ai) + ' || rc=1'
    ai = ai + 1
  }
  parallel_cmd = text(asm_cmds, '; ') + '; rc=0; '
    + text(wait_cmds, '; ') + '; exit $rc'
  rc = os.system(parallel_cmd)
  if (rc != 0) {
    print('Parallel assembly failed'); disrupt
  }

  return o_paths
}
|
||||
|
||||
// Post-process QBE IL: insert dead labels after ret/jmp (QBE requirement)
|
||||
function qbe_insert_dead_labels(il_text) {
|
||||
var lines = array(il_text, "\n")
|
||||
@@ -559,10 +622,7 @@ Build.compile_native = function(src_path, target, buildtype, pkg) {
|
||||
if (pkg) {
|
||||
sym_name = shop.c_symbol_for_file(pkg, fd.basename(src_path))
|
||||
}
|
||||
var il = qbe_emit(optimized, qbe_macros, sym_name)
|
||||
|
||||
// Step 3: Post-process (insert dead labels)
|
||||
il = qbe_insert_dead_labels(il)
|
||||
var il_parts = qbe_emit(optimized, qbe_macros, sym_name)
|
||||
|
||||
// Content hash for cache key
|
||||
var hash = content_hash(src + '\n' + _target + '\nnative')
|
||||
@@ -573,28 +633,14 @@ Build.compile_native = function(src_path, target, buildtype, pkg) {
|
||||
if (fd.is_file(dylib_path))
|
||||
return dylib_path
|
||||
|
||||
// Step 4: Write QBE IL to temp file
|
||||
// Compile and assemble via batched parallel pipeline
|
||||
var tmp = '/tmp/cell_native_' + hash
|
||||
var ssa_path = tmp + '.ssa'
|
||||
var s_path = tmp + '.s'
|
||||
var o_path = tmp + '.o'
|
||||
var rt_o_path = '/tmp/cell_qbe_rt.o'
|
||||
|
||||
fd.slurpwrite(ssa_path, stone(blob(il)))
|
||||
var o_paths = compile_native_batched(il_parts, cc, tmp)
|
||||
|
||||
// Step 5: QBE compile to assembly
|
||||
var rc = os.system('qbe -o ' + s_path + ' ' + ssa_path)
|
||||
if (rc != 0) {
|
||||
print('QBE compilation failed for: ' + src_path); disrupt
|
||||
}
|
||||
|
||||
// Step 6: Assemble
|
||||
rc = os.system(cc + ' -c ' + s_path + ' -o ' + o_path)
|
||||
if (rc != 0) {
|
||||
print('Assembly failed for: ' + src_path); disrupt
|
||||
}
|
||||
|
||||
// Step 7: Compile QBE runtime stubs if needed
|
||||
// Compile QBE runtime stubs if needed
|
||||
var rc = null
|
||||
if (!fd.is_file(rt_o_path)) {
|
||||
qbe_rt_path = shop.get_package_dir('core') + '/qbe_rt.c'
|
||||
rc = os.system(cc + ' -c ' + qbe_rt_path + ' -o ' + rt_o_path + ' -fPIC')
|
||||
@@ -603,14 +649,19 @@ Build.compile_native = function(src_path, target, buildtype, pkg) {
|
||||
}
|
||||
}
|
||||
|
||||
// Step 8: Link dylib
|
||||
// Link dylib
|
||||
var link_cmd = cc + ' -shared -fPIC'
|
||||
if (tc.system == 'darwin') {
|
||||
link_cmd = link_cmd + ' -undefined dynamic_lookup'
|
||||
} else if (tc.system == 'linux') {
|
||||
link_cmd = link_cmd + ' -Wl,--allow-shlib-undefined'
|
||||
}
|
||||
link_cmd = link_cmd + ' ' + o_path + ' ' + rt_o_path + ' -o ' + dylib_path
|
||||
var oi = 0
|
||||
while (oi < length(o_paths)) {
|
||||
link_cmd = link_cmd + ' ' + o_paths[oi]
|
||||
oi = oi + 1
|
||||
}
|
||||
link_cmd = link_cmd + ' ' + rt_o_path + ' -o ' + dylib_path
|
||||
|
||||
rc = os.system(link_cmd)
|
||||
if (rc != 0) {
|
||||
@@ -654,8 +705,7 @@ Build.compile_native_ir = function(optimized, src_path, opts) {
|
||||
if (pkg) {
|
||||
sym_name = shop.c_symbol_for_file(pkg, fd.basename(src_path))
|
||||
}
|
||||
var il = qbe_emit(optimized, qbe_macros, sym_name)
|
||||
il = qbe_insert_dead_labels(il)
|
||||
var il_parts = qbe_emit(optimized, qbe_macros, sym_name)
|
||||
|
||||
var src = text(fd.slurp(src_path))
|
||||
var hash = content_hash(src + '\n' + _target + '\nnative')
|
||||
@@ -666,24 +716,14 @@ Build.compile_native_ir = function(optimized, src_path, opts) {
|
||||
if (fd.is_file(dylib_path))
|
||||
return dylib_path
|
||||
|
||||
// Compile and assemble via batched parallel pipeline
|
||||
var tmp = '/tmp/cell_native_' + hash
|
||||
var ssa_path = tmp + '.ssa'
|
||||
var s_path = tmp + '.s'
|
||||
var o_path = tmp + '.o'
|
||||
var rt_o_path = '/tmp/cell_qbe_rt.o'
|
||||
|
||||
fd.slurpwrite(ssa_path, stone(blob(il)))
|
||||
|
||||
var rc = os.system('qbe -o ' + s_path + ' ' + ssa_path)
|
||||
if (rc != 0) {
|
||||
print('QBE compilation failed for: ' + src_path); disrupt
|
||||
}
|
||||
|
||||
rc = os.system(cc + ' -c ' + s_path + ' -o ' + o_path)
|
||||
if (rc != 0) {
|
||||
print('Assembly failed for: ' + src_path); disrupt
|
||||
}
|
||||
var o_paths = compile_native_batched(il_parts, cc, tmp)
|
||||
|
||||
// Compile QBE runtime stubs if needed
|
||||
var rc = null
|
||||
if (!fd.is_file(rt_o_path)) {
|
||||
qbe_rt_path = shop.get_package_dir('core') + '/qbe_rt.c'
|
||||
rc = os.system(cc + ' -c ' + qbe_rt_path + ' -o ' + rt_o_path + ' -fPIC')
|
||||
@@ -692,13 +732,19 @@ Build.compile_native_ir = function(optimized, src_path, opts) {
|
||||
}
|
||||
}
|
||||
|
||||
// Link dylib
|
||||
var link_cmd = cc + ' -shared -fPIC'
|
||||
if (tc.system == 'darwin') {
|
||||
link_cmd = link_cmd + ' -undefined dynamic_lookup'
|
||||
} else if (tc.system == 'linux') {
|
||||
link_cmd = link_cmd + ' -Wl,--allow-shlib-undefined'
|
||||
}
|
||||
link_cmd = link_cmd + ' ' + o_path + ' ' + rt_o_path + ' -o ' + dylib_path
|
||||
var oi = 0
|
||||
while (oi < length(o_paths)) {
|
||||
link_cmd = link_cmd + ' ' + o_paths[oi]
|
||||
oi = oi + 1
|
||||
}
|
||||
link_cmd = link_cmd + ' ' + rt_o_path + ' -o ' + dylib_path
|
||||
|
||||
rc = os.system(link_cmd)
|
||||
if (rc != 0) {
|
||||
|
||||
@@ -7,6 +7,7 @@ var build = use('build')
|
||||
var fd_mod = use('fd')
|
||||
var os = use('os')
|
||||
var json = use('json')
|
||||
var time = use('time')
|
||||
|
||||
var show = function(v) {
|
||||
if (v == null) return "null"
|
||||
@@ -39,12 +40,28 @@ var fold = use('fold')
|
||||
var mcode_mod = use('mcode')
|
||||
var streamline_mod = use('streamline')
|
||||
|
||||
var t0 = time.number()
|
||||
var src = text(fd_mod.slurp(abs))
|
||||
var t1 = time.number()
|
||||
var tok = tokenize(src, abs)
|
||||
var t2 = time.number()
|
||||
var ast = parse_mod(tok.tokens, src, abs, tokenize)
|
||||
var t3 = time.number()
|
||||
var folded = fold(ast)
|
||||
var t4 = time.number()
|
||||
var compiled = mcode_mod(folded)
|
||||
var t5 = time.number()
|
||||
var optimized = streamline_mod(compiled)
|
||||
var t6 = time.number()
|
||||
|
||||
print('--- front-end timing ---')
|
||||
print(' read: ' + text(t1 - t0) + 's')
|
||||
print(' tokenize: ' + text(t2 - t1) + 's')
|
||||
print(' parse: ' + text(t3 - t2) + 's')
|
||||
print(' fold: ' + text(t4 - t3) + 's')
|
||||
print(' mcode: ' + text(t5 - t4) + 's')
|
||||
print(' streamline: ' + text(t6 - t5) + 's')
|
||||
print(' total: ' + text(t6 - t0) + 's')
|
||||
|
||||
// Shared env for both paths — only non-intrinsic runtime functions.
|
||||
// Intrinsics (starts_with, ends_with, logical, some, every, etc.) live on
|
||||
|
||||
@@ -447,6 +447,7 @@ static JSValue js_os_dylib_close(JSContext *js, JSValue self, int argc, JSValue
|
||||
Uses cell_rt_native_module_load from qbe_helpers.c */
|
||||
extern JSValue cell_rt_native_module_load(JSContext *ctx, void *dl_handle, JSValue env);
|
||||
extern JSValue cell_rt_native_module_load_named(JSContext *ctx, void *dl_handle, const char *sym_name, JSValue env);
|
||||
extern JSValue js_os_qbe(JSContext *, JSValue, int, JSValue *);
|
||||
|
||||
static JSValue js_os_native_module_load(JSContext *js, JSValue self, int argc, JSValue *argv)
|
||||
{
|
||||
@@ -663,6 +664,7 @@ static const JSCFunctionListEntry js_os_funcs[] = {
|
||||
MIST_FUNC_DEF(os, print, 1),
|
||||
MIST_FUNC_DEF(os, random, 0),
|
||||
MIST_FUNC_DEF(os, getenv, 1),
|
||||
MIST_FUNC_DEF(os, qbe, 1),
|
||||
};
|
||||
|
||||
JSValue js_core_os_use(JSContext *js) {
|
||||
|
||||
53
meson.build
53
meson.build
@@ -60,6 +60,7 @@ src += [ # core
|
||||
|
||||
src += ['scheduler.c']
|
||||
src += ['qbe_helpers.c']
|
||||
src += ['qbe_backend.c']
|
||||
|
||||
scripts = [
|
||||
'debug/js.c',
|
||||
@@ -84,18 +85,67 @@ foreach file: scripts
|
||||
endforeach
|
||||
|
||||
srceng = 'source'
|
||||
includes = [srceng, 'internal', 'debug', 'net', 'archive']
|
||||
includes = [srceng, 'internal', 'debug', 'net', 'archive', 'src/qbe']
|
||||
|
||||
foreach file : src
|
||||
full_path = join_paths(srceng, file)
|
||||
sources += files(full_path)
|
||||
endforeach
|
||||
|
||||
# QBE compiler sources (all except main.c)
|
||||
# Built as a separate static library to avoid -x objective-c on macOS
|
||||
# (QBE uses 'Class' as a struct name, which conflicts with ObjC)
|
||||
qbe_src = [
|
||||
'src/qbe/util.c',
|
||||
'src/qbe/parse.c',
|
||||
'src/qbe/abi.c',
|
||||
'src/qbe/cfg.c',
|
||||
'src/qbe/mem.c',
|
||||
'src/qbe/ssa.c',
|
||||
'src/qbe/alias.c',
|
||||
'src/qbe/load.c',
|
||||
'src/qbe/copy.c',
|
||||
'src/qbe/fold.c',
|
||||
'src/qbe/gvn.c',
|
||||
'src/qbe/gcm.c',
|
||||
'src/qbe/simpl.c',
|
||||
'src/qbe/ifopt.c',
|
||||
'src/qbe/live.c',
|
||||
'src/qbe/spill.c',
|
||||
'src/qbe/rega.c',
|
||||
'src/qbe/emit.c',
|
||||
'src/qbe/amd64/targ.c',
|
||||
'src/qbe/amd64/sysv.c',
|
||||
'src/qbe/amd64/isel.c',
|
||||
'src/qbe/amd64/emit.c',
|
||||
'src/qbe/amd64/winabi.c',
|
||||
'src/qbe/arm64/targ.c',
|
||||
'src/qbe/arm64/abi.c',
|
||||
'src/qbe/arm64/isel.c',
|
||||
'src/qbe/arm64/emit.c',
|
||||
'src/qbe/rv64/targ.c',
|
||||
'src/qbe/rv64/abi.c',
|
||||
'src/qbe/rv64/isel.c',
|
||||
'src/qbe/rv64/emit.c',
|
||||
]
|
||||
|
||||
qbe_files = []
|
||||
foreach file : qbe_src
|
||||
qbe_files += files(file)
|
||||
endforeach
|
||||
|
||||
includers = []
|
||||
foreach inc : includes
|
||||
includers += include_directories(inc)
|
||||
endforeach
|
||||
|
||||
qbe_c_args = ['-x', 'c']
|
||||
qbe_lib = static_library('qbe',
|
||||
qbe_files,
|
||||
include_directories: includers,
|
||||
c_args: qbe_c_args,
|
||||
)
|
||||
|
||||
if host_machine.system() == 'windows'
|
||||
exe_ext = '.exe'
|
||||
link += '-Wl,--export-all-symbols'
|
||||
@@ -109,6 +159,7 @@ cell_so = shared_library(
|
||||
sources,
|
||||
include_directories: includers,
|
||||
dependencies: deps,
|
||||
link_whole: qbe_lib,
|
||||
install : true,
|
||||
)
|
||||
|
||||
|
||||
7
qbe.cm
7
qbe.cm
@@ -519,12 +519,9 @@ var ne_bool = function(p, a, b) {
|
||||
`
|
||||
}
|
||||
|
||||
// --- Type guard: is_identical ---
|
||||
// --- Type guard: is_identical (chases forwarding pointers via C helper) ---
|
||||
var is_identical = function(p, a, b) {
|
||||
return ` %${p}.cr =w ceql ${a}, ${b}
|
||||
%${p}.crext =l extuw %${p}.cr
|
||||
%${p}.sh =l shl %${p}.crext, 5
|
||||
%${p} =l or %${p}.sh, 3
|
||||
return ` %${p} =l call $cell_rt_is_identical(l %ctx, l ${a}, l ${b})
|
||||
`
|
||||
}
|
||||
|
||||
|
||||
1238
qbe_emit.cm
1238
qbe_emit.cm
File diff suppressed because it is too large
Load Diff
172
source/qbe_backend.c
Normal file
172
source/qbe_backend.c
Normal file
@@ -0,0 +1,172 @@
|
||||
/*
|
||||
* QBE Backend — in-process QBE IR → assembly compilation.
|
||||
*
|
||||
* Wraps QBE as a library: feeds IR text via fmemopen(), captures
|
||||
* assembly output via open_memstream(), returns it as a JS string.
|
||||
* No subprocess, no temp files for IR, no external qbe binary needed.
|
||||
*/
|
||||
|
||||
#include "cell.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* QBE headers */
|
||||
#include "all.h"
|
||||
#include "config.h"
|
||||
|
||||
/* QBE globals (declared extern in all.h) */
|
||||
Target T;
|
||||
char debug['Z'+1] = {0};
|
||||
|
||||
extern Target T_amd64_sysv;
|
||||
extern Target T_amd64_apple;
|
||||
extern Target T_amd64_win;
|
||||
extern Target T_arm64;
|
||||
extern Target T_arm64_apple;
|
||||
extern Target T_rv64;
|
||||
|
||||
/* Captured output stream — set before calling parse() */
|
||||
static FILE *qbe_outf;
|
||||
|
||||
/* parse() data callback: forward each data item to the captured output
 * stream; once the DEnd item has been emitted, write the end marker and
 * release QBE's allocations via freeall(). */
static void qbe_data(Dat *d) {
  emitdat(d, qbe_outf);
  if (d->type != DEnd)
    return;
  fputs("/* end data */\n\n", qbe_outf);
  freeall();
}
|
||||
|
||||
/* parse() function callback: run the QBE passes over one function, emit the
 * result to qbe_outf and release per-function allocations.
 * NOTE(review): the pass order is presumably taken from QBE's own driver
 * (main.c is not built here) -- do not reorder without checking upstream. */
static void qbe_func(Fn *fn) {
  uint n;
  uint last;

  T.abi0(fn);
  fillcfg(fn);
  filluse(fn);
  promote(fn);
  filluse(fn);
  ssa(fn);
  filluse(fn);
  ssacheck(fn);

  fillalias(fn);
  loadopt(fn);
  filluse(fn);
  fillalias(fn);
  coalesce(fn);
  filluse(fn);
  filldom(fn);
  ssacheck(fn);

  gvn(fn);
  fillcfg(fn);
  simplcfg(fn);
  filluse(fn);
  filldom(fn);
  gcm(fn);
  filluse(fn);
  ssacheck(fn);

  /* Only run if-conversion when the target reports it can select. */
  if (T.cansel) {
    ifconvert(fn);
    fillcfg(fn);
    filluse(fn);
    filldom(fn);
    ssacheck(fn);
  }

  T.abi1(fn);
  simpl(fn);
  fillcfg(fn);
  filluse(fn);
  T.isel(fn);
  fillcfg(fn);
  filllive(fn);
  fillloop(fn);
  fillcost(fn);
  spill(fn);
  rega(fn);
  fillcfg(fn);
  simpljmp(fn);
  fillcfg(fn);
  assert(fn->rpo[0] == fn->start);

  /* Chain the blocks through ->link in rpo order; the final block
   * terminates the list. */
  last = fn->nblk - 1;
  for (n = 0; n < last; n++)
    fn->rpo[n]->link = fn->rpo[n+1];
  fn->rpo[last]->link = 0;

  T.emitfn(fn, qbe_outf);
  fprintf(qbe_outf, "/* end function %s */\n\n", fn->name);
  freeall();
}
|
||||
|
||||
static void qbe_dbgfile(char *fn) {
|
||||
emitdbgfile(fn, qbe_outf);
|
||||
}
|
||||
|
||||
/*
|
||||
* js_os_qbe(ctx, self, argc, argv)
|
||||
*
|
||||
* Takes a single string argument (QBE IR text).
|
||||
* Returns the compiled assembly as a string.
|
||||
*/
|
||||
JSValue js_os_qbe(JSContext *js, JSValue self, int argc, JSValue *argv) {
|
||||
if (argc < 1)
|
||||
return JS_ThrowTypeError(js, "os.qbe requires an IR string argument");
|
||||
|
||||
const char *ir = JS_ToCString(js, argv[0]);
|
||||
if (!ir)
|
||||
return JS_EXCEPTION;
|
||||
|
||||
size_t ir_len = strlen(ir);
|
||||
|
||||
/* Select target for host platform */
|
||||
#if defined(__APPLE__) && defined(__aarch64__)
|
||||
T = T_arm64_apple;
|
||||
#elif defined(__APPLE__) && defined(__x86_64__)
|
||||
T = T_amd64_apple;
|
||||
#elif defined(_WIN32) && defined(__x86_64__)
|
||||
T = T_amd64_win;
|
||||
#elif defined(__x86_64__)
|
||||
T = T_amd64_sysv;
|
||||
#elif defined(__aarch64__)
|
||||
T = T_arm64;
|
||||
#elif defined(__riscv) && __riscv_xlen == 64
|
||||
T = T_rv64;
|
||||
#else
|
||||
T = Deftgt;
|
||||
#endif
|
||||
|
||||
memset(debug, 0, sizeof(debug));
|
||||
|
||||
/* Open IR string as input FILE */
|
||||
FILE *inf = fmemopen((void *)ir, ir_len, "r");
|
||||
if (!inf) {
|
||||
JS_FreeCString(js, ir);
|
||||
return JS_ThrowInternalError(js, "os.qbe: fmemopen failed");
|
||||
}
|
||||
|
||||
/* Open output memory stream */
|
||||
char *out_buf = NULL;
|
||||
size_t out_len = 0;
|
||||
qbe_outf = open_memstream(&out_buf, &out_len);
|
||||
if (!qbe_outf) {
|
||||
fclose(inf);
|
||||
JS_FreeCString(js, ir);
|
||||
return JS_ThrowInternalError(js, "os.qbe: open_memstream failed");
|
||||
}
|
||||
|
||||
/* Run the QBE pipeline */
|
||||
parse(inf, "<ir>", qbe_dbgfile, qbe_data, qbe_func);
|
||||
fclose(inf);
|
||||
|
||||
/* Finalize (emit assembler directives) */
|
||||
T.emitfin(qbe_outf);
|
||||
fflush(qbe_outf);
|
||||
fclose(qbe_outf);
|
||||
qbe_outf = NULL;
|
||||
|
||||
JS_FreeCString(js, ir);
|
||||
|
||||
/* Return assembly text */
|
||||
JSValue result = JS_NewStringLen(js, out_buf, out_len);
|
||||
free(out_buf);
|
||||
return result;
|
||||
}
|
||||
@@ -222,6 +222,16 @@ JSValue qbe_shift_shr(JSContext *ctx, JSValue a, JSValue b) {
|
||||
/* --- Property access --- */
|
||||
|
||||
/* Read the property `name` from `obj`.
   Function values are rejected: a TypeError is raised and JS_EXCEPTION
   is returned instead of performing the lookup. */
JSValue cell_rt_load_field(JSContext *ctx, JSValue obj, const char *name) {
  if (!JS_IsFunction(obj))
    return JS_GetPropertyStr(ctx, obj, name);

  JS_ThrowTypeError(ctx, "cannot read property of function");
  return JS_EXCEPTION;
}
|
||||
|
||||
/* Like cell_rt_load_field but without the function guard.
|
||||
Used by load_dynamic when the key happens to be a static string. */
|
||||
JSValue cell_rt_load_prop_str(JSContext *ctx, JSValue obj, const char *name) {
|
||||
return JS_GetPropertyStr(ctx, obj, name);
|
||||
}
|
||||
|
||||
@@ -238,10 +248,15 @@ JSValue cell_rt_load_dynamic(JSContext *ctx, JSValue obj, JSValue key) {
|
||||
|
||||
void cell_rt_store_dynamic(JSContext *ctx, JSValue val, JSValue obj,
|
||||
JSValue key) {
|
||||
if (JS_IsInt(key))
|
||||
if (JS_IsInt(key)) {
|
||||
JS_SetPropertyNumber(ctx, obj, (uint32_t)JS_VALUE_GET_INT(key), val);
|
||||
else
|
||||
} else if (JS_IsArray(obj) && !JS_IsInt(key)) {
|
||||
JS_ThrowTypeError(ctx, "array index must be a number");
|
||||
} else if (JS_IsBool(key) || JS_IsNull(key) || JS_IsArray(key) || JS_IsFunction(key)) {
|
||||
JS_ThrowTypeError(ctx, "object key must be text");
|
||||
} else {
|
||||
JS_SetProperty(ctx, obj, key, val);
|
||||
}
|
||||
}
|
||||
|
||||
JSValue cell_rt_load_index(JSContext *ctx, JSValue arr, JSValue idx) {
|
||||
@@ -466,7 +481,8 @@ static JSValue cell_fn_trampoline(JSContext *ctx, JSValue this_val,
|
||||
return result;
|
||||
}
|
||||
|
||||
JSValue cell_rt_make_function(JSContext *ctx, int64_t fn_idx, void *outer_fp) {
|
||||
JSValue cell_rt_make_function(JSContext *ctx, int64_t fn_idx, void *outer_fp,
|
||||
int64_t nr_args) {
|
||||
(void)outer_fp;
|
||||
if (g_native_fn_count >= MAX_NATIVE_FN)
|
||||
return JS_ThrowTypeError(ctx, "too many native functions (max %d)", MAX_NATIVE_FN);
|
||||
@@ -487,7 +503,7 @@ JSValue cell_rt_make_function(JSContext *ctx, int64_t fn_idx, void *outer_fp) {
|
||||
}
|
||||
|
||||
return JS_NewCFunction2(ctx, (JSCFunction *)cell_fn_trampoline, "native_fn",
|
||||
255, JS_CFUNC_generic_magic, global_id);
|
||||
(int)nr_args, JS_CFUNC_generic_magic, global_id);
|
||||
}
|
||||
|
||||
/* --- Frame-based function calling --- */
|
||||
@@ -515,15 +531,35 @@ JSValue cell_rt_invoke(JSContext *ctx, JSValue frame_val) {
|
||||
JSFrameRegister *fr = (JSFrameRegister *)JS_VALUE_GET_PTR(frame_val);
|
||||
int nr_slots = (int)objhdr_cap56(fr->header);
|
||||
int c_argc = (nr_slots >= 2) ? nr_slots - 2 : 0;
|
||||
JSValue fn_val = fr->function;
|
||||
|
||||
/* Copy args to C stack */
|
||||
JSValue args[c_argc > 0 ? c_argc : 1];
|
||||
for (int i = 0; i < c_argc; i++)
|
||||
args[i] = fr->slots[i + 1];
|
||||
if (!JS_IsFunction(fn_val)) {
|
||||
JS_ThrowTypeError(ctx, "not a function");
|
||||
return JS_EXCEPTION;
|
||||
}
|
||||
|
||||
JSFunction *fn = JS_VALUE_GET_FUNCTION(fn_val);
|
||||
JSValue result;
|
||||
|
||||
if (fn->kind == JS_FUNC_KIND_C) {
|
||||
/* Match MACH_INVOKE: C functions go directly to js_call_c_function,
|
||||
bypassing JS_Call's arity check. Extra args are silently available. */
|
||||
result = js_call_c_function(ctx, fn_val, fr->slots[0], c_argc, &fr->slots[1]);
|
||||
} else {
|
||||
/* Register/bytecode functions — use JS_CallInternal (no arity gate) */
|
||||
JSValue args[c_argc > 0 ? c_argc : 1];
|
||||
for (int i = 0; i < c_argc; i++)
|
||||
args[i] = fr->slots[i + 1];
|
||||
result = JS_CallInternal(ctx, fn_val, fr->slots[0], c_argc, args, 0);
|
||||
}
|
||||
|
||||
JSValue result = JS_Call(ctx, fr->function, fr->slots[0], c_argc, args);
|
||||
if (JS_IsException(result))
|
||||
return JS_EXCEPTION;
|
||||
/* Clear any stale exception left by functions that returned a valid
|
||||
value despite internal error (e.g., sign("text") returns null
|
||||
but JS_ToFloat64 leaves an exception flag) */
|
||||
if (JS_HasException(ctx))
|
||||
JS_GetException(ctx);
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -549,6 +585,16 @@ JSValue cell_rt_pop(JSContext *ctx, JSValue arr) {
|
||||
|
||||
/* Delete property `key` from `obj`.
   Returns JS_EXCEPTION when JS_DeleteProperty reports a negative status,
   otherwise a boolean built from (status >= 0).
   NOTE(review): after the negative check that boolean is always true, so a
   0 status is reported as success -- confirm this is intended. */
JSValue cell_rt_delete(JSContext *ctx, JSValue obj, JSValue key) {
  int status = JS_DeleteProperty(ctx, obj, key);
  if (status >= 0)
    return JS_NewBool(ctx, status >= 0);
  return JS_EXCEPTION;
}
|
||||
|
||||
/* Delete the property named by the C string `name` from `obj`.
   The name is first converted to a JS string key; the result follows the
   same convention as cell_rt_delete (JS_EXCEPTION on a negative status,
   otherwise a boolean built from status >= 0). */
JSValue cell_rt_delete_str(JSContext *ctx, JSValue obj, const char *name) {
  JSValue key = JS_NewString(ctx, name);
  int status = JS_DeleteProperty(ctx, obj, key);
  if (status >= 0)
    return JS_NewBool(ctx, status >= 0);
  return JS_EXCEPTION;
}
|
||||
|
||||
@@ -595,12 +641,37 @@ JSValue cell_rt_ge_text(JSContext *ctx, JSValue a, JSValue b) {
|
||||
return JS_NewBool(ctx, r);
|
||||
}
|
||||
|
||||
JSValue cell_rt_eq_tol(JSContext *ctx, JSValue a, JSValue b) {
|
||||
return JS_NewBool(ctx, a == b);
|
||||
static int cell_rt_tol_eq_inner(JSContext *ctx, JSValue a, JSValue b,
|
||||
JSValue tol) {
|
||||
if (JS_IsNumber(a) && JS_IsNumber(b) && JS_IsNumber(tol)) {
|
||||
double da, db, dt;
|
||||
JS_ToFloat64(ctx, &da, a);
|
||||
JS_ToFloat64(ctx, &db, b);
|
||||
JS_ToFloat64(ctx, &dt, tol);
|
||||
return fabs(da - db) <= dt;
|
||||
}
|
||||
if (JS_IsText(a) && JS_IsText(b) && JS_IsBool(tol) && JS_VALUE_GET_BOOL(tol)) {
|
||||
return js_string_compare_value_nocase(ctx, a, b) == 0;
|
||||
}
|
||||
/* Fallback to standard equality */
|
||||
if (a == b) return 1;
|
||||
if (JS_IsText(a) && JS_IsText(b))
|
||||
return js_string_compare_value(ctx, a, b, 1) == 0;
|
||||
if (JS_IsNumber(a) && JS_IsNumber(b)) {
|
||||
double da, db;
|
||||
JS_ToFloat64(ctx, &da, a);
|
||||
JS_ToFloat64(ctx, &db, b);
|
||||
return da == db;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
JSValue cell_rt_ne_tol(JSContext *ctx, JSValue a, JSValue b) {
|
||||
return JS_NewBool(ctx, a != b);
|
||||
JSValue cell_rt_eq_tol(JSContext *ctx, JSValue a, JSValue b, JSValue tol) {
|
||||
return JS_NewBool(ctx, cell_rt_tol_eq_inner(ctx, a, b, tol));
|
||||
}
|
||||
|
||||
JSValue cell_rt_ne_tol(JSContext *ctx, JSValue a, JSValue b, JSValue tol) {
|
||||
return JS_NewBool(ctx, !cell_rt_tol_eq_inner(ctx, a, b, tol));
|
||||
}
|
||||
|
||||
/* --- Type check: is_proxy (function with arity 2) --- */
|
||||
@@ -612,6 +683,14 @@ int cell_rt_is_proxy(JSContext *ctx, JSValue v) {
|
||||
return fn->length == 2;
|
||||
}
|
||||
|
||||
/* --- Identity check (chases forwarding pointers) --- */
|
||||
|
||||
/* Identity comparison of two values. Pointer values are first resolved
   through chase() so that both sides are compared by their chased target;
   non-pointer values are compared as-is. */
JSValue cell_rt_is_identical(JSContext *ctx, JSValue a, JSValue b) {
  JSValue lhs = a;
  JSValue rhs = b;

  if (JS_IsPtr(lhs))
    lhs = JS_MKPTR(chase(lhs));
  if (JS_IsPtr(rhs))
    rhs = JS_MKPTR(chase(rhs));

  return JS_NewBool(ctx, lhs == rhs);
}
|
||||
|
||||
/* --- Short-circuit and/or (non-allocating) --- */
|
||||
|
||||
JSValue cell_rt_and(JSContext *ctx, JSValue left, JSValue right) {
|
||||
|
||||
6
src/qbe/.gitignore
vendored
Normal file
6
src/qbe/.gitignore
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
*.o
|
||||
qbe
|
||||
config.h
|
||||
.comfile
|
||||
*.out
|
||||
*~
|
||||
19
src/qbe/LICENSE
Normal file
19
src/qbe/LICENSE
Normal file
@@ -0,0 +1,19 @@
|
||||
© 2015-2026 Quentin Carbonneaux <quentin@c9x.me>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
103
src/qbe/Makefile
Normal file
103
src/qbe/Makefile
Normal file
@@ -0,0 +1,103 @@
|
||||
.POSIX:
|
||||
.SUFFIXES: .o .c
|
||||
|
||||
PREFIX = /usr/local
|
||||
BINDIR = $(PREFIX)/bin
|
||||
|
||||
COMMOBJ = main.o util.o parse.o abi.o cfg.o mem.o ssa.o alias.o load.o \
|
||||
copy.o fold.o gvn.o gcm.o simpl.o ifopt.o live.o spill.o rega.o \
|
||||
emit.o
|
||||
AMD64OBJ = amd64/targ.o amd64/sysv.o amd64/isel.o amd64/emit.o amd64/winabi.o
|
||||
ARM64OBJ = arm64/targ.o arm64/abi.o arm64/isel.o arm64/emit.o
|
||||
RV64OBJ = rv64/targ.o rv64/abi.o rv64/isel.o rv64/emit.o
|
||||
OBJ = $(COMMOBJ) $(AMD64OBJ) $(ARM64OBJ) $(RV64OBJ)
|
||||
|
||||
SRCALL = $(OBJ:.o=.c)
|
||||
|
||||
CC = cc
|
||||
CFLAGS = -std=c99 -g -Wall -Wextra -Wpedantic
|
||||
|
||||
qbe: $(OBJ)
|
||||
$(CC) $(LDFLAGS) $(OBJ) -o $@
|
||||
|
||||
.c.o:
|
||||
$(CC) $(CFLAGS) -c $< -o $@
|
||||
|
||||
$(OBJ): all.h ops.h
|
||||
$(AMD64OBJ): amd64/all.h
|
||||
$(ARM64OBJ): arm64/all.h
|
||||
$(RV64OBJ): rv64/all.h
|
||||
main.o: config.h
|
||||
|
||||
config.h:
|
||||
@case `uname` in \
|
||||
*Darwin*) \
|
||||
case `uname -m` in \
|
||||
*arm64*) \
|
||||
echo "#define Deftgt T_arm64_apple";\
|
||||
;; \
|
||||
*) \
|
||||
echo "#define Deftgt T_amd64_apple";\
|
||||
;; \
|
||||
esac \
|
||||
;; \
|
||||
*) \
|
||||
case `uname -m` in \
|
||||
*aarch64*|*arm64*) \
|
||||
echo "#define Deftgt T_arm64"; \
|
||||
;; \
|
||||
*riscv64*) \
|
||||
echo "#define Deftgt T_rv64"; \
|
||||
;; \
|
||||
*) \
|
||||
echo "#define Deftgt T_amd64_sysv";\
|
||||
;; \
|
||||
esac \
|
||||
;; \
|
||||
esac > $@
|
||||
|
||||
install: qbe
|
||||
mkdir -p "$(DESTDIR)$(BINDIR)"
|
||||
install -m755 qbe "$(DESTDIR)$(BINDIR)/qbe"
|
||||
|
||||
uninstall:
|
||||
rm -f "$(DESTDIR)$(BINDIR)/qbe"
|
||||
|
||||
clean:
|
||||
rm -f *.o */*.o qbe
|
||||
|
||||
clean-gen: clean
|
||||
rm -f config.h
|
||||
|
||||
check: qbe
|
||||
tools/test.sh all
|
||||
|
||||
check-x86_64: qbe
|
||||
TARGET=x86_64 tools/test.sh all
|
||||
|
||||
check-arm64: qbe
|
||||
TARGET=arm64 tools/test.sh all
|
||||
|
||||
check-rv64: qbe
|
||||
TARGET=rv64 tools/test.sh all
|
||||
|
||||
check-amd64_win: qbe
|
||||
TARGET=amd64_win tools/test.sh all
|
||||
|
||||
src:
|
||||
@echo $(SRCALL)
|
||||
|
||||
80:
|
||||
@for F in $(SRCALL); \
|
||||
do \
|
||||
awk "{ \
|
||||
gsub(/\\t/, \" \"); \
|
||||
if (length(\$$0) > $@) \
|
||||
printf(\"$$F:%d: %s\\n\", NR, \$$0); \
|
||||
}" < $$F; \
|
||||
done
|
||||
|
||||
wc:
|
||||
@wc -l $(SRCALL)
|
||||
|
||||
# Every target that does not produce a file of its own name, including all
# check-* variants, so a stray file with such a name cannot mask the rule.
.PHONY: clean clean-gen check check-x86_64 check-arm64 check-rv64 check-amd64_win src 80 wc install uninstall
|
||||
18
src/qbe/README
Normal file
18
src/qbe/README
Normal file
@@ -0,0 +1,18 @@
|
||||
QBE - Backend Compiler http://c9x.me/compile/
|
||||
|
||||
doc/ Documentation.
|
||||
minic/ An example C frontend for QBE.
|
||||
tools/ Miscellaneous tools (testing).
|
||||
test/ Tests.
|
||||
amd64/
|
||||
arm64/
|
||||
rv64/ Architecture-specific code.
|
||||
|
||||
The LICENSE file applies to all files distributed.
|
||||
|
||||
- Compilation and Installation
|
||||
|
||||
Invoke make in this directory to create the executable
|
||||
file qbe. Install using 'make install', the standard
|
||||
DESTDIR and PREFIX environment variables are supported.
|
||||
Alternatively, you may simply copy the qbe binary.
|
||||
25
src/qbe/abi.c
Normal file
25
src/qbe/abi.c
Normal file
@@ -0,0 +1,25 @@
|
||||
#include "all.h"
|
||||
|
||||
/* eliminate sub-word abi op
|
||||
* variants for targets that
|
||||
* treat char/short/... as
|
||||
* words with arbitrary high
|
||||
* bits
|
||||
*/
|
||||
void
|
||||
elimsb(Fn *fn)
|
||||
{
|
||||
Blk *b;
|
||||
Ins *i;
|
||||
|
||||
for (b=fn->start; b; b=b->link) {
|
||||
for (i=b->ins; i<&b->ins[b->nins]; i++) {
|
||||
if (isargbh(i->op))
|
||||
i->op = Oarg;
|
||||
if (isparbh(i->op))
|
||||
i->op = Opar;
|
||||
}
|
||||
if (isretbh(b->jmp.type))
|
||||
b->jmp.type = Jretw;
|
||||
}
|
||||
}
|
||||
222
src/qbe/alias.c
Normal file
222
src/qbe/alias.c
Normal file
@@ -0,0 +1,222 @@
|
||||
#include "all.h"
|
||||
|
||||
void
|
||||
getalias(Alias *a, Ref r, Fn *fn)
|
||||
{
|
||||
Con *c;
|
||||
|
||||
switch (rtype(r)) {
|
||||
default:
|
||||
die("unreachable");
|
||||
case RTmp:
|
||||
*a = fn->tmp[r.val].alias;
|
||||
if (astack(a->type))
|
||||
a->type = a->slot->type;
|
||||
assert(a->type != ABot);
|
||||
break;
|
||||
case RCon:
|
||||
c = &fn->con[r.val];
|
||||
if (c->type == CAddr) {
|
||||
a->type = ASym;
|
||||
a->u.sym = c->sym;
|
||||
} else
|
||||
a->type = ACon;
|
||||
a->offset = c->bits.i;
|
||||
a->slot = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
alias(Ref p, int op, int sp, Ref q, int sq, int *delta, Fn *fn)
|
||||
{
|
||||
Alias ap, aq;
|
||||
int ovlap;
|
||||
|
||||
getalias(&ap, p, fn);
|
||||
getalias(&aq, q, fn);
|
||||
ap.offset += op;
|
||||
/* when delta is meaningful (ovlap == 1),
|
||||
* we do not overflow int because sp and
|
||||
* sq are bounded by 2^28 */
|
||||
*delta = ap.offset - aq.offset;
|
||||
ovlap = ap.offset < aq.offset + sq && aq.offset < ap.offset + sp;
|
||||
|
||||
if (astack(ap.type) && astack(aq.type)) {
|
||||
/* if both are offsets of the same
|
||||
* stack slot, they alias iif they
|
||||
* overlap */
|
||||
if (ap.base == aq.base && ovlap)
|
||||
return MustAlias;
|
||||
return NoAlias;
|
||||
}
|
||||
|
||||
if (ap.type == ASym && aq.type == ASym) {
|
||||
/* they conservatively alias if the
|
||||
* symbols are different, or they
|
||||
* alias for sure if they overlap */
|
||||
if (!symeq(ap.u.sym, aq.u.sym))
|
||||
return MayAlias;
|
||||
if (ovlap)
|
||||
return MustAlias;
|
||||
return NoAlias;
|
||||
}
|
||||
|
||||
if ((ap.type == ACon && aq.type == ACon)
|
||||
|| (ap.type == aq.type && ap.base == aq.base)) {
|
||||
assert(ap.type == ACon || ap.type == AUnk);
|
||||
/* if they have the same base, we
|
||||
* can rely on the offsets only */
|
||||
if (ovlap)
|
||||
return MustAlias;
|
||||
return NoAlias;
|
||||
}
|
||||
|
||||
/* if one of the two is unknown
|
||||
* there may be aliasing unless
|
||||
* the other is provably local */
|
||||
if (ap.type == AUnk && aq.type != ALoc)
|
||||
return MayAlias;
|
||||
if (aq.type == AUnk && ap.type != ALoc)
|
||||
return MayAlias;
|
||||
|
||||
return NoAlias;
|
||||
}
|
||||
|
||||
int
|
||||
escapes(Ref r, Fn *fn)
|
||||
{
|
||||
Alias *a;
|
||||
|
||||
if (rtype(r) != RTmp)
|
||||
return 1;
|
||||
a = &fn->tmp[r.val].alias;
|
||||
return !astack(a->type) || a->slot->type == AEsc;
|
||||
}
|
||||
|
||||
static void
|
||||
esc(Ref r, Fn *fn)
|
||||
{
|
||||
Alias *a;
|
||||
|
||||
assert(rtype(r) <= RType);
|
||||
if (rtype(r) == RTmp) {
|
||||
a = &fn->tmp[r.val].alias;
|
||||
if (astack(a->type))
|
||||
a->slot->type = AEsc;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
store(Ref r, int sz, Fn *fn)
|
||||
{
|
||||
Alias *a;
|
||||
int64_t off;
|
||||
bits m;
|
||||
|
||||
if (rtype(r) == RTmp) {
|
||||
a = &fn->tmp[r.val].alias;
|
||||
if (a->slot) {
|
||||
assert(astack(a->type));
|
||||
off = a->offset;
|
||||
if (sz >= NBit
|
||||
|| (off < 0 || off >= NBit))
|
||||
m = -1;
|
||||
else
|
||||
m = (BIT(sz) - 1) << off;
|
||||
a->slot->u.loc.m |= m;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fillalias(Fn *fn)
|
||||
{
|
||||
uint n;
|
||||
int t, sz;
|
||||
int64_t x;
|
||||
Blk *b;
|
||||
Phi *p;
|
||||
Ins *i;
|
||||
Con *c;
|
||||
Alias *a, a0, a1;
|
||||
|
||||
for (t=0; t<fn->ntmp; t++)
|
||||
fn->tmp[t].alias.type = ABot;
|
||||
for (n=0; n<fn->nblk; ++n) {
|
||||
b = fn->rpo[n];
|
||||
for (p=b->phi; p; p=p->link) {
|
||||
assert(rtype(p->to) == RTmp);
|
||||
a = &fn->tmp[p->to.val].alias;
|
||||
assert(a->type == ABot);
|
||||
a->type = AUnk;
|
||||
a->base = p->to.val;
|
||||
a->offset = 0;
|
||||
a->slot = 0;
|
||||
}
|
||||
for (i=b->ins; i<&b->ins[b->nins]; ++i) {
|
||||
a = 0;
|
||||
if (!req(i->to, R)) {
|
||||
assert(rtype(i->to) == RTmp);
|
||||
a = &fn->tmp[i->to.val].alias;
|
||||
assert(a->type == ABot);
|
||||
if (Oalloc <= i->op && i->op <= Oalloc1) {
|
||||
a->type = ALoc;
|
||||
a->slot = a;
|
||||
a->u.loc.sz = -1;
|
||||
if (rtype(i->arg[0]) == RCon) {
|
||||
c = &fn->con[i->arg[0].val];
|
||||
x = c->bits.i;
|
||||
if (c->type == CBits)
|
||||
if (0 <= x && x <= NBit)
|
||||
a->u.loc.sz = x;
|
||||
}
|
||||
} else {
|
||||
a->type = AUnk;
|
||||
a->slot = 0;
|
||||
}
|
||||
a->base = i->to.val;
|
||||
a->offset = 0;
|
||||
}
|
||||
if (i->op == Ocopy) {
|
||||
assert(a);
|
||||
getalias(a, i->arg[0], fn);
|
||||
}
|
||||
if (i->op == Oadd) {
|
||||
getalias(&a0, i->arg[0], fn);
|
||||
getalias(&a1, i->arg[1], fn);
|
||||
if (a0.type == ACon) {
|
||||
*a = a1;
|
||||
a->offset += a0.offset;
|
||||
}
|
||||
else if (a1.type == ACon) {
|
||||
*a = a0;
|
||||
a->offset += a1.offset;
|
||||
}
|
||||
}
|
||||
if (req(i->to, R) || a->type == AUnk)
|
||||
if (i->op != Oblit0) {
|
||||
if (!isload(i->op))
|
||||
esc(i->arg[0], fn);
|
||||
if (!isstore(i->op))
|
||||
if (i->op != Oargc)
|
||||
esc(i->arg[1], fn);
|
||||
}
|
||||
if (i->op == Oblit0) {
|
||||
++i;
|
||||
assert(i->op == Oblit1);
|
||||
assert(rtype(i->arg[0]) == RInt);
|
||||
sz = abs(rsval(i->arg[0]));
|
||||
store((i-1)->arg[1], sz, fn);
|
||||
}
|
||||
if (isstore(i->op))
|
||||
store(i->arg[1], storesz(i), fn);
|
||||
}
|
||||
if (b->jmp.type != Jretc)
|
||||
esc(b->jmp.arg, fn);
|
||||
}
|
||||
for (b=fn->start; b; b=b->link)
|
||||
for (p=b->phi; p; p=p->link)
|
||||
for (n=0; n<p->narg; n++)
|
||||
esc(p->arg[n], fn);
|
||||
}
|
||||
631
src/qbe/all.h
Normal file
631
src/qbe/all.h
Normal file
@@ -0,0 +1,631 @@
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#define MAKESURE(what, x) typedef char make_sure_##what[(x)?1:-1]
|
||||
#define die(...) die_(__FILE__, __VA_ARGS__)
|
||||
|
||||
typedef unsigned char uchar;
|
||||
typedef unsigned int uint;
|
||||
typedef unsigned long ulong;
|
||||
typedef unsigned long long bits;
|
||||
|
||||
typedef struct BSet BSet;
|
||||
typedef struct Ref Ref;
|
||||
typedef struct Op Op;
|
||||
typedef struct Ins Ins;
|
||||
typedef struct Phi Phi;
|
||||
typedef struct Blk Blk;
|
||||
typedef struct Use Use;
|
||||
typedef struct Sym Sym;
|
||||
typedef struct Num Num;
|
||||
typedef struct Alias Alias;
|
||||
typedef struct Tmp Tmp;
|
||||
typedef struct Con Con;
|
||||
typedef struct Addr Mem;
|
||||
typedef struct Fn Fn;
|
||||
typedef struct Typ Typ;
|
||||
typedef struct Field Field;
|
||||
typedef struct Dat Dat;
|
||||
typedef struct Lnk Lnk;
|
||||
typedef struct Target Target;
|
||||
|
||||
enum {
|
||||
NString = 80,
|
||||
NIns = 1 << 20,
|
||||
NAlign = 3,
|
||||
NField = 32,
|
||||
NBit = CHAR_BIT * sizeof(bits),
|
||||
};
|
||||
|
||||
struct Target {
|
||||
char name[16];
|
||||
char apple;
|
||||
char windows;
|
||||
int gpr0; /* first general purpose reg */
|
||||
int ngpr;
|
||||
int fpr0; /* first floating point reg */
|
||||
int nfpr;
|
||||
bits rglob; /* globally live regs (e.g., sp, fp) */
|
||||
int nrglob;
|
||||
int *rsave; /* caller-save */
|
||||
int nrsave[2];
|
||||
bits (*retregs)(Ref, int[2]);
|
||||
bits (*argregs)(Ref, int[2]);
|
||||
int (*memargs)(int);
|
||||
void (*abi0)(Fn *);
|
||||
void (*abi1)(Fn *);
|
||||
void (*isel)(Fn *);
|
||||
void (*emitfn)(Fn *, FILE *);
|
||||
void (*emitfin)(FILE *);
|
||||
char asloc[4];
|
||||
char assym[4];
|
||||
uint cansel:1;
|
||||
};
|
||||
|
||||
#define BIT(n) ((bits)1 << (n))
|
||||
|
||||
enum {
|
||||
RXX = 0,
|
||||
Tmp0 = NBit, /* first non-reg temporary */
|
||||
};
|
||||
|
||||
struct BSet {
|
||||
uint nt;
|
||||
bits *t;
|
||||
};
|
||||
|
||||
struct Ref {
|
||||
uint type:3;
|
||||
uint val:29;
|
||||
};
|
||||
|
||||
enum {
|
||||
RTmp,
|
||||
RCon,
|
||||
RInt,
|
||||
RType, /* last kind to come out of the parser */
|
||||
RSlot,
|
||||
RCall,
|
||||
RMem,
|
||||
};
|
||||
|
||||
#define R (Ref){RTmp, 0}
|
||||
#define UNDEF (Ref){RCon, 0} /* represents uninitialized data */
|
||||
#define CON_Z (Ref){RCon, 1}
|
||||
#define TMP(x) (Ref){RTmp, x}
|
||||
#define CON(x) (Ref){RCon, x}
|
||||
#define SLOT(x) (Ref){RSlot, (x)&0x1fffffff}
|
||||
#define TYPE(x) (Ref){RType, x}
|
||||
#define CALL(x) (Ref){RCall, x}
|
||||
#define MEM(x) (Ref){RMem, x}
|
||||
#define INT(x) (Ref){RInt, (x)&0x1fffffff}
|
||||
|
||||
/* Reference equality: 1 when both the kind and the value
 * of a and b match, 0 otherwise. */
static inline int req(Ref a, Ref b)
{
	if (a.type != b.type)
		return 0;
	return a.val == b.val;
}
|
||||
|
||||
/* Kind of a reference (RTmp, RCon, ...), or -1 for the
 * empty reference R. */
static inline int rtype(Ref r)
{
	return req(r, R) ? -1 : (int)r.type;
}
|
||||
|
||||
/* Signed value of a reference: the 29-bit val field is
 * sign-extended from bit 28. */
static inline int rsval(Ref r)
{
	int v;

	v = (int)r.val;
	if (v & 0x10000000)
		v -= 0x20000000;
	return v;
}
|
||||
|
||||
enum CmpI {
|
||||
Cieq,
|
||||
Cine,
|
||||
Cisge,
|
||||
Cisgt,
|
||||
Cisle,
|
||||
Cislt,
|
||||
Ciuge,
|
||||
Ciugt,
|
||||
Ciule,
|
||||
Ciult,
|
||||
NCmpI,
|
||||
};
|
||||
|
||||
enum CmpF {
|
||||
Cfeq,
|
||||
Cfge,
|
||||
Cfgt,
|
||||
Cfle,
|
||||
Cflt,
|
||||
Cfne,
|
||||
Cfo,
|
||||
Cfuo,
|
||||
NCmpF,
|
||||
NCmp = NCmpI + NCmpF,
|
||||
};
|
||||
|
||||
enum O {
|
||||
Oxxx,
|
||||
#define O(op, x, y) O##op,
|
||||
#include "ops.h"
|
||||
NOp,
|
||||
};
|
||||
|
||||
enum J {
|
||||
Jxxx,
|
||||
#define JMPS(X) \
|
||||
X(retw) X(retl) X(rets) X(retd) \
|
||||
X(retsb) X(retub) X(retsh) X(retuh) \
|
||||
X(retc) X(ret0) X(jmp) X(jnz) \
|
||||
X(jfieq) X(jfine) X(jfisge) X(jfisgt) \
|
||||
X(jfisle) X(jfislt) X(jfiuge) X(jfiugt) \
|
||||
X(jfiule) X(jfiult) X(jffeq) X(jffge) \
|
||||
X(jffgt) X(jffle) X(jfflt) X(jffne) \
|
||||
X(jffo) X(jffuo) X(hlt)
|
||||
#define X(j) J##j,
|
||||
JMPS(X)
|
||||
#undef X
|
||||
NJmp
|
||||
};
|
||||
|
||||
enum {
|
||||
Ocmpw = Oceqw,
|
||||
Ocmpw1 = Ocultw,
|
||||
Ocmpl = Oceql,
|
||||
Ocmpl1 = Ocultl,
|
||||
Ocmps = Oceqs,
|
||||
Ocmps1 = Ocuos,
|
||||
Ocmpd = Oceqd,
|
||||
Ocmpd1 = Ocuod,
|
||||
Oalloc = Oalloc4,
|
||||
Oalloc1 = Oalloc16,
|
||||
Oflag = Oflagieq,
|
||||
Oflag1 = Oflagfuo,
|
||||
Oxsel = Oxselieq,
|
||||
Oxsel1 = Oxselfuo,
|
||||
NPubOp = Onop,
|
||||
Jjf = Jjfieq,
|
||||
Jjf1 = Jjffuo,
|
||||
};
|
||||
|
||||
#define INRANGE(x, l, u) ((unsigned)(x) - l <= u - l) /* linear in x */
|
||||
#define isstore(o) INRANGE(o, Ostoreb, Ostored)
|
||||
#define isload(o) INRANGE(o, Oloadsb, Oload)
|
||||
#define isalloc(o) INRANGE(o, Oalloc4, Oalloc16)
|
||||
#define isext(o) INRANGE(o, Oextsb, Oextuw)
|
||||
#define ispar(o) INRANGE(o, Opar, Opare)
|
||||
#define isarg(o) INRANGE(o, Oarg, Oargv)
|
||||
#define isret(j) INRANGE(j, Jretw, Jret0)
|
||||
#define isparbh(o) INRANGE(o, Oparsb, Oparuh)
|
||||
#define isargbh(o) INRANGE(o, Oargsb, Oarguh)
|
||||
#define isretbh(j) INRANGE(j, Jretsb, Jretuh)
|
||||
#define isxsel(o) INRANGE(o, Oxsel, Oxsel1)
|
||||
|
||||
enum {
|
||||
Kx = -1, /* "top" class (see usecheck() and clsmerge()) */
|
||||
Kw,
|
||||
Kl,
|
||||
Ks,
|
||||
Kd
|
||||
};
|
||||
|
||||
#define KWIDE(k) ((k)&1)
|
||||
#define KBASE(k) ((k)>>1)
|
||||
|
||||
struct Op {
|
||||
char *name;
|
||||
short argcls[2][4];
|
||||
uint canfold:1;
|
||||
uint hasid:1; /* op identity value? */
|
||||
uint idval:1; /* identity value 0/1 */
|
||||
uint commutes:1; /* commutative op? */
|
||||
uint assoc:1; /* associative op? */
|
||||
uint idemp:1; /* idempotent op? */
|
||||
uint cmpeqwl:1; /* Kl/Kw cmp eq/ne? */
|
||||
uint cmplgtewl:1; /* Kl/Kw cmp lt/gt/le/ge? */
|
||||
uint eqval:1; /* 1 for eq; 0 for ne */
|
||||
uint pinned:1; /* GCM pinned op? */
|
||||
};
|
||||
|
||||
struct Ins {
|
||||
uint op:30;
|
||||
uint cls:2;
|
||||
Ref to;
|
||||
Ref arg[2];
|
||||
};
|
||||
|
||||
struct Phi {
|
||||
Ref to;
|
||||
Ref *arg;
|
||||
Blk **blk;
|
||||
uint narg;
|
||||
short cls;
|
||||
uint visit:1;
|
||||
Phi *link;
|
||||
};
|
||||
|
||||
struct Blk {
|
||||
Phi *phi;
|
||||
Ins *ins;
|
||||
uint nins;
|
||||
struct {
|
||||
short type;
|
||||
Ref arg;
|
||||
} jmp;
|
||||
Blk *s1;
|
||||
Blk *s2;
|
||||
Blk *link;
|
||||
|
||||
uint id;
|
||||
uint visit;
|
||||
|
||||
Blk *idom;
|
||||
Blk *dom, *dlink;
|
||||
Blk **fron;
|
||||
uint nfron;
|
||||
int depth;
|
||||
|
||||
Blk **pred;
|
||||
uint npred;
|
||||
BSet in[1], out[1], gen[1];
|
||||
int nlive[2];
|
||||
int loop;
|
||||
char name[NString];
|
||||
};
|
||||
|
||||
struct Use {
|
||||
enum {
|
||||
UXXX,
|
||||
UPhi,
|
||||
UIns,
|
||||
UJmp,
|
||||
} type;
|
||||
uint bid;
|
||||
union {
|
||||
Ins *ins;
|
||||
Phi *phi;
|
||||
} u;
|
||||
};
|
||||
|
||||
struct Sym {
|
||||
enum {
|
||||
SGlo,
|
||||
SThr,
|
||||
} type;
|
||||
uint32_t id;
|
||||
};
|
||||
|
||||
struct Num {
|
||||
uchar n;
|
||||
uchar nl, nr;
|
||||
Ref l, r;
|
||||
};
|
||||
|
||||
enum {
|
||||
NoAlias,
|
||||
MayAlias,
|
||||
MustAlias
|
||||
};
|
||||
|
||||
struct Alias {
|
||||
enum {
|
||||
ABot = 0,
|
||||
ALoc = 1, /* stack local */
|
||||
ACon = 2,
|
||||
AEsc = 3, /* stack escaping */
|
||||
ASym = 4,
|
||||
AUnk = 6,
|
||||
#define astack(t) ((t) & 1)
|
||||
} type;
|
||||
int base;
|
||||
int64_t offset;
|
||||
union {
|
||||
Sym sym;
|
||||
struct {
|
||||
int sz; /* -1 if > NBit */
|
||||
bits m;
|
||||
} loc;
|
||||
} u;
|
||||
Alias *slot;
|
||||
};
|
||||
|
||||
struct Tmp {
|
||||
char name[NString];
|
||||
Ins *def;
|
||||
Use *use;
|
||||
uint ndef, nuse;
|
||||
uint bid; /* id of a defining block */
|
||||
uint cost;
|
||||
int slot; /* -1 for unset */
|
||||
short cls;
|
||||
struct {
|
||||
int r; /* register or -1 */
|
||||
int w; /* weight */
|
||||
bits m; /* avoid these registers */
|
||||
} hint;
|
||||
int phi;
|
||||
Alias alias;
|
||||
enum {
|
||||
WFull,
|
||||
Wsb, /* must match Oload/Oext order */
|
||||
Wub,
|
||||
Wsh,
|
||||
Wuh,
|
||||
Wsw,
|
||||
Wuw
|
||||
} width;
|
||||
int visit;
|
||||
uint gcmbid;
|
||||
};
|
||||
|
||||
struct Con {
|
||||
enum {
|
||||
CUndef,
|
||||
CBits,
|
||||
CAddr,
|
||||
} type;
|
||||
Sym sym;
|
||||
union {
|
||||
int64_t i;
|
||||
double d;
|
||||
float s;
|
||||
} bits;
|
||||
char flt; /* 1 to print as s, 2 to print as d */
|
||||
};
|
||||
|
||||
typedef struct Addr Addr;
|
||||
|
||||
struct Addr { /* amd64 addressing */
|
||||
Con offset;
|
||||
Ref base;
|
||||
Ref index;
|
||||
int scale;
|
||||
};
|
||||
|
||||
struct Lnk {
|
||||
char export;
|
||||
char thread;
|
||||
char common;
|
||||
char align;
|
||||
char *sec;
|
||||
char *secf;
|
||||
};
|
||||
|
||||
struct Fn {
|
||||
Blk *start;
|
||||
Tmp *tmp;
|
||||
Con *con;
|
||||
Mem *mem;
|
||||
int ntmp;
|
||||
int ncon;
|
||||
int nmem;
|
||||
uint nblk;
|
||||
int retty; /* index in typ[], -1 if no aggregate return */
|
||||
Ref retr;
|
||||
Blk **rpo;
|
||||
bits reg;
|
||||
int slot;
|
||||
int salign;
|
||||
char vararg;
|
||||
char dynalloc;
|
||||
char leaf;
|
||||
char name[NString];
|
||||
Lnk lnk;
|
||||
};
|
||||
|
||||
struct Typ {
|
||||
char name[NString];
|
||||
char isdark;
|
||||
char isunion;
|
||||
int align;
|
||||
uint64_t size;
|
||||
uint nunion;
|
||||
struct Field {
|
||||
enum {
|
||||
FEnd,
|
||||
Fb,
|
||||
Fh,
|
||||
Fw,
|
||||
Fl,
|
||||
Fs,
|
||||
Fd,
|
||||
FPad,
|
||||
FTyp,
|
||||
} type;
|
||||
uint len; /* or index in typ[] for FTyp */
|
||||
} (*fields)[NField+1];
|
||||
};
|
||||
|
||||
struct Dat {
|
||||
enum {
|
||||
DStart,
|
||||
DEnd,
|
||||
DB,
|
||||
DH,
|
||||
DW,
|
||||
DL,
|
||||
DZ
|
||||
} type;
|
||||
char *name;
|
||||
Lnk *lnk;
|
||||
union {
|
||||
int64_t num;
|
||||
double fltd;
|
||||
float flts;
|
||||
char *str;
|
||||
struct {
|
||||
char *name;
|
||||
int64_t off;
|
||||
} ref;
|
||||
} u;
|
||||
char isref;
|
||||
char isstr;
|
||||
};
|
||||
|
||||
/* main.c */
|
||||
extern Target T;
|
||||
extern char debug['Z'+1];
|
||||
|
||||
/* util.c */
|
||||
typedef enum {
|
||||
PHeap, /* free() necessary */
|
||||
PFn, /* discarded after processing the function */
|
||||
} Pool;
|
||||
|
||||
extern Typ *typ;
|
||||
extern Ins insb[NIns], *curi;
|
||||
uint32_t hash(char *);
|
||||
void die_(char *, char *, ...) __attribute__((noreturn));
|
||||
void *emalloc(size_t);
|
||||
void *alloc(size_t);
|
||||
void freeall(void);
|
||||
void *vnew(ulong, size_t, Pool);
|
||||
void vfree(void *);
|
||||
void vgrow(void *, ulong);
|
||||
void addins(Ins **, uint *, Ins *);
|
||||
void addbins(Ins **, uint *, Blk *);
|
||||
void strf(char[NString], char *, ...);
|
||||
uint32_t intern(char *);
|
||||
char *str(uint32_t);
|
||||
int argcls(Ins *, int);
|
||||
int isreg(Ref);
|
||||
int iscmp(int, int *, int *);
|
||||
void igroup(Blk *, Ins *, Ins **, Ins **);
|
||||
void emit(int, int, Ref, Ref, Ref);
|
||||
void emiti(Ins);
|
||||
void idup(Blk *, Ins *, ulong);
|
||||
Ins *icpy(Ins *, Ins *, ulong);
|
||||
int cmpop(int);
|
||||
int cmpneg(int);
|
||||
int cmpwlneg(int);
|
||||
int clsmerge(short *, short);
|
||||
int phicls(int, Tmp *);
|
||||
uint phiargn(Phi *, Blk *);
|
||||
Ref phiarg(Phi *, Blk *);
|
||||
Ref newtmp(char *, int, Fn *);
|
||||
void chuse(Ref, int, Fn *);
|
||||
int symeq(Sym, Sym);
|
||||
Ref newcon(Con *, Fn *);
|
||||
Ref getcon(int64_t, Fn *);
|
||||
int addcon(Con *, Con *, int);
|
||||
int isconbits(Fn *fn, Ref r, int64_t *v);
|
||||
void salloc(Ref, Ref, Fn *);
|
||||
void dumpts(BSet *, Tmp *, FILE *);
|
||||
void runmatch(uchar *, Num *, Ref, Ref *);
|
||||
void bsinit(BSet *, uint);
|
||||
void bszero(BSet *);
|
||||
uint bscount(BSet *);
|
||||
void bsset(BSet *, uint);
|
||||
void bsclr(BSet *, uint);
|
||||
void bscopy(BSet *, BSet *);
|
||||
void bsunion(BSet *, BSet *);
|
||||
void bsinter(BSet *, BSet *);
|
||||
void bsdiff(BSet *, BSet *);
|
||||
int bsequal(BSet *, BSet *);
|
||||
int bsiter(BSet *, int *);
|
||||
|
||||
static inline int
|
||||
bshas(BSet *bs, uint elt)
|
||||
{
|
||||
assert(elt < bs->nt * NBit);
|
||||
return (bs->t[elt/NBit] & BIT(elt%NBit)) != 0;
|
||||
}
|
||||
|
||||
/* parse.c */
|
||||
extern Op optab[NOp];
|
||||
void parse(FILE *, char *, void (char *), void (Dat *), void (Fn *));
|
||||
void printfn(Fn *, FILE *);
|
||||
void printref(Ref, Fn *, FILE *);
|
||||
void err(char *, ...) __attribute__((noreturn));
|
||||
|
||||
/* abi.c */
|
||||
void elimsb(Fn *);
|
||||
|
||||
/* cfg.c */
|
||||
Blk *newblk(void);
|
||||
void fillpreds(Fn *);
|
||||
void fillcfg(Fn *);
|
||||
void filldom(Fn *);
|
||||
int sdom(Blk *, Blk *);
|
||||
int dom(Blk *, Blk *);
|
||||
void fillfron(Fn *);
|
||||
void loopiter(Fn *, void (*)(Blk *, Blk *));
|
||||
void filldepth(Fn *);
|
||||
Blk *lca(Blk *, Blk *);
|
||||
void fillloop(Fn *);
|
||||
void simpljmp(Fn *);
|
||||
int reaches(Fn *, Blk *, Blk *);
|
||||
int reachesnotvia(Fn *, Blk *, Blk *, Blk *);
|
||||
int ifgraph(Blk *, Blk **, Blk **, Blk **);
|
||||
void simplcfg(Fn *);
|
||||
|
||||
/* mem.c */
|
||||
void promote(Fn *);
|
||||
void coalesce(Fn *);
|
||||
|
||||
/* alias.c */
|
||||
void fillalias(Fn *);
|
||||
void getalias(Alias *, Ref, Fn *);
|
||||
int alias(Ref, int, int, Ref, int, int *, Fn *);
|
||||
int escapes(Ref, Fn *);
|
||||
|
||||
/* load.c */
|
||||
int loadsz(Ins *);
|
||||
int storesz(Ins *);
|
||||
void loadopt(Fn *);
|
||||
|
||||
/* ssa.c */
|
||||
void adduse(Tmp *, int, Blk *, ...);
|
||||
void filluse(Fn *);
|
||||
void ssa(Fn *);
|
||||
void ssacheck(Fn *);
|
||||
|
||||
/* copy.c */
|
||||
void narrowpars(Fn *fn);
|
||||
Ref copyref(Fn *, Blk *, Ins *);
|
||||
Ref phicopyref(Fn *, Blk *, Phi *);
|
||||
|
||||
/* fold.c */
|
||||
int foldint(Con *, int, int, Con *, Con *);
|
||||
Ref foldref(Fn *, Ins *);
|
||||
|
||||
/* gvn.c */
|
||||
extern Ref con01[2]; /* 0 and 1 */
|
||||
int zeroval(Fn *, Blk *, Ref, int, int *);
|
||||
void gvn(Fn *);
|
||||
|
||||
/* gcm.c */
|
||||
int pinned(Ins *);
|
||||
void gcm(Fn *);
|
||||
|
||||
/* ifopt.c */
|
||||
void ifconvert(Fn *fn);
|
||||
|
||||
/* simpl.c */
|
||||
void simpl(Fn *);
|
||||
|
||||
/* live.c */
|
||||
void liveon(BSet *, Blk *, Blk *);
|
||||
void filllive(Fn *);
|
||||
|
||||
/* spill.c */
|
||||
void fillcost(Fn *);
|
||||
void spill(Fn *);
|
||||
|
||||
/* rega.c */
|
||||
void rega(Fn *);
|
||||
|
||||
/* emit.c */
|
||||
void emitfnlnk(char *, Lnk *, FILE *);
|
||||
void emitdat(Dat *, FILE *);
|
||||
void emitdbgfile(char *, FILE *);
|
||||
void emitdbgloc(uint, uint, FILE *);
|
||||
int stashbits(bits, int);
|
||||
void elf_emitfnfin(char *, FILE *);
|
||||
void elf_emitfin(FILE *);
|
||||
void macho_emitfin(FILE *);
|
||||
void pe_emitfin(FILE *);
|
||||
82
src/qbe/amd64/all.h
Normal file
82
src/qbe/amd64/all.h
Normal file
@@ -0,0 +1,82 @@
|
||||
#include "../all.h"
|
||||
|
||||
typedef struct Amd64Op Amd64Op;
|
||||
|
||||
enum Amd64Reg {
|
||||
RAX = RXX+1, /* caller-save */
|
||||
RCX, /* caller-save */
|
||||
RDX, /* caller-save */
|
||||
RSI, /* caller-save on sysv, callee-save on win */
|
||||
RDI, /* caller-save on sysv, callee-save on win */
|
||||
R8, /* caller-save */
|
||||
R9, /* caller-save */
|
||||
R10, /* caller-save */
|
||||
R11, /* caller-save */
|
||||
|
||||
RBX, /* callee-save */
|
||||
R12,
|
||||
R13,
|
||||
R14,
|
||||
R15,
|
||||
|
||||
RBP, /* globally live */
|
||||
RSP,
|
||||
|
||||
XMM0, /* sse */
|
||||
XMM1,
|
||||
XMM2,
|
||||
XMM3,
|
||||
XMM4,
|
||||
XMM5,
|
||||
XMM6,
|
||||
XMM7,
|
||||
XMM8,
|
||||
XMM9,
|
||||
XMM10,
|
||||
XMM11,
|
||||
XMM12,
|
||||
XMM13,
|
||||
XMM14,
|
||||
XMM15,
|
||||
|
||||
NFPR = XMM14 - XMM0 + 1, /* reserve XMM15 */
|
||||
NGPR = RSP - RAX + 1,
|
||||
NFPS = NFPR,
|
||||
|
||||
NGPS_SYSV = R11 - RAX + 1,
|
||||
NCLR_SYSV = R15 - RBX + 1,
|
||||
|
||||
NGPS_WIN = R11 - RAX + 1 - 2, /* -2 for RDI/RDI */
|
||||
NCLR_WIN = R15 - RBX + 1 + 2, /* +2 for RDI/RDI */
|
||||
};
|
||||
MAKESURE(reg_not_tmp, XMM15 < (int)Tmp0);
|
||||
|
||||
struct Amd64Op {
|
||||
char nmem;
|
||||
char zflag;
|
||||
char lflag;
|
||||
};
|
||||
|
||||
/* targ.c */
|
||||
extern Amd64Op amd64_op[];
|
||||
|
||||
/* sysv.c (abi) */
|
||||
extern int amd64_sysv_rsave[];
|
||||
extern int amd64_sysv_rclob[];
|
||||
bits amd64_sysv_retregs(Ref, int[2]);
|
||||
bits amd64_sysv_argregs(Ref, int[2]);
|
||||
void amd64_sysv_abi(Fn *);
|
||||
|
||||
/* winabi.c */
|
||||
extern int amd64_winabi_rsave[];
|
||||
extern int amd64_winabi_rclob[];
|
||||
bits amd64_winabi_retregs(Ref, int[2]);
|
||||
bits amd64_winabi_argregs(Ref, int[2]);
|
||||
void amd64_winabi_abi(Fn *);
|
||||
|
||||
/* isel.c */
|
||||
void amd64_isel(Fn *);
|
||||
|
||||
/* emit.c */
|
||||
void amd64_sysv_emitfn(Fn *, FILE *);
|
||||
void amd64_winabi_emitfn(Fn *, FILE *);
|
||||
844
src/qbe/amd64/emit.c
Normal file
844
src/qbe/amd64/emit.c
Normal file
@@ -0,0 +1,844 @@
|
||||
#include "all.h"
|
||||
|
||||
|
||||
typedef struct E E;
|
||||
|
||||
struct E {
|
||||
FILE *f;
|
||||
Fn *fn;
|
||||
int fp;
|
||||
uint64_t fsz;
|
||||
int nclob;
|
||||
};
|
||||
|
||||
#define CMP(X) \
|
||||
X(Ciule, "be", "a") \
|
||||
X(Ciult, "b", "ae") \
|
||||
X(Cisle, "le", "g") \
|
||||
X(Cislt, "l", "ge") \
|
||||
X(Cisgt, "g", "le") \
|
||||
X(Cisge, "ge", "l") \
|
||||
X(Ciugt, "a", "be") \
|
||||
X(Ciuge, "ae", "b") \
|
||||
X(Cieq, "z", "nz") \
|
||||
X(Cine, "nz", "z") \
|
||||
X(NCmpI+Cfle, "be", "a") \
|
||||
X(NCmpI+Cflt, "b", "ae") \
|
||||
X(NCmpI+Cfgt, "a", "be") \
|
||||
X(NCmpI+Cfge, "ae", "b") \
|
||||
X(NCmpI+Cfo, "np", "p") \
|
||||
X(NCmpI+Cfuo, "p", "np")
|
||||
|
||||
enum {
|
||||
SLong = 0,
|
||||
SWord = 1,
|
||||
SShort = 2,
|
||||
SByte = 3,
|
||||
|
||||
Ki = -1, /* matches Kw and Kl */
|
||||
Ka = -2, /* matches all classes */
|
||||
};
|
||||
|
||||
/* Instruction format strings:
|
||||
*
|
||||
* if the format string starts with -, the instruction
|
||||
* is assumed to be 3-address and is put in 2-address
|
||||
* mode using an extra mov if necessary
|
||||
*
|
||||
* if the format string starts with +, the same as the
|
||||
* above applies, but commutativity is also assumed
|
||||
*
|
||||
* %k is used to set the class of the instruction,
|
||||
* it'll expand to "l", "q", "ss", "sd", depending
|
||||
* on the instruction class
|
||||
* %0 designates the first argument
|
||||
* %1 designates the second argument
|
||||
* %= designates the result
|
||||
*
|
||||
* if %k is not used, a prefix to 0, 1, or = must be
|
||||
* added, it can be:
|
||||
* M - memory reference
|
||||
* L - long (64 bits)
|
||||
* W - word (32 bits)
|
||||
* H - short (16 bits)
|
||||
* B - byte (8 bits)
|
||||
* S - single precision float
|
||||
* D - double precision float
|
||||
*/
|
||||
static struct {
|
||||
short op;
|
||||
short cls;
|
||||
char *fmt;
|
||||
} omap[] = {
|
||||
{ Oadd, Ka, "+add%k %1, %=" },
|
||||
{ Osub, Ka, "-sub%k %1, %=" },
|
||||
{ Oand, Ki, "+and%k %1, %=" },
|
||||
{ Oor, Ki, "+or%k %1, %=" },
|
||||
{ Oxor, Ki, "+xor%k %1, %=" },
|
||||
{ Osar, Ki, "-sar%k %B1, %=" },
|
||||
{ Oshr, Ki, "-shr%k %B1, %=" },
|
||||
{ Oshl, Ki, "-shl%k %B1, %=" },
|
||||
{ Omul, Ki, "+imul%k %1, %=" },
|
||||
{ Omul, Ks, "+mulss %1, %=" },
|
||||
{ Omul, Kd, "+mulsd %1, %=" },
|
||||
{ Odiv, Ka, "-div%k %1, %=" },
|
||||
{ Ostorel, Ka, "movq %L0, %M1" },
|
||||
{ Ostorew, Ka, "movl %W0, %M1" },
|
||||
{ Ostoreh, Ka, "movw %H0, %M1" },
|
||||
{ Ostoreb, Ka, "movb %B0, %M1" },
|
||||
{ Ostores, Ka, "movss %S0, %M1" },
|
||||
{ Ostored, Ka, "movsd %D0, %M1" },
|
||||
{ Oload, Ka, "mov%k %M0, %=" },
|
||||
{ Oloadsw, Kl, "movslq %M0, %L=" },
|
||||
{ Oloadsw, Kw, "movl %M0, %W=" },
|
||||
{ Oloaduw, Ki, "movl %M0, %W=" },
|
||||
{ Oloadsh, Ki, "movsw%k %M0, %=" },
|
||||
{ Oloaduh, Ki, "movzw%k %M0, %=" },
|
||||
{ Oloadsb, Ki, "movsb%k %M0, %=" },
|
||||
{ Oloadub, Ki, "movzb%k %M0, %=" },
|
||||
{ Oextsw, Kl, "movslq %W0, %L=" },
|
||||
{ Oextuw, Kl, "movl %W0, %W=" },
|
||||
{ Oextsh, Ki, "movsw%k %H0, %=" },
|
||||
{ Oextuh, Ki, "movzw%k %H0, %=" },
|
||||
{ Oextsb, Ki, "movsb%k %B0, %=" },
|
||||
{ Oextub, Ki, "movzb%k %B0, %=" },
|
||||
|
||||
{ Oexts, Kd, "cvtss2sd %0, %=" },
|
||||
{ Otruncd, Ks, "cvtsd2ss %0, %=" },
|
||||
{ Ostosi, Ki, "cvttss2si%k %0, %=" },
|
||||
{ Odtosi, Ki, "cvttsd2si%k %0, %=" },
|
||||
{ Oswtof, Ka, "cvtsi2%k %W0, %=" },
|
||||
{ Osltof, Ka, "cvtsi2%k %L0, %=" },
|
||||
{ Ocast, Ki, "movq %D0, %L=" },
|
||||
{ Ocast, Ka, "movq %L0, %D=" },
|
||||
|
||||
{ Oaddr, Ki, "lea%k %M0, %=" },
|
||||
{ Oswap, Ki, "xchg%k %0, %1" },
|
||||
{ Osign, Kl, "cqto" },
|
||||
{ Osign, Kw, "cltd" },
|
||||
{ Oxdiv, Ki, "div%k %0" },
|
||||
{ Oxidiv, Ki, "idiv%k %0" },
|
||||
{ Oxcmp, Ks, "ucomiss %S0, %S1" },
|
||||
{ Oxcmp, Kd, "ucomisd %D0, %D1" },
|
||||
{ Oxcmp, Ki, "cmp%k %0, %1" },
|
||||
{ Oxtest, Ki, "test%k %0, %1" },
|
||||
#define X(c, s, _) \
|
||||
{ Oflag+c, Ki, "set" s " %B=\n\tmovzb%k %B=, %=" },
|
||||
CMP(X)
|
||||
#undef X
|
||||
{ Oflagfeq, Ki, "setz %B=\n\tmovzb%k %B=, %=" },
|
||||
{ Oflagfne, Ki, "setnz %B=\n\tmovzb%k %B=, %=" },
|
||||
{ NOp, 0, 0 }
|
||||
};
|
||||
|
||||
static char cmov[][2][16] = {
|
||||
#define X(c, s0, s1) \
|
||||
[c] = { \
|
||||
"cmov" s0 " %0, %=", \
|
||||
"cmov" s1 " %1, %=", \
|
||||
},
|
||||
CMP(X)
|
||||
#undef X
|
||||
};
|
||||
|
||||
static char *rname[][4] = {
|
||||
[RAX] = {"rax", "eax", "ax", "al"},
|
||||
[RBX] = {"rbx", "ebx", "bx", "bl"},
|
||||
[RCX] = {"rcx", "ecx", "cx", "cl"},
|
||||
[RDX] = {"rdx", "edx", "dx", "dl"},
|
||||
[RSI] = {"rsi", "esi", "si", "sil"},
|
||||
[RDI] = {"rdi", "edi", "di", "dil"},
|
||||
[RBP] = {"rbp", "ebp", "bp", "bpl"},
|
||||
[RSP] = {"rsp", "esp", "sp", "spl"},
|
||||
[R8 ] = {"r8" , "r8d", "r8w", "r8b"},
|
||||
[R9 ] = {"r9" , "r9d", "r9w", "r9b"},
|
||||
[R10] = {"r10", "r10d", "r10w", "r10b"},
|
||||
[R11] = {"r11", "r11d", "r11w", "r11b"},
|
||||
[R12] = {"r12", "r12d", "r12w", "r12b"},
|
||||
[R13] = {"r13", "r13d", "r13w", "r13b"},
|
||||
[R14] = {"r14", "r14d", "r14w", "r14b"},
|
||||
[R15] = {"r15", "r15d", "r15w", "r15b"},
|
||||
};
|
||||
|
||||
|
||||
static int
|
||||
slot(Ref r, E *e)
|
||||
{
|
||||
int s;
|
||||
|
||||
s = rsval(r);
|
||||
assert(s <= e->fn->slot);
|
||||
/* specific to NAlign == 3 */
|
||||
if (s < 0) {
|
||||
if (e->fp == RSP)
|
||||
return 4*-s - 8 + e->fsz + e->nclob*8;
|
||||
else
|
||||
return 4*-s;
|
||||
}
|
||||
else if (e->fp == RSP)
|
||||
return 4*s + e->nclob*8;
|
||||
else if (e->fn->vararg) {
|
||||
if (T.windows)
|
||||
return -4 * (e->fn->slot - s);
|
||||
else
|
||||
return -176 + -4 * (e->fn->slot - s);
|
||||
} else
|
||||
return -4 * (e->fn->slot - s);
|
||||
}
|
||||
|
||||
static void
|
||||
emitcon(Con *con, E *e)
|
||||
{
|
||||
char *p, *l;
|
||||
|
||||
switch (con->type) {
|
||||
case CAddr:
|
||||
l = str(con->sym.id);
|
||||
p = l[0] == '"' ? "" : T.assym;
|
||||
if (con->sym.type == SThr) {
|
||||
if (T.apple)
|
||||
fprintf(e->f, "%s%s@TLVP", p, l);
|
||||
else
|
||||
fprintf(e->f, "%%fs:%s%s@tpoff", p, l);
|
||||
} else
|
||||
fprintf(e->f, "%s%s", p, l);
|
||||
if (con->bits.i)
|
||||
fprintf(e->f, "%+"PRId64, con->bits.i);
|
||||
break;
|
||||
case CBits:
|
||||
fprintf(e->f, "%"PRId64, con->bits.i);
|
||||
break;
|
||||
default:
|
||||
die("unreachable");
|
||||
}
|
||||
}
|
||||
|
||||
static char *
|
||||
regtoa(int reg, int sz)
|
||||
{
|
||||
static char buf[6];
|
||||
|
||||
assert(reg <= XMM15);
|
||||
if (reg >= XMM0) {
|
||||
sprintf(buf, "xmm%d", reg-XMM0);
|
||||
return buf;
|
||||
} else
|
||||
return rname[reg][sz];
|
||||
}
|
||||
|
||||
static Ref
|
||||
getarg(char c, Ins *i)
|
||||
{
|
||||
switch (c) {
|
||||
case '0':
|
||||
return i->arg[0];
|
||||
case '1':
|
||||
return i->arg[1];
|
||||
case '=':
|
||||
return i->to;
|
||||
default:
|
||||
die("invalid arg letter %c", c);
|
||||
}
|
||||
}
|
||||
|
||||
static void emitins(Ins, E *);
|
||||
|
||||
static void
|
||||
emitcopy(Ref r1, Ref r2, int k, E *e)
|
||||
{
|
||||
Ins icp;
|
||||
|
||||
icp.op = Ocopy;
|
||||
icp.arg[0] = r2;
|
||||
icp.to = r1;
|
||||
icp.cls = k;
|
||||
emitins(icp, e);
|
||||
}
|
||||
|
||||
static void
|
||||
emitf(char *s, Ins *i, E *e)
|
||||
{
|
||||
static char clstoa[][3] = {"l", "q", "ss", "sd"};
|
||||
char c;
|
||||
int sz;
|
||||
Ref ref;
|
||||
Mem *m;
|
||||
Con off;
|
||||
|
||||
switch (*s) {
|
||||
case '+':
|
||||
if (req(i->arg[1], i->to)) {
|
||||
ref = i->arg[0];
|
||||
i->arg[0] = i->arg[1];
|
||||
i->arg[1] = ref;
|
||||
}
|
||||
/* fall through */
|
||||
case '-':
|
||||
assert((!req(i->arg[1], i->to) || req(i->arg[0], i->to)) &&
|
||||
"cannot convert to 2-address");
|
||||
emitcopy(i->to, i->arg[0], i->cls, e);
|
||||
s++;
|
||||
break;
|
||||
}
|
||||
|
||||
fputc('\t', e->f);
|
||||
Next:
|
||||
while ((c = *s++) != '%')
|
||||
if (!c) {
|
||||
fputc('\n', e->f);
|
||||
return;
|
||||
} else
|
||||
fputc(c, e->f);
|
||||
switch ((c = *s++)) {
|
||||
case '%':
|
||||
fputc('%', e->f);
|
||||
break;
|
||||
case 'k':
|
||||
fputs(clstoa[i->cls], e->f);
|
||||
break;
|
||||
case '0':
|
||||
case '1':
|
||||
case '=':
|
||||
sz = KWIDE(i->cls) ? SLong : SWord;
|
||||
s--;
|
||||
goto Ref;
|
||||
case 'D':
|
||||
case 'S':
|
||||
sz = SLong; /* does not matter for floats */
|
||||
Ref:
|
||||
c = *s++;
|
||||
ref = getarg(c, i);
|
||||
switch (rtype(ref)) {
|
||||
case RTmp:
|
||||
assert(isreg(ref));
|
||||
fprintf(e->f, "%%%s", regtoa(ref.val, sz));
|
||||
break;
|
||||
case RSlot:
|
||||
fprintf(e->f, "%d(%%%s)",
|
||||
slot(ref, e),
|
||||
regtoa(e->fp, SLong)
|
||||
);
|
||||
break;
|
||||
case RMem:
|
||||
Mem:
|
||||
m = &e->fn->mem[ref.val];
|
||||
if (rtype(m->base) == RSlot) {
|
||||
off.type = CBits;
|
||||
off.bits.i = slot(m->base, e);
|
||||
addcon(&m->offset, &off, 1);
|
||||
m->base = TMP(e->fp);
|
||||
}
|
||||
if (m->offset.type != CUndef)
|
||||
emitcon(&m->offset, e);
|
||||
fputc('(', e->f);
|
||||
if (!req(m->base, R))
|
||||
fprintf(e->f, "%%%s",
|
||||
regtoa(m->base.val, SLong)
|
||||
);
|
||||
else if (m->offset.type == CAddr)
|
||||
fprintf(e->f, "%%rip");
|
||||
if (!req(m->index, R))
|
||||
fprintf(e->f, ", %%%s, %d",
|
||||
regtoa(m->index.val, SLong),
|
||||
m->scale
|
||||
);
|
||||
fputc(')', e->f);
|
||||
break;
|
||||
case RCon:
|
||||
fputc('$', e->f);
|
||||
emitcon(&e->fn->con[ref.val], e);
|
||||
break;
|
||||
default:
|
||||
die("unreachable");
|
||||
}
|
||||
break;
|
||||
case 'L':
|
||||
sz = SLong;
|
||||
goto Ref;
|
||||
case 'W':
|
||||
sz = SWord;
|
||||
goto Ref;
|
||||
case 'H':
|
||||
sz = SShort;
|
||||
goto Ref;
|
||||
case 'B':
|
||||
sz = SByte;
|
||||
goto Ref;
|
||||
case 'M':
|
||||
c = *s++;
|
||||
ref = getarg(c, i);
|
||||
switch (rtype(ref)) {
|
||||
case RMem:
|
||||
goto Mem;
|
||||
case RSlot:
|
||||
fprintf(e->f, "%d(%%%s)",
|
||||
slot(ref, e),
|
||||
regtoa(e->fp, SLong)
|
||||
);
|
||||
break;
|
||||
case RCon:
|
||||
off = e->fn->con[ref.val];
|
||||
emitcon(&off, e);
|
||||
if (off.type == CAddr)
|
||||
if (off.sym.type != SThr || T.apple)
|
||||
fprintf(e->f, "(%%rip)");
|
||||
break;
|
||||
case RTmp:
|
||||
assert(isreg(ref));
|
||||
fprintf(e->f, "(%%%s)", regtoa(ref.val, SLong));
|
||||
break;
|
||||
default:
|
||||
die("unreachable");
|
||||
}
|
||||
break;
|
||||
default:
|
||||
die("invalid format specifier %%%c", c);
|
||||
}
|
||||
goto Next;
|
||||
}
|
||||
|
||||
static bits negmask[4] = {
|
||||
[Ks] = 0x80000000,
|
||||
[Kd] = 0x8000000000000000,
|
||||
};
|
||||
|
||||
static void
|
||||
emitins(Ins i, E *e)
|
||||
{
|
||||
Ref r;
|
||||
int64_t val;
|
||||
int o, t0;
|
||||
Ins ineg;
|
||||
Con *con;
|
||||
char *sym;
|
||||
|
||||
switch (i.op) {
|
||||
default:
|
||||
if (isxsel(i.op))
|
||||
goto case_Oxsel;
|
||||
Table:
|
||||
/* most instructions are just pulled out of
|
||||
* the table omap[], some special cases are
|
||||
* detailed below */
|
||||
for (o=0;; o++) {
|
||||
/* this linear search should really be a binary
|
||||
* search */
|
||||
if (omap[o].op == NOp)
|
||||
die("no match for %s(%c)",
|
||||
optab[i.op].name, "wlsd"[i.cls]);
|
||||
if (omap[o].op == i.op)
|
||||
if (omap[o].cls == i.cls
|
||||
|| (omap[o].cls == Ki && KBASE(i.cls) == 0)
|
||||
|| (omap[o].cls == Ka))
|
||||
break;
|
||||
}
|
||||
emitf(omap[o].fmt, &i, e);
|
||||
break;
|
||||
case Onop:
|
||||
/* just do nothing for nops, they are inserted
|
||||
* by some passes */
|
||||
break;
|
||||
case Omul:
|
||||
/* here, we try to use the 3-addresss form
|
||||
* of multiplication when possible */
|
||||
if (rtype(i.arg[1]) == RCon) {
|
||||
r = i.arg[0];
|
||||
i.arg[0] = i.arg[1];
|
||||
i.arg[1] = r;
|
||||
}
|
||||
if (KBASE(i.cls) == 0 /* only available for ints */
|
||||
&& rtype(i.arg[0]) == RCon
|
||||
&& rtype(i.arg[1]) == RTmp) {
|
||||
emitf("imul%k %0, %1, %=", &i, e);
|
||||
break;
|
||||
}
|
||||
goto Table;
|
||||
case Osub:
|
||||
/* we have to use the negation trick to handle
|
||||
* some 3-address subtractions */
|
||||
if (req(i.to, i.arg[1]) && !req(i.arg[0], i.to)) {
|
||||
ineg = (Ins){Oneg, i.cls, i.to, {i.to}};
|
||||
emitins(ineg, e);
|
||||
emitf("add%k %0, %=", &i, e);
|
||||
break;
|
||||
}
|
||||
goto Table;
|
||||
case Oneg:
|
||||
if (!req(i.to, i.arg[0]))
|
||||
emitf("mov%k %0, %=", &i, e);
|
||||
if (KBASE(i.cls) == 0)
|
||||
emitf("neg%k %=", &i, e);
|
||||
else
|
||||
fprintf(e->f,
|
||||
"\txorp%c %sfp%d(%%rip), %%%s\n",
|
||||
"xxsd"[i.cls],
|
||||
T.asloc,
|
||||
stashbits(negmask[i.cls], 16),
|
||||
regtoa(i.to.val, SLong)
|
||||
);
|
||||
break;
|
||||
case Odiv:
|
||||
/* use xmm15 to adjust the instruction when the
|
||||
* conversion to 2-address in emitf() would fail */
|
||||
if (req(i.to, i.arg[1])) {
|
||||
i.arg[1] = TMP(XMM0+15);
|
||||
emitf("mov%k %=, %1", &i, e);
|
||||
emitf("mov%k %0, %=", &i, e);
|
||||
i.arg[0] = i.to;
|
||||
}
|
||||
goto Table;
|
||||
case Ocopy:
|
||||
/* copies are used for many things; see my note
|
||||
* to understand how to load big constants:
|
||||
* https://c9x.me/notes/2015-09-19.html */
|
||||
assert(rtype(i.to) != RMem);
|
||||
if (req(i.to, R) || req(i.arg[0], R))
|
||||
break;
|
||||
if (req(i.to, i.arg[0]))
|
||||
break;
|
||||
t0 = rtype(i.arg[0]);
|
||||
if (i.cls == Kl
|
||||
&& t0 == RCon
|
||||
&& e->fn->con[i.arg[0].val].type == CBits) {
|
||||
val = e->fn->con[i.arg[0].val].bits.i;
|
||||
if (isreg(i.to))
|
||||
if (val >= 0 && val <= UINT32_MAX) {
|
||||
emitf("movl %W0, %W=", &i, e);
|
||||
break;
|
||||
}
|
||||
if (rtype(i.to) == RSlot)
|
||||
if (val < INT32_MIN || val > INT32_MAX) {
|
||||
emitf("movl %0, %=", &i, e);
|
||||
emitf("movl %0>>32, 4+%=", &i, e);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (isreg(i.to)
|
||||
&& t0 == RCon
|
||||
&& e->fn->con[i.arg[0].val].type == CAddr) {
|
||||
emitf("lea%k %M0, %=", &i, e);
|
||||
break;
|
||||
}
|
||||
if (rtype(i.to) == RSlot
|
||||
&& (t0 == RSlot || t0 == RMem)) {
|
||||
i.cls = KWIDE(i.cls) ? Kd : Ks;
|
||||
i.arg[1] = TMP(XMM0+15);
|
||||
emitf("mov%k %0, %1", &i, e);
|
||||
emitf("mov%k %1, %=", &i, e);
|
||||
break;
|
||||
}
|
||||
/* conveniently, the assembler knows if it
|
||||
* should use movabsq when reading movq */
|
||||
emitf("mov%k %0, %=", &i, e);
|
||||
break;
|
||||
case Oaddr:
|
||||
if (!T.apple
|
||||
&& rtype(i.arg[0]) == RCon
|
||||
&& e->fn->con[i.arg[0].val].sym.type == SThr) {
|
||||
/* derive the symbol address from the TCB
|
||||
* address at offset 0 of %fs */
|
||||
assert(isreg(i.to));
|
||||
con = &e->fn->con[i.arg[0].val];
|
||||
sym = str(con->sym.id);
|
||||
emitf("movq %%fs:0, %L=", &i, e);
|
||||
fprintf(e->f, "\tleaq %s%s@tpoff",
|
||||
sym[0] == '"' ? "" : T.assym, sym);
|
||||
if (con->bits.i)
|
||||
fprintf(e->f, "%+"PRId64,
|
||||
con->bits.i);
|
||||
fprintf(e->f, "(%%%s), %%%s\n",
|
||||
regtoa(i.to.val, SLong),
|
||||
regtoa(i.to.val, SLong));
|
||||
break;
|
||||
}
|
||||
goto Table;
|
||||
case Ocall:
|
||||
/* calls simply have a weird syntax in AT&T
|
||||
* assembly... */
|
||||
switch (rtype(i.arg[0])) {
|
||||
case RCon:
|
||||
fprintf(e->f, "\tcallq ");
|
||||
emitcon(&e->fn->con[i.arg[0].val], e);
|
||||
fprintf(e->f, "\n");
|
||||
break;
|
||||
case RTmp:
|
||||
emitf("callq *%L0", &i, e);
|
||||
break;
|
||||
default:
|
||||
die("invalid call argument");
|
||||
}
|
||||
break;
|
||||
case Osalloc:
|
||||
/* there is no good reason why this is here
|
||||
* maybe we should split Osalloc in 2 different
|
||||
* instructions depending on the result
|
||||
*/
|
||||
assert(e->fp == RBP);
|
||||
emitf("subq %L0, %%rsp", &i, e);
|
||||
if (!req(i.to, R))
|
||||
emitcopy(i.to, TMP(RSP), Kl, e);
|
||||
break;
|
||||
case Oswap:
|
||||
if (KBASE(i.cls) == 0)
|
||||
goto Table;
|
||||
/* for floats, there is no swap instruction
|
||||
* so we use xmm15 as a temporary
|
||||
*/
|
||||
emitcopy(TMP(XMM0+15), i.arg[0], i.cls, e);
|
||||
emitcopy(i.arg[0], i.arg[1], i.cls, e);
|
||||
emitcopy(i.arg[1], TMP(XMM0+15), i.cls, e);
|
||||
break;
|
||||
case Odbgloc:
|
||||
emitdbgloc(i.arg[0].val, i.arg[1].val, e->f);
|
||||
break;
|
||||
case_Oxsel:
|
||||
if (req(i.to, i.arg[1]))
|
||||
emitf(cmov[i.op-Oxsel][0], &i, e);
|
||||
else {
|
||||
if (!req(i.to, i.arg[0]))
|
||||
emitf("mov %0, %=", &i, e);
|
||||
emitf(cmov[i.op-Oxsel][1], &i, e);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
sysv_framesz(E *e)
|
||||
{
|
||||
uint64_t i, o, f;
|
||||
|
||||
/* specific to NAlign == 3 */
|
||||
o = 0;
|
||||
if (!e->fn->leaf) {
|
||||
for (i=0, o=0; i<NCLR_SYSV; i++)
|
||||
o ^= e->fn->reg >> amd64_sysv_rclob[i];
|
||||
o &= 1;
|
||||
}
|
||||
f = e->fn->slot;
|
||||
f = (f + 3) & -4;
|
||||
if (f > 0
|
||||
&& e->fp == RSP
|
||||
&& e->fn->salign == 4)
|
||||
f += 2;
|
||||
e->fsz = 4*f + 8*o + 176*e->fn->vararg;
|
||||
}
|
||||
|
||||
void
|
||||
amd64_sysv_emitfn(Fn *fn, FILE *f)
|
||||
{
|
||||
static char *ctoa[] = {
|
||||
#define X(c, s, _) [c] = s,
|
||||
CMP(X)
|
||||
#undef X
|
||||
};
|
||||
static int id0;
|
||||
Blk *b, *s;
|
||||
Ins *i, itmp;
|
||||
int *r, c, o, n, lbl;
|
||||
uint p;
|
||||
E *e;
|
||||
|
||||
e = &(E){.f = f, .fn = fn};
|
||||
emitfnlnk(fn->name, &fn->lnk, f);
|
||||
fputs("\tendbr64\n", f);
|
||||
if (!fn->leaf || fn->vararg || fn->dynalloc) {
|
||||
e->fp = RBP;
|
||||
fputs("\tpushq %rbp\n\tmovq %rsp, %rbp\n", f);
|
||||
} else
|
||||
e->fp = RSP;
|
||||
sysv_framesz(e);
|
||||
if (e->fsz)
|
||||
fprintf(f, "\tsubq $%"PRIu64", %%rsp\n", e->fsz);
|
||||
if (fn->vararg) {
|
||||
o = -176;
|
||||
for (r=amd64_sysv_rsave; r<&amd64_sysv_rsave[6]; r++, o+=8)
|
||||
fprintf(f, "\tmovq %%%s, %d(%%rbp)\n", rname[*r][0], o);
|
||||
for (n=0; n<8; ++n, o+=16)
|
||||
fprintf(f, "\tmovaps %%xmm%d, %d(%%rbp)\n", n, o);
|
||||
}
|
||||
for (r=amd64_sysv_rclob; r<&amd64_sysv_rclob[NCLR_SYSV]; r++)
|
||||
if (fn->reg & BIT(*r)) {
|
||||
itmp.arg[0] = TMP(*r);
|
||||
emitf("pushq %L0", &itmp, e);
|
||||
e->nclob++;
|
||||
}
|
||||
|
||||
for (lbl=0, b=fn->start; b; b=b->link) {
|
||||
if (lbl || b->npred > 1) {
|
||||
for (p=0; p<b->npred; p++)
|
||||
if (b->pred[p]->id >= b->id)
|
||||
break;
|
||||
if (p != b->npred)
|
||||
fprintf(f, ".p2align 4\n");
|
||||
fprintf(f, "%sbb%d:\n", T.asloc, id0+b->id);
|
||||
}
|
||||
for (i=b->ins; i!=&b->ins[b->nins]; i++)
|
||||
emitins(*i, e);
|
||||
lbl = 1;
|
||||
switch (b->jmp.type) {
|
||||
case Jhlt:
|
||||
fprintf(f, "\tud2\n");
|
||||
break;
|
||||
case Jret0:
|
||||
if (fn->dynalloc)
|
||||
fprintf(f,
|
||||
"\tmovq %%rbp, %%rsp\n"
|
||||
"\tsubq $%"PRIu64", %%rsp\n",
|
||||
e->fsz + e->nclob * 8);
|
||||
for (r=&amd64_sysv_rclob[NCLR_SYSV]; r>amd64_sysv_rclob;)
|
||||
if (fn->reg & BIT(*--r)) {
|
||||
itmp.arg[0] = TMP(*r);
|
||||
emitf("popq %L0", &itmp, e);
|
||||
}
|
||||
if (e->fp == RBP)
|
||||
fputs("\tleave\n", f);
|
||||
else if (e->fsz)
|
||||
fprintf(f,
|
||||
"\taddq $%"PRIu64", %%rsp\n",
|
||||
e->fsz);
|
||||
fputs("\tret\n", f);
|
||||
break;
|
||||
case Jjmp:
|
||||
Jmp:
|
||||
if (b->s1 != b->link)
|
||||
fprintf(f, "\tjmp %sbb%d\n",
|
||||
T.asloc, id0+b->s1->id);
|
||||
else
|
||||
lbl = 0;
|
||||
break;
|
||||
default:
|
||||
c = b->jmp.type - Jjf;
|
||||
if (0 <= c && c <= NCmp) {
|
||||
if (b->link == b->s2) {
|
||||
s = b->s1;
|
||||
b->s1 = b->s2;
|
||||
b->s2 = s;
|
||||
} else
|
||||
c = cmpneg(c);
|
||||
fprintf(f, "\tj%s %sbb%d\n", ctoa[c],
|
||||
T.asloc, id0+b->s2->id);
|
||||
goto Jmp;
|
||||
}
|
||||
die("unhandled jump %d", b->jmp.type);
|
||||
}
|
||||
}
|
||||
id0 += fn->nblk;
|
||||
if (!T.apple)
|
||||
elf_emitfnfin(fn->name, f);
|
||||
}
|
||||
|
||||
static void
|
||||
winabi_framesz(E *e)
|
||||
{
|
||||
uint64_t i, o, f;
|
||||
|
||||
/* specific to NAlign == 3 */
|
||||
o = 0;
|
||||
if (!e->fn->leaf) {
|
||||
for (i=0, o=0; i<NCLR_WIN; i++)
|
||||
o ^= e->fn->reg >> amd64_winabi_rclob[i];
|
||||
o &= 1;
|
||||
}
|
||||
f = e->fn->slot;
|
||||
f = (f + 3) & -4;
|
||||
if (f > 0
|
||||
&& e->fp == RSP
|
||||
&& e->fn->salign == 4)
|
||||
f += 2;
|
||||
e->fsz = 4*f + 8*o;
|
||||
}
|
||||
|
||||
void
|
||||
amd64_winabi_emitfn(Fn *fn, FILE *f)
|
||||
{
|
||||
static char *ctoa[] = {
|
||||
#define X(c, s, _) [c] = s,
|
||||
CMP(X)
|
||||
#undef X
|
||||
};
|
||||
static int id0;
|
||||
Blk *b, *s;
|
||||
Ins *i, itmp;
|
||||
int *r, c, lbl;
|
||||
E *e;
|
||||
|
||||
e = &(E){.f = f, .fn = fn};
|
||||
emitfnlnk(fn->name, &fn->lnk, f);
|
||||
fputs("\tendbr64\n", f);
|
||||
if (fn->vararg) {
|
||||
fprintf(f, "\tmovq %%rcx, 0x8(%%rsp)\n");
|
||||
fprintf(f, "\tmovq %%rdx, 0x10(%%rsp)\n");
|
||||
fprintf(f, "\tmovq %%r8, 0x18(%%rsp)\n");
|
||||
fprintf(f, "\tmovq %%r9, 0x20(%%rsp)\n");
|
||||
}
|
||||
if (!fn->leaf || fn->vararg || fn->dynalloc) {
|
||||
e->fp = RBP;
|
||||
fputs("\tpushq %rbp\n\tmovq %rsp, %rbp\n", f);
|
||||
} else
|
||||
e->fp = RSP;
|
||||
winabi_framesz(e);
|
||||
if (e->fsz)
|
||||
fprintf(f, "\tsubq $%"PRIu64", %%rsp\n", e->fsz);
|
||||
for (r=amd64_winabi_rclob; r<&amd64_winabi_rclob[NCLR_WIN]; r++)
|
||||
if (fn->reg & BIT(*r)) {
|
||||
itmp.arg[0] = TMP(*r);
|
||||
emitf("pushq %L0", &itmp, e);
|
||||
e->nclob++;
|
||||
}
|
||||
|
||||
for (lbl=0, b=fn->start; b; b=b->link) {
|
||||
if (lbl || b->npred > 1)
|
||||
fprintf(f, "%sbb%d:\n", T.asloc, id0+b->id);
|
||||
for (i=b->ins; i!=&b->ins[b->nins]; i++)
|
||||
emitins(*i, e);
|
||||
lbl = 1;
|
||||
switch (b->jmp.type) {
|
||||
case Jhlt:
|
||||
fprintf(f, "\tud2\n");
|
||||
break;
|
||||
case Jret0:
|
||||
if (fn->dynalloc)
|
||||
fprintf(f,
|
||||
"\tmovq %%rbp, %%rsp\n"
|
||||
"\tsubq $%"PRIu64", %%rsp\n",
|
||||
e->fsz + e->nclob * 8);
|
||||
for (r=&amd64_winabi_rclob[NCLR_WIN]; r>amd64_winabi_rclob;)
|
||||
if (fn->reg & BIT(*--r)) {
|
||||
itmp.arg[0] = TMP(*r);
|
||||
emitf("popq %L0", &itmp, e);
|
||||
}
|
||||
if (e->fp == RBP)
|
||||
fputs("\tleave\n", f);
|
||||
else if (e->fsz)
|
||||
fprintf(f,
|
||||
"\taddq $%"PRIu64", %%rsp\n",
|
||||
e->fsz);
|
||||
fputs("\tret\n", f);
|
||||
break;
|
||||
case Jjmp:
|
||||
Jmp:
|
||||
if (b->s1 != b->link)
|
||||
fprintf(f, "\tjmp %sbb%d\n",
|
||||
T.asloc, id0+b->s1->id);
|
||||
else
|
||||
lbl = 0;
|
||||
break;
|
||||
default:
|
||||
c = b->jmp.type - Jjf;
|
||||
if (0 <= c && c <= NCmp) {
|
||||
if (b->link == b->s2) {
|
||||
s = b->s1;
|
||||
b->s1 = b->s2;
|
||||
b->s2 = s;
|
||||
} else
|
||||
c = cmpneg(c);
|
||||
fprintf(f, "\tj%s %sbb%d\n", ctoa[c],
|
||||
T.asloc, id0+b->s2->id);
|
||||
goto Jmp;
|
||||
}
|
||||
die("unhandled jump %d", b->jmp.type);
|
||||
}
|
||||
}
|
||||
id0 += fn->nblk;
|
||||
}
|
||||
942
src/qbe/amd64/isel.c
Normal file
942
src/qbe/amd64/isel.c
Normal file
@@ -0,0 +1,942 @@
|
||||
#include "all.h"
|
||||
#include <limits.h>
|
||||
|
||||
/* For x86_64, do the following:
|
||||
*
|
||||
* - check that constants are used only in
|
||||
* places allowed
|
||||
* - ensure immediates always fit in 32b
|
||||
* - expose machine register constraints
|
||||
* on instructions like division.
|
||||
* - implement fast locals (the streak of
|
||||
* constant allocX in the first basic block)
|
||||
* - recognize complex addressing modes
|
||||
*
|
||||
* Invariant: the use counts that are used
|
||||
* in sel() must be sound. This
|
||||
* is not so trivial, maybe the
|
||||
* dce should be moved out...
|
||||
*/
|
||||
|
||||
static int amatch(Addr *, Num *, Ref, Fn *);
|
||||
|
||||
static int
|
||||
noimm(Ref r, Fn *fn)
|
||||
{
|
||||
int64_t val;
|
||||
|
||||
if (rtype(r) != RCon)
|
||||
return 0;
|
||||
switch (fn->con[r.val].type) {
|
||||
case CAddr:
|
||||
/* we only support the 'small'
|
||||
* code model of the ABI, this
|
||||
* means that we can always
|
||||
* address data with 32bits
|
||||
*/
|
||||
return 0;
|
||||
case CBits:
|
||||
val = fn->con[r.val].bits.i;
|
||||
return (val < INT32_MIN || val > INT32_MAX);
|
||||
default:
|
||||
die("invalid constant");
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
rslot(Ref r, Fn *fn)
|
||||
{
|
||||
if (rtype(r) != RTmp)
|
||||
return -1;
|
||||
return fn->tmp[r.val].slot;
|
||||
}
|
||||
|
||||
static int
|
||||
hascon(Ref r, Con **pc, Fn *fn)
|
||||
{
|
||||
switch (rtype(r)) {
|
||||
case RCon:
|
||||
*pc = &fn->con[r.val];
|
||||
return 1;
|
||||
case RMem:
|
||||
*pc = &fn->mem[r.val].offset;
|
||||
return 1;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
fixarg(Ref *r, int k, Ins *i, Fn *fn)
|
||||
{
|
||||
char buf[32];
|
||||
Addr a, *m;
|
||||
Con cc, *c;
|
||||
Ref r0, r1, r2, r3;
|
||||
int s, n, op;
|
||||
|
||||
r1 = r0 = *r;
|
||||
s = rslot(r0, fn);
|
||||
op = i ? i->op : Ocopy;
|
||||
if (KBASE(k) == 1 && rtype(r0) == RCon) {
|
||||
/* load floating points from memory
|
||||
* slots, they can't be used as
|
||||
* immediates
|
||||
*/
|
||||
r1 = MEM(fn->nmem);
|
||||
vgrow(&fn->mem, ++fn->nmem);
|
||||
memset(&a, 0, sizeof a);
|
||||
a.offset.type = CAddr;
|
||||
n = stashbits(fn->con[r0.val].bits.i, KWIDE(k) ? 8 : 4);
|
||||
/* quote the name so that we do not
|
||||
* add symbol prefixes on the apple
|
||||
* target variant
|
||||
*/
|
||||
sprintf(buf, "\"%sfp%d\"", T.asloc, n);
|
||||
a.offset.sym.id = intern(buf);
|
||||
fn->mem[fn->nmem-1] = a;
|
||||
}
|
||||
else if (op == Ocall && r == &i->arg[0]
|
||||
&& rtype(r0) == RCon && fn->con[r0.val].type != CAddr) {
|
||||
/* use a temporary register so that we
|
||||
* produce an indirect call
|
||||
*/
|
||||
r1 = newtmp("isel", Kl, fn);
|
||||
emit(Ocopy, Kl, r1, r0, R);
|
||||
}
|
||||
else if (op != Ocopy && k == Kl && noimm(r0, fn)) {
|
||||
/* load constants that do not fit in
|
||||
* a 32bit signed integer into a
|
||||
* long temporary
|
||||
*/
|
||||
r1 = newtmp("isel", Kl, fn);
|
||||
emit(Ocopy, Kl, r1, r0, R);
|
||||
}
|
||||
else if (s != -1) {
|
||||
/* load fast locals' addresses into
|
||||
* temporaries right before the
|
||||
* instruction
|
||||
*/
|
||||
r1 = newtmp("isel", Kl, fn);
|
||||
emit(Oaddr, Kl, r1, SLOT(s), R);
|
||||
}
|
||||
else if (T.apple && hascon(r0, &c, fn)
|
||||
&& c->type == CAddr && c->sym.type == SThr) {
|
||||
r1 = newtmp("isel", Kl, fn);
|
||||
if (c->bits.i) {
|
||||
r2 = newtmp("isel", Kl, fn);
|
||||
cc = (Con){.type = CBits};
|
||||
cc.bits.i = c->bits.i;
|
||||
r3 = newcon(&cc, fn);
|
||||
emit(Oadd, Kl, r1, r2, r3);
|
||||
} else
|
||||
r2 = r1;
|
||||
emit(Ocopy, Kl, r2, TMP(RAX), R);
|
||||
r2 = newtmp("isel", Kl, fn);
|
||||
r3 = newtmp("isel", Kl, fn);
|
||||
emit(Ocall, 0, R, r3, CALL(17));
|
||||
emit(Ocopy, Kl, TMP(RDI), r2, R);
|
||||
emit(Oload, Kl, r3, r2, R);
|
||||
cc = *c;
|
||||
cc.bits.i = 0;
|
||||
r3 = newcon(&cc, fn);
|
||||
emit(Oload, Kl, r2, r3, R);
|
||||
if (rtype(r0) == RMem) {
|
||||
m = &fn->mem[r0.val];
|
||||
m->offset.type = CUndef;
|
||||
m->base = r1;
|
||||
r1 = r0;
|
||||
}
|
||||
}
|
||||
else if (!(isstore(op) && r == &i->arg[1])
|
||||
&& !isload(op) && op != Ocall && rtype(r0) == RCon
|
||||
&& fn->con[r0.val].type == CAddr) {
|
||||
/* apple as does not support 32-bit
|
||||
* absolute addressing, use a rip-
|
||||
* relative leaq instead
|
||||
*/
|
||||
r1 = newtmp("isel", Kl, fn);
|
||||
emit(Oaddr, Kl, r1, r0, R);
|
||||
}
|
||||
else if (rtype(r0) == RMem) {
|
||||
/* eliminate memory operands of
|
||||
* the form $foo(%rip, ...)
|
||||
*/
|
||||
m = &fn->mem[r0.val];
|
||||
if (req(m->base, R))
|
||||
if (m->offset.type == CAddr) {
|
||||
r0 = newtmp("isel", Kl, fn);
|
||||
emit(Oaddr, Kl, r0, newcon(&m->offset, fn), R);
|
||||
m->offset.type = CUndef;
|
||||
m->base = r0;
|
||||
}
|
||||
}
|
||||
else if (isxsel(op) && rtype(*r) == RCon) {
|
||||
r1 = newtmp("isel", i->cls, fn);
|
||||
emit(Ocopy, i->cls, r1, *r, R);
|
||||
}
|
||||
*r = r1;
|
||||
}
|
||||
|
||||
static void
|
||||
seladdr(Ref *r, Num *tn, Fn *fn)
|
||||
{
|
||||
Addr a;
|
||||
Ref r0;
|
||||
|
||||
r0 = *r;
|
||||
if (rtype(r0) == RTmp) {
|
||||
memset(&a, 0, sizeof a);
|
||||
if (!amatch(&a, tn, r0, fn))
|
||||
return;
|
||||
if (!req(a.base, R))
|
||||
if (a.offset.type == CAddr) {
|
||||
/* apple as does not support
|
||||
* $foo(%r0, %r1, M); try to
|
||||
* rewrite it or bail out if
|
||||
* impossible
|
||||
*/
|
||||
if (!req(a.index, R) || rtype(a.base) != RTmp)
|
||||
return;
|
||||
else {
|
||||
a.index = a.base;
|
||||
a.scale = 1;
|
||||
a.base = R;
|
||||
}
|
||||
}
|
||||
chuse(r0, -1, fn);
|
||||
vgrow(&fn->mem, ++fn->nmem);
|
||||
fn->mem[fn->nmem-1] = a;
|
||||
chuse(a.base, +1, fn);
|
||||
chuse(a.index, +1, fn);
|
||||
*r = MEM(fn->nmem-1);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
cmpswap(Ref arg[2], int op)
|
||||
{
|
||||
switch (op) {
|
||||
case NCmpI+Cflt:
|
||||
case NCmpI+Cfle:
|
||||
return 1;
|
||||
case NCmpI+Cfgt:
|
||||
case NCmpI+Cfge:
|
||||
return 0;
|
||||
}
|
||||
return rtype(arg[0]) == RCon;
|
||||
}
|
||||
|
||||
static void
|
||||
selcmp(Ref arg[2], int k, int swap, Fn *fn)
|
||||
{
|
||||
Ref r;
|
||||
Ins *icmp;
|
||||
|
||||
if (swap) {
|
||||
r = arg[1];
|
||||
arg[1] = arg[0];
|
||||
arg[0] = r;
|
||||
}
|
||||
emit(Oxcmp, k, R, arg[1], arg[0]);
|
||||
icmp = curi;
|
||||
if (rtype(arg[0]) == RCon) {
|
||||
assert(k != Kw);
|
||||
icmp->arg[1] = newtmp("isel", k, fn);
|
||||
emit(Ocopy, k, icmp->arg[1], arg[0], R);
|
||||
fixarg(&curi->arg[0], k, curi, fn);
|
||||
}
|
||||
fixarg(&icmp->arg[0], k, icmp, fn);
|
||||
fixarg(&icmp->arg[1], k, icmp, fn);
|
||||
}
|
||||
|
||||
static void
|
||||
sel(Ins i, Num *tn, Fn *fn)
|
||||
{
|
||||
Ref r0, r1, tmp[7];
|
||||
int x, j, k, kc, sh, swap;
|
||||
Ins *i0, *i1;
|
||||
|
||||
if (rtype(i.to) == RTmp)
|
||||
if (!isreg(i.to) && !isreg(i.arg[0]) && !isreg(i.arg[1]))
|
||||
if (fn->tmp[i.to.val].nuse == 0) {
|
||||
chuse(i.arg[0], -1, fn);
|
||||
chuse(i.arg[1], -1, fn);
|
||||
return;
|
||||
}
|
||||
i0 = curi;
|
||||
k = i.cls;
|
||||
switch (i.op) {
|
||||
case Odiv:
|
||||
case Orem:
|
||||
case Oudiv:
|
||||
case Ourem:
|
||||
if (KBASE(k) == 1)
|
||||
goto Emit;
|
||||
if (i.op == Odiv || i.op == Oudiv)
|
||||
r0 = TMP(RAX), r1 = TMP(RDX);
|
||||
else
|
||||
r0 = TMP(RDX), r1 = TMP(RAX);
|
||||
emit(Ocopy, k, i.to, r0, R);
|
||||
emit(Ocopy, k, R, r1, R);
|
||||
if (rtype(i.arg[1]) == RCon) {
|
||||
/* immediates not allowed for
|
||||
* divisions in x86
|
||||
*/
|
||||
r0 = newtmp("isel", k, fn);
|
||||
} else
|
||||
r0 = i.arg[1];
|
||||
if (fn->tmp[r0.val].slot != -1)
|
||||
err("unlikely argument %%%s in %s",
|
||||
fn->tmp[r0.val].name, optab[i.op].name);
|
||||
if (i.op == Odiv || i.op == Orem) {
|
||||
emit(Oxidiv, k, R, r0, R);
|
||||
emit(Osign, k, TMP(RDX), TMP(RAX), R);
|
||||
} else {
|
||||
emit(Oxdiv, k, R, r0, R);
|
||||
emit(Ocopy, k, TMP(RDX), CON_Z, R);
|
||||
}
|
||||
emit(Ocopy, k, TMP(RAX), i.arg[0], R);
|
||||
fixarg(&curi->arg[0], k, curi, fn);
|
||||
if (rtype(i.arg[1]) == RCon)
|
||||
emit(Ocopy, k, r0, i.arg[1], R);
|
||||
break;
|
||||
case Osar:
|
||||
case Oshr:
|
||||
case Oshl:
|
||||
r0 = i.arg[1];
|
||||
if (rtype(r0) == RCon)
|
||||
goto Emit;
|
||||
if (fn->tmp[r0.val].slot != -1)
|
||||
err("unlikely argument %%%s in %s",
|
||||
fn->tmp[r0.val].name, optab[i.op].name);
|
||||
i.arg[1] = TMP(RCX);
|
||||
emit(Ocopy, Kw, R, TMP(RCX), R);
|
||||
emiti(i);
|
||||
i1 = curi;
|
||||
emit(Ocopy, Kw, TMP(RCX), r0, R);
|
||||
fixarg(&i1->arg[0], argcls(&i, 0), i1, fn);
|
||||
break;
|
||||
case Ouwtof:
|
||||
r0 = newtmp("utof", Kl, fn);
|
||||
emit(Osltof, k, i.to, r0, R);
|
||||
emit(Oextuw, Kl, r0, i.arg[0], R);
|
||||
fixarg(&curi->arg[0], k, curi, fn);
|
||||
break;
|
||||
case Oultof:
|
||||
/* %mask =l and %arg.0, 1
|
||||
* %isbig =l shr %arg.0, 63
|
||||
* %divided =l shr %arg.0, %isbig
|
||||
* %or =l or %mask, %divided
|
||||
* %float =d sltof %or
|
||||
* %cast =l cast %float
|
||||
* %addend =l shl %isbig, 52
|
||||
* %sum =l add %cast, %addend
|
||||
* %result =d cast %sum
|
||||
*/
|
||||
r0 = newtmp("utof", k, fn);
|
||||
if (k == Ks)
|
||||
kc = Kw, sh = 23;
|
||||
else
|
||||
kc = Kl, sh = 52;
|
||||
for (j=0; j<4; j++)
|
||||
tmp[j] = newtmp("utof", Kl, fn);
|
||||
for (; j<7; j++)
|
||||
tmp[j] = newtmp("utof", kc, fn);
|
||||
emit(Ocast, k, i.to, tmp[6], R);
|
||||
emit(Oadd, kc, tmp[6], tmp[4], tmp[5]);
|
||||
emit(Oshl, kc, tmp[5], tmp[1], getcon(sh, fn));
|
||||
emit(Ocast, kc, tmp[4], r0, R);
|
||||
emit(Osltof, k, r0, tmp[3], R);
|
||||
emit(Oor, Kl, tmp[3], tmp[0], tmp[2]);
|
||||
emit(Oshr, Kl, tmp[2], i.arg[0], tmp[1]);
|
||||
sel(*curi++, 0, fn);
|
||||
emit(Oshr, Kl, tmp[1], i.arg[0], getcon(63, fn));
|
||||
fixarg(&curi->arg[0], Kl, curi, fn);
|
||||
emit(Oand, Kl, tmp[0], i.arg[0], getcon(1, fn));
|
||||
fixarg(&curi->arg[0], Kl, curi, fn);
|
||||
break;
|
||||
case Ostoui:
|
||||
i.op = Ostosi;
|
||||
kc = Ks;
|
||||
tmp[4] = getcon(0xdf000000, fn);
|
||||
goto Oftoui;
|
||||
case Odtoui:
|
||||
i.op = Odtosi;
|
||||
kc = Kd;
|
||||
tmp[4] = getcon(0xc3e0000000000000, fn);
|
||||
Oftoui:
|
||||
if (k == Kw) {
|
||||
r0 = newtmp("ftou", Kl, fn);
|
||||
emit(Ocopy, Kw, i.to, r0, R);
|
||||
i.cls = Kl;
|
||||
i.to = r0;
|
||||
goto Emit;
|
||||
}
|
||||
/* %try0 =l {s,d}tosi %fp
|
||||
* %mask =l sar %try0, 63
|
||||
*
|
||||
* mask is all ones if the first
|
||||
* try was oob, all zeroes o.w.
|
||||
*
|
||||
* %fps ={s,d} sub %fp, (1<<63)
|
||||
* %try1 =l {s,d}tosi %fps
|
||||
*
|
||||
* %tmp =l and %mask, %try1
|
||||
* %res =l or %tmp, %try0
|
||||
*/
|
||||
r0 = newtmp("ftou", kc, fn);
|
||||
for (j=0; j<4; j++)
|
||||
tmp[j] = newtmp("ftou", Kl, fn);
|
||||
emit(Oor, Kl, i.to, tmp[0], tmp[3]);
|
||||
emit(Oand, Kl, tmp[3], tmp[2], tmp[1]);
|
||||
emit(i.op, Kl, tmp[2], r0, R);
|
||||
emit(Oadd, kc, r0, tmp[4], i.arg[0]);
|
||||
i1 = curi; /* fixarg() can change curi */
|
||||
fixarg(&i1->arg[0], kc, i1, fn);
|
||||
fixarg(&i1->arg[1], kc, i1, fn);
|
||||
emit(Osar, Kl, tmp[1], tmp[0], getcon(63, fn));
|
||||
emit(i.op, Kl, tmp[0], i.arg[0], R);
|
||||
fixarg(&curi->arg[0], Kl, curi, fn);
|
||||
break;
|
||||
case Onop:
|
||||
break;
|
||||
case Ostored:
|
||||
case Ostores:
|
||||
case Ostorel:
|
||||
case Ostorew:
|
||||
case Ostoreh:
|
||||
case Ostoreb:
|
||||
if (rtype(i.arg[0]) == RCon) {
|
||||
if (i.op == Ostored)
|
||||
i.op = Ostorel;
|
||||
if (i.op == Ostores)
|
||||
i.op = Ostorew;
|
||||
}
|
||||
seladdr(&i.arg[1], tn, fn);
|
||||
goto Emit;
|
||||
case_Oload:
|
||||
seladdr(&i.arg[0], tn, fn);
|
||||
goto Emit;
|
||||
case Odbgloc:
|
||||
case Ocall:
|
||||
case Osalloc:
|
||||
case Ocopy:
|
||||
case Oadd:
|
||||
case Osub:
|
||||
case Oneg:
|
||||
case Omul:
|
||||
case Oand:
|
||||
case Oor:
|
||||
case Oxor:
|
||||
case Oxtest:
|
||||
case Ostosi:
|
||||
case Odtosi:
|
||||
case Oswtof:
|
||||
case Osltof:
|
||||
case Oexts:
|
||||
case Otruncd:
|
||||
case Ocast:
|
||||
case_Oxsel:
|
||||
case_Oext:
|
||||
Emit:
|
||||
emiti(i);
|
||||
i1 = curi; /* fixarg() can change curi */
|
||||
fixarg(&i1->arg[0], argcls(&i, 0), i1, fn);
|
||||
fixarg(&i1->arg[1], argcls(&i, 1), i1, fn);
|
||||
break;
|
||||
case Oalloc4:
|
||||
case Oalloc8:
|
||||
case Oalloc16:
|
||||
salloc(i.to, i.arg[0], fn);
|
||||
break;
|
||||
default:
|
||||
if (isext(i.op))
|
||||
goto case_Oext;
|
||||
if (isxsel(i.op))
|
||||
goto case_Oxsel;
|
||||
if (isload(i.op))
|
||||
goto case_Oload;
|
||||
if (iscmp(i.op, &kc, &x)) {
|
||||
switch (x) {
|
||||
case NCmpI+Cfeq:
|
||||
/* zf is set when operands are
|
||||
* unordered, so we may have to
|
||||
* check pf
|
||||
*/
|
||||
r0 = newtmp("isel", Kw, fn);
|
||||
r1 = newtmp("isel", Kw, fn);
|
||||
emit(Oand, Kw, i.to, r0, r1);
|
||||
emit(Oflagfo, k, r1, R, R);
|
||||
i.to = r0;
|
||||
break;
|
||||
case NCmpI+Cfne:
|
||||
r0 = newtmp("isel", Kw, fn);
|
||||
r1 = newtmp("isel", Kw, fn);
|
||||
emit(Oor, Kw, i.to, r0, r1);
|
||||
emit(Oflagfuo, k, r1, R, R);
|
||||
i.to = r0;
|
||||
break;
|
||||
}
|
||||
swap = cmpswap(i.arg, x);
|
||||
if (swap)
|
||||
x = cmpop(x);
|
||||
emit(Oflag+x, k, i.to, R, R);
|
||||
selcmp(i.arg, kc, swap, fn);
|
||||
break;
|
||||
}
|
||||
die("unknown instruction %s", optab[i.op].name);
|
||||
}
|
||||
|
||||
while (i0>curi && --i0) {
|
||||
assert(rslot(i0->arg[0], fn) == -1);
|
||||
assert(rslot(i0->arg[1], fn) == -1);
|
||||
}
|
||||
}
|
||||
|
||||
/* Walk backwards from i (exclusive) down to i0 and return the
 * nearest instruction marked zflag in amd64_op[], skipping those
 * marked lflag.  Returns 0 when any other instruction is met first
 * or when the range is exhausted.
 */
static Ins *
flagi(Ins *i0, Ins *i)
{
	while (i0 < i) {
		--i;
		if (amd64_op[i->op].zflag)
			return i;
		if (!amd64_op[i->op].lflag)
			break;
	}
	return 0;
}
|
||||
|
||||
static Ins*
|
||||
selsel(Fn *fn, Blk *b, Ins *i, Num *tn)
|
||||
{
|
||||
Ref r, cr[2];
|
||||
int c, k, swap, gencmp, gencpy;
|
||||
Ins *isel0, *isel1, *fi;
|
||||
Tmp *t;
|
||||
|
||||
assert(i->op == Osel1);
|
||||
for (isel0=i; b->ins<isel0; isel0--) {
|
||||
if (isel0->op == Osel0)
|
||||
break;
|
||||
assert(isel0->op == Osel1);
|
||||
}
|
||||
assert(isel0->op == Osel0);
|
||||
r = isel0->arg[0];
|
||||
assert(rtype(r) == RTmp);
|
||||
t = &fn->tmp[r.val];
|
||||
fi = flagi(b->ins, isel0);
|
||||
cr[0] = cr[1] = R;
|
||||
gencmp = gencpy = swap = 0;
|
||||
k = Kw;
|
||||
c = Cine;
|
||||
if (!fi || !req(fi->to, r)) {
|
||||
gencmp = 1;
|
||||
cr[0] = r;
|
||||
cr[1] = CON_Z;
|
||||
}
|
||||
else if (iscmp(fi->op, &k, &c)) {
|
||||
if (c == NCmpI+Cfeq
|
||||
|| c == NCmpI+Cfne) {
|
||||
/* these are selected as 'and'
|
||||
* or 'or', so we check their
|
||||
* result with Cine
|
||||
*/
|
||||
c = Cine;
|
||||
goto Other;
|
||||
}
|
||||
swap = cmpswap(fi->arg, c);
|
||||
if (swap)
|
||||
c = cmpop(c);
|
||||
if (t->nuse == 1) {
|
||||
gencmp = 1;
|
||||
cr[0] = fi->arg[0];
|
||||
cr[1] = fi->arg[1];
|
||||
*fi = (Ins){.op = Onop};
|
||||
}
|
||||
}
|
||||
else if (fi->op == Oand && t->nuse == 1
|
||||
&& (rtype(fi->arg[0]) == RTmp ||
|
||||
rtype(fi->arg[1]) == RTmp)) {
|
||||
fi->op = Oxtest;
|
||||
fi->to = R;
|
||||
if (rtype(fi->arg[1]) == RCon) {
|
||||
r = fi->arg[1];
|
||||
fi->arg[1] = fi->arg[0];
|
||||
fi->arg[0] = r;
|
||||
}
|
||||
}
|
||||
else {
|
||||
Other:
|
||||
/* since flags are not tracked in liveness,
|
||||
* the result of the flag-setting instruction
|
||||
* has to be marked as live
|
||||
*/
|
||||
if (t->nuse == 1)
|
||||
gencpy = 1;
|
||||
}
|
||||
/* generate conditional moves */
|
||||
for (isel1=i; isel0<isel1; --isel1) {
|
||||
isel1->op = Oxsel+c;
|
||||
sel(*isel1, tn, fn);
|
||||
}
|
||||
assert(!gencmp || !gencpy);
|
||||
if (gencmp)
|
||||
selcmp(cr, k, swap, fn);
|
||||
if (gencpy)
|
||||
emit(Ocopy, Kw, R, r, R);
|
||||
*isel0 = (Ins){.op = Onop};
|
||||
return isel0;
|
||||
}
|
||||
|
||||
static void
|
||||
seljmp(Blk *b, Fn *fn)
|
||||
{
|
||||
Ref r;
|
||||
int c, k, swap;
|
||||
Ins *fi;
|
||||
Tmp *t;
|
||||
|
||||
if (b->jmp.type == Jret0
|
||||
|| b->jmp.type == Jjmp
|
||||
|| b->jmp.type == Jhlt)
|
||||
return;
|
||||
assert(b->jmp.type == Jjnz);
|
||||
r = b->jmp.arg;
|
||||
t = &fn->tmp[r.val];
|
||||
b->jmp.arg = R;
|
||||
assert(rtype(r) == RTmp);
|
||||
if (b->s1 == b->s2) {
|
||||
chuse(r, -1, fn);
|
||||
b->jmp.type = Jjmp;
|
||||
b->s2 = 0;
|
||||
return;
|
||||
}
|
||||
fi = flagi(b->ins, &b->ins[b->nins]);
|
||||
if (!fi || !req(fi->to, r)) {
|
||||
selcmp((Ref[2]){r, CON_Z}, Kw, 0, fn);
|
||||
b->jmp.type = Jjf + Cine;
|
||||
}
|
||||
else if (iscmp(fi->op, &k, &c)
|
||||
&& c != NCmpI+Cfeq /* see sel(), selsel() */
|
||||
&& c != NCmpI+Cfne) {
|
||||
swap = cmpswap(fi->arg, c);
|
||||
if (swap)
|
||||
c = cmpop(c);
|
||||
if (t->nuse == 1) {
|
||||
selcmp(fi->arg, k, swap, fn);
|
||||
*fi = (Ins){.op = Onop};
|
||||
}
|
||||
b->jmp.type = Jjf + c;
|
||||
}
|
||||
else if (fi->op == Oand && t->nuse == 1
|
||||
&& (rtype(fi->arg[0]) == RTmp ||
|
||||
rtype(fi->arg[1]) == RTmp)) {
|
||||
fi->op = Oxtest;
|
||||
fi->to = R;
|
||||
b->jmp.type = Jjf + Cine;
|
||||
if (rtype(fi->arg[1]) == RCon) {
|
||||
r = fi->arg[1];
|
||||
fi->arg[1] = fi->arg[0];
|
||||
fi->arg[0] = r;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* since flags are not tracked in liveness,
|
||||
* the result of the flag-setting instruction
|
||||
* has to be marked as live
|
||||
*/
|
||||
if (t->nuse == 1)
|
||||
emit(Ocopy, Kw, R, r, R);
|
||||
b->jmp.type = Jjf + Cine;
|
||||
}
|
||||
}
|
||||
|
||||
/* Addressing patterns recognized by the matcher tables below; the
 * letters stand for offset, base, index and scale (a trailing 1
 * means the index is used unscaled).  The values index matcher[]
 * and are used as bit positions in match[].
 */
enum {
	Pob   = 0,
	Pbis  = 1,
	Pois  = 2,
	Pobis = 3,
	Pbi1  = 4,
	Pobi1 = 5,
};
|
||||
|
||||
/* mgen generated code
|
||||
*
|
||||
* (with-vars (o b i s)
|
||||
* (patterns
|
||||
* (ob (add (con o) (tmp b)))
|
||||
* (bis (add (tmp b) (mul (tmp i) (con s 1 2 4 8))))
|
||||
* (ois (add (con o) (mul (tmp i) (con s 1 2 4 8))))
|
||||
* (obis (add (con o) (tmp b) (mul (tmp i) (con s 1 2 4 8))))
|
||||
* (bi1 (add (tmp b) (tmp i)))
|
||||
* (obi1 (add (con o) (tmp b) (tmp i)))
|
||||
* ))
|
||||
*/
|
||||
|
||||
static int
|
||||
opn(int op, int l, int r)
|
||||
{
|
||||
static uchar Oaddtbl[91] = {
|
||||
2,
|
||||
2,2,
|
||||
4,4,5,
|
||||
6,6,8,8,
|
||||
4,4,9,10,9,
|
||||
7,7,5,8,9,5,
|
||||
4,4,12,10,12,12,12,
|
||||
4,4,9,10,9,9,12,9,
|
||||
11,11,5,8,9,5,12,9,5,
|
||||
7,7,5,8,9,5,12,9,5,5,
|
||||
11,11,5,8,9,5,12,9,5,5,5,
|
||||
4,4,9,10,9,9,12,9,9,9,9,9,
|
||||
7,7,5,8,9,5,12,9,5,5,5,9,5,
|
||||
};
|
||||
int t;
|
||||
|
||||
if (l < r)
|
||||
t = l, l = r, r = t;
|
||||
switch (op) {
|
||||
case Omul:
|
||||
if (2 <= l)
|
||||
if (r == 0) {
|
||||
return 3;
|
||||
}
|
||||
return 2;
|
||||
case Oadd:
|
||||
return Oaddtbl[(l + l*l)/2 + r];
|
||||
default:
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
refn(Ref r, Num *tn, Con *con)
|
||||
{
|
||||
int64_t n;
|
||||
|
||||
switch (rtype(r)) {
|
||||
case RTmp:
|
||||
if (!tn[r.val].n)
|
||||
tn[r.val].n = 2;
|
||||
return tn[r.val].n;
|
||||
case RCon:
|
||||
if (con[r.val].type != CBits)
|
||||
return 1;
|
||||
n = con[r.val].bits.i;
|
||||
if (n == 8 || n == 4 || n == 2 || n == 1)
|
||||
return 0;
|
||||
return 1;
|
||||
default:
|
||||
return INT_MIN;
|
||||
}
|
||||
}
|
||||
|
||||
static bits match[13] = {
|
||||
[4] = BIT(Pob),
|
||||
[5] = BIT(Pbi1),
|
||||
[6] = BIT(Pob) | BIT(Pois),
|
||||
[7] = BIT(Pob) | BIT(Pobi1),
|
||||
[8] = BIT(Pbi1) | BIT(Pbis),
|
||||
[9] = BIT(Pbi1) | BIT(Pobi1),
|
||||
[10] = BIT(Pbi1) | BIT(Pbis) | BIT(Pobi1) | BIT(Pobis),
|
||||
[11] = BIT(Pob) | BIT(Pobi1) | BIT(Pobis),
|
||||
[12] = BIT(Pbi1) | BIT(Pobi1) | BIT(Pobis),
|
||||
};
|
||||
|
||||
static uchar *matcher[] = {
|
||||
[Pbi1] = (uchar[]){
|
||||
1,3,1,3,2,0
|
||||
},
|
||||
[Pbis] = (uchar[]){
|
||||
5,1,8,5,27,1,5,1,2,5,13,3,1,1,3,3,3,2,0,1,
|
||||
3,3,3,2,3,1,0,1,29
|
||||
},
|
||||
[Pob] = (uchar[]){
|
||||
1,3,0,3,1,0
|
||||
},
|
||||
[Pobi1] = (uchar[]){
|
||||
5,3,9,9,10,33,12,35,45,1,5,3,11,9,7,9,4,9,
|
||||
17,1,3,0,3,1,3,2,0,3,1,1,3,0,34,1,37,1,5,2,
|
||||
5,7,2,7,8,37,29,1,3,0,1,32
|
||||
},
|
||||
[Pobis] = (uchar[]){
|
||||
5,2,10,7,11,19,49,1,1,3,3,3,2,1,3,0,3,1,0,
|
||||
1,3,0,5,1,8,5,25,1,5,1,2,5,13,3,1,1,3,3,3,
|
||||
2,0,1,3,3,3,2,26,1,51,1,5,1,6,5,9,1,3,0,51,
|
||||
3,1,1,3,0,45
|
||||
},
|
||||
[Pois] = (uchar[]){
|
||||
1,3,0,1,3,3,3,2,0
|
||||
},
|
||||
};
|
||||
|
||||
/* end of generated code */
|
||||
|
||||
static void
|
||||
anumber(Num *tn, Blk *b, Con *con)
|
||||
{
|
||||
Ins *i;
|
||||
Num *n;
|
||||
|
||||
for (i=b->ins; i<&b->ins[b->nins]; i++) {
|
||||
if (rtype(i->to) != RTmp)
|
||||
continue;
|
||||
n = &tn[i->to.val];
|
||||
n->l = i->arg[0];
|
||||
n->r = i->arg[1];
|
||||
n->nl = refn(n->l, tn, con);
|
||||
n->nr = refn(n->r, tn, con);
|
||||
n->n = opn(i->op, n->nl, n->nr);
|
||||
}
|
||||
}
|
||||
|
||||
static Ref
|
||||
adisp(Con *c, Num *tn, Ref r, Fn *fn, int s)
|
||||
{
|
||||
Ref v[2];
|
||||
int n;
|
||||
|
||||
while (!req(r, R)) {
|
||||
assert(rtype(r) == RTmp);
|
||||
n = refn(r, tn, fn->con);
|
||||
if (!(match[n] & BIT(Pob)))
|
||||
break;
|
||||
runmatch(matcher[Pob], tn, r, v);
|
||||
assert(rtype(v[0]) == RCon);
|
||||
addcon(c, &fn->con[v[0].val], s);
|
||||
r = v[1];
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
static int
|
||||
amatch(Addr *a, Num *tn, Ref r, Fn *fn)
|
||||
{
|
||||
static int pat[] = {Pobis, Pobi1, Pbis, Pois, Pbi1, -1};
|
||||
Ref ro, rb, ri, rs, v[4];
|
||||
Con *c, co;
|
||||
int s, n, *p;
|
||||
|
||||
if (rtype(r) != RTmp)
|
||||
return 0;
|
||||
|
||||
n = refn(r, tn, fn->con);
|
||||
memset(v, 0, sizeof v);
|
||||
for (p=pat; *p>=0; p++)
|
||||
if (match[n] & BIT(*p)) {
|
||||
runmatch(matcher[*p], tn, r, v);
|
||||
break;
|
||||
}
|
||||
if (*p < 0)
|
||||
v[1] = r;
|
||||
|
||||
memset(&co, 0, sizeof co);
|
||||
ro = v[0];
|
||||
rb = adisp(&co, tn, v[1], fn, 1);
|
||||
ri = v[2];
|
||||
rs = v[3];
|
||||
s = 1;
|
||||
|
||||
if (*p < 0 && co.type != CUndef)
|
||||
if (amatch(a, tn, rb, fn))
|
||||
return addcon(&a->offset, &co, 1);
|
||||
if (!req(ro, R)) {
|
||||
assert(rtype(ro) == RCon);
|
||||
c = &fn->con[ro.val];
|
||||
if (!addcon(&co, c, 1))
|
||||
return 0;
|
||||
}
|
||||
if (!req(rs, R)) {
|
||||
assert(rtype(rs) == RCon);
|
||||
c = &fn->con[rs.val];
|
||||
assert(c->type == CBits);
|
||||
s = c->bits.i;
|
||||
}
|
||||
ri = adisp(&co, tn, ri, fn, s);
|
||||
*a = (Addr){co, rb, ri, s};
|
||||
|
||||
if (rtype(ri) == RTmp)
|
||||
if (fn->tmp[ri.val].slot != -1) {
|
||||
if (a->scale != 1
|
||||
|| fn->tmp[rb.val].slot != -1)
|
||||
return 0;
|
||||
a->base = ri;
|
||||
a->index = rb;
|
||||
}
|
||||
if (!req(a->base, R)) {
|
||||
assert(rtype(a->base) == RTmp);
|
||||
s = fn->tmp[a->base.val].slot;
|
||||
if (s != -1)
|
||||
a->base = SLOT(s);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* instruction selection
|
||||
* requires use counts (as given by parsing)
|
||||
*/
|
||||
void
|
||||
amd64_isel(Fn *fn)
|
||||
{
|
||||
Blk *b, **sb;
|
||||
Ins *i;
|
||||
Phi *p;
|
||||
uint a;
|
||||
int n, al;
|
||||
int64_t sz;
|
||||
Num *num;
|
||||
|
||||
/* assign slots to fast allocs */
|
||||
b = fn->start;
|
||||
/* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
|
||||
for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2)
|
||||
for (i=b->ins; i<&b->ins[b->nins]; i++)
|
||||
if (i->op == al) {
|
||||
if (rtype(i->arg[0]) != RCon)
|
||||
break;
|
||||
sz = fn->con[i->arg[0].val].bits.i;
|
||||
if (sz < 0 || sz >= INT_MAX-15)
|
||||
err("invalid alloc size %"PRId64, sz);
|
||||
sz = (sz + n-1) & -n;
|
||||
sz /= 4;
|
||||
if (sz > INT_MAX - fn->slot)
|
||||
die("alloc too large");
|
||||
fn->tmp[i->to.val].slot = fn->slot;
|
||||
fn->slot += sz;
|
||||
fn->salign = 2 + al - Oalloc;
|
||||
*i = (Ins){.op = Onop};
|
||||
}
|
||||
|
||||
/* process basic blocks */
|
||||
n = fn->ntmp;
|
||||
num = emalloc(n * sizeof num[0]);
|
||||
for (b=fn->start; b; b=b->link) {
|
||||
curi = &insb[NIns];
|
||||
for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
|
||||
for (p=(*sb)->phi; p; p=p->link) {
|
||||
for (a=0; p->blk[a] != b; a++)
|
||||
assert(a+1 < p->narg);
|
||||
fixarg(&p->arg[a], p->cls, 0, fn);
|
||||
}
|
||||
memset(num, 0, n * sizeof num[0]);
|
||||
anumber(num, b, fn->con);
|
||||
seljmp(b, fn);
|
||||
for (i=&b->ins[b->nins]; i!=b->ins;) {
|
||||
--i;
|
||||
assert(i->op != Osel0);
|
||||
if (i->op == Osel1)
|
||||
i = selsel(fn, b, i, num);
|
||||
else
|
||||
sel(*i, num, fn);
|
||||
}
|
||||
idup(b, curi, &insb[NIns]-curi);
|
||||
}
|
||||
free(num);
|
||||
|
||||
if (debug['I']) {
|
||||
fprintf(stderr, "\n> After instruction selection:\n");
|
||||
printfn(fn, stderr);
|
||||
}
|
||||
}
|
||||
721
src/qbe/amd64/sysv.c
Normal file
721
src/qbe/amd64/sysv.c
Normal file
@@ -0,0 +1,721 @@
|
||||
#include "all.h"
|
||||
|
||||
typedef struct AClass AClass;
|
||||
typedef struct RAlloc RAlloc;
|
||||
|
||||
struct AClass {
|
||||
Typ *type;
|
||||
int inmem;
|
||||
int align;
|
||||
uint size;
|
||||
int cls[2];
|
||||
Ref ref[2];
|
||||
};
|
||||
|
||||
struct RAlloc {
|
||||
Ins i;
|
||||
RAlloc *link;
|
||||
};
|
||||
|
||||
static void
|
||||
classify(AClass *a, Typ *t, uint s)
|
||||
{
|
||||
Field *f;
|
||||
int *cls;
|
||||
uint n, s1;
|
||||
|
||||
for (n=0, s1=s; n<t->nunion; n++, s=s1)
|
||||
for (f=t->fields[n]; f->type!=FEnd; f++) {
|
||||
assert(s <= 16);
|
||||
cls = &a->cls[s/8];
|
||||
switch (f->type) {
|
||||
case FEnd:
|
||||
die("unreachable");
|
||||
case FPad:
|
||||
/* don't change anything */
|
||||
s += f->len;
|
||||
break;
|
||||
case Fs:
|
||||
case Fd:
|
||||
if (*cls == Kx)
|
||||
*cls = Kd;
|
||||
s += f->len;
|
||||
break;
|
||||
case Fb:
|
||||
case Fh:
|
||||
case Fw:
|
||||
case Fl:
|
||||
*cls = Kl;
|
||||
s += f->len;
|
||||
break;
|
||||
case FTyp:
|
||||
classify(a, &typ[f->len], s);
|
||||
s += typ[f->len].size;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
typclass(AClass *a, Typ *t)
|
||||
{
|
||||
uint sz, al;
|
||||
|
||||
sz = t->size;
|
||||
al = 1u << t->align;
|
||||
|
||||
/* the ABI requires sizes to be rounded
|
||||
* up to the nearest multiple of 8, moreover
|
||||
* it makes it easy load and store structures
|
||||
* in registers
|
||||
*/
|
||||
if (al < 8)
|
||||
al = 8;
|
||||
sz = (sz + al-1) & -al;
|
||||
|
||||
a->type = t;
|
||||
a->size = sz;
|
||||
a->align = t->align;
|
||||
|
||||
if (t->isdark || sz > 16 || sz == 0) {
|
||||
/* large or unaligned structures are
|
||||
* required to be passed in memory
|
||||
*/
|
||||
a->inmem = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
a->cls[0] = Kx;
|
||||
a->cls[1] = Kx;
|
||||
a->inmem = 0;
|
||||
classify(a, t, 0);
|
||||
}
|
||||
|
||||
static int
|
||||
retr(Ref reg[2], AClass *aret)
|
||||
{
|
||||
static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}};
|
||||
int n, k, ca, nr[2];
|
||||
|
||||
nr[0] = nr[1] = 0;
|
||||
ca = 0;
|
||||
for (n=0; (uint)n*8<aret->size; n++) {
|
||||
k = KBASE(aret->cls[n]);
|
||||
reg[n] = TMP(retreg[k][nr[k]++]);
|
||||
ca += 1 << (2 * k);
|
||||
}
|
||||
return ca;
|
||||
}
|
||||
|
||||
static void
|
||||
selret(Blk *b, Fn *fn)
|
||||
{
|
||||
int j, k, ca;
|
||||
Ref r, r0, reg[2];
|
||||
AClass aret;
|
||||
|
||||
j = b->jmp.type;
|
||||
|
||||
if (!isret(j) || j == Jret0)
|
||||
return;
|
||||
|
||||
r0 = b->jmp.arg;
|
||||
b->jmp.type = Jret0;
|
||||
|
||||
if (j == Jretc) {
|
||||
typclass(&aret, &typ[fn->retty]);
|
||||
if (aret.inmem) {
|
||||
assert(rtype(fn->retr) == RTmp);
|
||||
emit(Ocopy, Kl, TMP(RAX), fn->retr, R);
|
||||
emit(Oblit1, 0, R, INT(aret.type->size), R);
|
||||
emit(Oblit0, 0, R, r0, fn->retr);
|
||||
ca = 1;
|
||||
} else {
|
||||
ca = retr(reg, &aret);
|
||||
if (aret.size > 8) {
|
||||
r = newtmp("abi", Kl, fn);
|
||||
emit(Oload, Kl, reg[1], r, R);
|
||||
emit(Oadd, Kl, r, r0, getcon(8, fn));
|
||||
}
|
||||
emit(Oload, Kl, reg[0], r0, R);
|
||||
}
|
||||
} else {
|
||||
k = j - Jretw;
|
||||
if (KBASE(k) == 0) {
|
||||
emit(Ocopy, k, TMP(RAX), r0, R);
|
||||
ca = 1;
|
||||
} else {
|
||||
emit(Ocopy, k, TMP(XMM0), r0, R);
|
||||
ca = 1 << 2;
|
||||
}
|
||||
}
|
||||
|
||||
b->jmp.arg = CALL(ca);
|
||||
}
|
||||
|
||||
static int
|
||||
argsclass(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret, Ref *env)
|
||||
{
|
||||
int varc, envc, nint, ni, nsse, ns, n, *pn;
|
||||
AClass *a;
|
||||
Ins *i;
|
||||
|
||||
if (aret && aret->inmem)
|
||||
nint = 5; /* hidden argument */
|
||||
else
|
||||
nint = 6;
|
||||
nsse = 8;
|
||||
varc = 0;
|
||||
envc = 0;
|
||||
for (i=i0, a=ac; i<i1; i++, a++)
|
||||
switch (i->op - op + Oarg) {
|
||||
case Oarg:
|
||||
if (KBASE(i->cls) == 0)
|
||||
pn = &nint;
|
||||
else
|
||||
pn = &nsse;
|
||||
if (*pn > 0) {
|
||||
--*pn;
|
||||
a->inmem = 0;
|
||||
} else
|
||||
a->inmem = 2;
|
||||
a->align = 3;
|
||||
a->size = 8;
|
||||
a->cls[0] = i->cls;
|
||||
break;
|
||||
case Oargc:
|
||||
n = i->arg[0].val;
|
||||
typclass(a, &typ[n]);
|
||||
if (a->inmem)
|
||||
continue;
|
||||
ni = ns = 0;
|
||||
for (n=0; (uint)n*8<a->size; n++)
|
||||
if (KBASE(a->cls[n]) == 0)
|
||||
ni++;
|
||||
else
|
||||
ns++;
|
||||
if (nint >= ni && nsse >= ns) {
|
||||
nint -= ni;
|
||||
nsse -= ns;
|
||||
} else
|
||||
a->inmem = 1;
|
||||
break;
|
||||
case Oarge:
|
||||
envc = 1;
|
||||
if (op == Opar)
|
||||
*env = i->to;
|
||||
else
|
||||
*env = i->arg[0];
|
||||
break;
|
||||
case Oargv:
|
||||
varc = 1;
|
||||
break;
|
||||
default:
|
||||
die("unreachable");
|
||||
}
|
||||
|
||||
if (varc && envc)
|
||||
err("sysv abi does not support variadic env calls");
|
||||
|
||||
return ((varc|envc) << 12) | ((6-nint) << 4) | ((8-nsse) << 8);
|
||||
}
|
||||
|
||||
int amd64_sysv_rsave[] = {
|
||||
RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX,
|
||||
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
|
||||
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, -1
|
||||
};
|
||||
int amd64_sysv_rclob[] = {RBX, R12, R13, R14, R15, -1};
|
||||
|
||||
MAKESURE(sysv_arrays_ok,
|
||||
sizeof amd64_sysv_rsave == (NGPS_SYSV+NFPS+1) * sizeof(int) &&
|
||||
sizeof amd64_sysv_rclob == (NCLR_SYSV+1) * sizeof(int)
|
||||
);
|
||||
|
||||
/* layout of call's second argument (RCall)
 *
 *  29     12    8    4  3  0
 *  |0...00|x|xxxx|xxxx|xx|xx|                  range
 *          |    |    |  |  ` gp regs returned (0..2)
 *          |    |    |  ` sse regs returned   (0..2)
 *          |    |    ` gp regs passed         (0..6)
 *          |    ` sse regs passed             (0..8)
 *          ` 1 if rax is used to pass data    (0..1)
 */
|
||||
|
||||
bits
|
||||
amd64_sysv_retregs(Ref r, int p[2])
|
||||
{
|
||||
bits b;
|
||||
int ni, nf;
|
||||
|
||||
assert(rtype(r) == RCall);
|
||||
b = 0;
|
||||
ni = r.val & 3;
|
||||
nf = (r.val >> 2) & 3;
|
||||
if (ni >= 1)
|
||||
b |= BIT(RAX);
|
||||
if (ni >= 2)
|
||||
b |= BIT(RDX);
|
||||
if (nf >= 1)
|
||||
b |= BIT(XMM0);
|
||||
if (nf >= 2)
|
||||
b |= BIT(XMM1);
|
||||
if (p) {
|
||||
p[0] = ni;
|
||||
p[1] = nf;
|
||||
}
|
||||
return b;
|
||||
}
|
||||
|
||||
bits
|
||||
amd64_sysv_argregs(Ref r, int p[2])
|
||||
{
|
||||
bits b;
|
||||
int j, ni, nf, ra;
|
||||
|
||||
assert(rtype(r) == RCall);
|
||||
b = 0;
|
||||
ni = (r.val >> 4) & 15;
|
||||
nf = (r.val >> 8) & 15;
|
||||
ra = (r.val >> 12) & 1;
|
||||
for (j=0; j<ni; j++)
|
||||
b |= BIT(amd64_sysv_rsave[j]);
|
||||
for (j=0; j<nf; j++)
|
||||
b |= BIT(XMM0+j);
|
||||
if (p) {
|
||||
p[0] = ni + ra;
|
||||
p[1] = nf;
|
||||
}
|
||||
return b | (ra ? BIT(RAX) : 0);
|
||||
}
|
||||
|
||||
static Ref
|
||||
rarg(int ty, int *ni, int *ns)
|
||||
{
|
||||
if (KBASE(ty) == 0)
|
||||
return TMP(amd64_sysv_rsave[(*ni)++]);
|
||||
else
|
||||
return TMP(XMM0 + (*ns)++);
|
||||
}
|
||||
|
||||
static void
|
||||
selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap)
|
||||
{
|
||||
Ins *i;
|
||||
AClass *ac, *a, aret;
|
||||
int ca, ni, ns, al;
|
||||
uint stk, off;
|
||||
Ref r, r1, r2, reg[2], env;
|
||||
RAlloc *ra;
|
||||
|
||||
env = R;
|
||||
ac = alloc((i1-i0) * sizeof ac[0]);
|
||||
|
||||
if (!req(i1->arg[1], R)) {
|
||||
assert(rtype(i1->arg[1]) == RType);
|
||||
typclass(&aret, &typ[i1->arg[1].val]);
|
||||
ca = argsclass(i0, i1, ac, Oarg, &aret, &env);
|
||||
} else
|
||||
ca = argsclass(i0, i1, ac, Oarg, 0, &env);
|
||||
|
||||
for (stk=0, a=&ac[i1-i0]; a>ac;)
|
||||
if ((--a)->inmem) {
|
||||
if (a->align > 4)
|
||||
err("sysv abi requires alignments of 16 or less");
|
||||
stk += a->size;
|
||||
if (a->align == 4)
|
||||
stk += stk & 15;
|
||||
}
|
||||
stk += stk & 15;
|
||||
if (stk) {
|
||||
r = getcon(-(int64_t)stk, fn);
|
||||
emit(Osalloc, Kl, R, r, R);
|
||||
}
|
||||
|
||||
if (!req(i1->arg[1], R)) {
|
||||
if (aret.inmem) {
|
||||
/* get the return location from eax
|
||||
* it saves one callee-save reg */
|
||||
r1 = newtmp("abi", Kl, fn);
|
||||
emit(Ocopy, Kl, i1->to, TMP(RAX), R);
|
||||
ca += 1;
|
||||
} else {
|
||||
/* todo, may read out of bounds.
|
||||
* gcc did this up until 5.2, but
|
||||
* this should still be fixed.
|
||||
*/
|
||||
if (aret.size > 8) {
|
||||
r = newtmp("abi", Kl, fn);
|
||||
aret.ref[1] = newtmp("abi", aret.cls[1], fn);
|
||||
emit(Ostorel, 0, R, aret.ref[1], r);
|
||||
emit(Oadd, Kl, r, i1->to, getcon(8, fn));
|
||||
}
|
||||
aret.ref[0] = newtmp("abi", aret.cls[0], fn);
|
||||
emit(Ostorel, 0, R, aret.ref[0], i1->to);
|
||||
ca += retr(reg, &aret);
|
||||
if (aret.size > 8)
|
||||
emit(Ocopy, aret.cls[1], aret.ref[1], reg[1], R);
|
||||
emit(Ocopy, aret.cls[0], aret.ref[0], reg[0], R);
|
||||
r1 = i1->to;
|
||||
}
|
||||
/* allocate return pad */
|
||||
ra = alloc(sizeof *ra);
|
||||
/* specific to NAlign == 3 */
|
||||
al = aret.align >= 2 ? aret.align - 2 : 0;
|
||||
ra->i = (Ins){Oalloc+al, Kl, r1, {getcon(aret.size, fn)}};
|
||||
ra->link = (*rap);
|
||||
*rap = ra;
|
||||
} else {
|
||||
ra = 0;
|
||||
if (KBASE(i1->cls) == 0) {
|
||||
emit(Ocopy, i1->cls, i1->to, TMP(RAX), R);
|
||||
ca += 1;
|
||||
} else {
|
||||
emit(Ocopy, i1->cls, i1->to, TMP(XMM0), R);
|
||||
ca += 1 << 2;
|
||||
}
|
||||
}
|
||||
|
||||
emit(Ocall, i1->cls, R, i1->arg[0], CALL(ca));
|
||||
|
||||
if (!req(R, env))
|
||||
emit(Ocopy, Kl, TMP(RAX), env, R);
|
||||
else if ((ca >> 12) & 1) /* vararg call */
|
||||
emit(Ocopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R);
|
||||
|
||||
ni = ns = 0;
|
||||
if (ra && aret.inmem)
|
||||
emit(Ocopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */
|
||||
|
||||
for (i=i0, a=ac; i<i1; i++, a++) {
|
||||
if (i->op >= Oarge || a->inmem)
|
||||
continue;
|
||||
r1 = rarg(a->cls[0], &ni, &ns);
|
||||
if (i->op == Oargc) {
|
||||
if (a->size > 8) {
|
||||
r2 = rarg(a->cls[1], &ni, &ns);
|
||||
r = newtmp("abi", Kl, fn);
|
||||
emit(Oload, a->cls[1], r2, r, R);
|
||||
emit(Oadd, Kl, r, i->arg[1], getcon(8, fn));
|
||||
}
|
||||
emit(Oload, a->cls[0], r1, i->arg[1], R);
|
||||
} else
|
||||
emit(Ocopy, i->cls, r1, i->arg[0], R);
|
||||
}
|
||||
|
||||
if (!stk)
|
||||
return;
|
||||
|
||||
r = newtmp("abi", Kl, fn);
|
||||
for (i=i0, a=ac, off=0; i<i1; i++, a++) {
|
||||
if (i->op >= Oarge || !a->inmem)
|
||||
continue;
|
||||
r1 = newtmp("abi", Kl, fn);
|
||||
if (i->op == Oargc) {
|
||||
if (a->align == 4)
|
||||
off += off & 15;
|
||||
emit(Oblit1, 0, R, INT(a->type->size), R);
|
||||
emit(Oblit0, 0, R, i->arg[1], r1);
|
||||
} else
|
||||
emit(Ostorel, 0, R, i->arg[0], r1);
|
||||
emit(Oadd, Kl, r1, r, getcon(off, fn));
|
||||
off += a->size;
|
||||
}
|
||||
emit(Osalloc, Kl, r, getcon(stk, fn), R);
|
||||
}
|
||||
|
||||
static int
|
||||
selpar(Fn *fn, Ins *i0, Ins *i1)
|
||||
{
|
||||
AClass *ac, *a, aret;
|
||||
Ins *i;
|
||||
int ni, ns, s, al, fa;
|
||||
Ref r, env;
|
||||
|
||||
env = R;
|
||||
ac = alloc((i1-i0) * sizeof ac[0]);
|
||||
curi = &insb[NIns];
|
||||
ni = ns = 0;
|
||||
|
||||
if (fn->retty >= 0) {
|
||||
typclass(&aret, &typ[fn->retty]);
|
||||
fa = argsclass(i0, i1, ac, Opar, &aret, &env);
|
||||
} else
|
||||
fa = argsclass(i0, i1, ac, Opar, 0, &env);
|
||||
fn->reg = amd64_sysv_argregs(CALL(fa), 0);
|
||||
|
||||
for (i=i0, a=ac; i<i1; i++, a++) {
|
||||
if (i->op != Oparc || a->inmem)
|
||||
continue;
|
||||
if (a->size > 8) {
|
||||
r = newtmp("abi", Kl, fn);
|
||||
a->ref[1] = newtmp("abi", Kl, fn);
|
||||
emit(Ostorel, 0, R, a->ref[1], r);
|
||||
emit(Oadd, Kl, r, i->to, getcon(8, fn));
|
||||
}
|
||||
a->ref[0] = newtmp("abi", Kl, fn);
|
||||
emit(Ostorel, 0, R, a->ref[0], i->to);
|
||||
/* specific to NAlign == 3 */
|
||||
al = a->align >= 2 ? a->align - 2 : 0;
|
||||
emit(Oalloc+al, Kl, i->to, getcon(a->size, fn), R);
|
||||
}
|
||||
|
||||
if (fn->retty >= 0 && aret.inmem) {
|
||||
r = newtmp("abi", Kl, fn);
|
||||
emit(Ocopy, Kl, r, rarg(Kl, &ni, &ns), R);
|
||||
fn->retr = r;
|
||||
}
|
||||
|
||||
for (i=i0, a=ac, s=4; i<i1; i++, a++) {
|
||||
switch (a->inmem) {
|
||||
case 1:
|
||||
if (a->align > 4)
|
||||
err("sysv abi requires alignments of 16 or less");
|
||||
if (a->align == 4)
|
||||
s = (s+3) & -4;
|
||||
fn->tmp[i->to.val].slot = -s;
|
||||
s += a->size / 4;
|
||||
continue;
|
||||
case 2:
|
||||
emit(Oload, i->cls, i->to, SLOT(-s), R);
|
||||
s += 2;
|
||||
continue;
|
||||
}
|
||||
if (i->op == Opare)
|
||||
continue;
|
||||
r = rarg(a->cls[0], &ni, &ns);
|
||||
if (i->op == Oparc) {
|
||||
emit(Ocopy, a->cls[0], a->ref[0], r, R);
|
||||
if (a->size > 8) {
|
||||
r = rarg(a->cls[1], &ni, &ns);
|
||||
emit(Ocopy, a->cls[1], a->ref[1], r, R);
|
||||
}
|
||||
} else
|
||||
emit(Ocopy, i->cls, i->to, r, R);
|
||||
}
|
||||
|
||||
if (!req(R, env))
|
||||
emit(Ocopy, Kl, env, TMP(RAX), R);
|
||||
|
||||
return fa | (s*4)<<12;
|
||||
}
|
||||
|
||||
static Blk *
|
||||
split(Fn *fn, Blk *b)
|
||||
{
|
||||
Blk *bn;
|
||||
|
||||
++fn->nblk;
|
||||
bn = newblk();
|
||||
idup(bn, curi, &insb[NIns]-curi);
|
||||
curi = &insb[NIns];
|
||||
bn->visit = ++b->visit;
|
||||
strf(bn->name, "%s.%d", b->name, b->visit);
|
||||
bn->loop = b->loop;
|
||||
bn->link = b->link;
|
||||
b->link = bn;
|
||||
return bn;
|
||||
}
|
||||
|
||||
static void
|
||||
chpred(Blk *b, Blk *bp, Blk *bp1)
|
||||
{
|
||||
Phi *p;
|
||||
uint a;
|
||||
|
||||
for (p=b->phi; p; p=p->link) {
|
||||
for (a=0; p->blk[a]!=bp; a++)
|
||||
assert(a+1<p->narg);
|
||||
p->blk[a] = bp1;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
selvaarg(Fn *fn, Blk *b, Ins *i)
|
||||
{
|
||||
Ref loc, lreg, lstk, nr, r0, r1, c4, c8, c16, c, ap;
|
||||
Blk *b0, *bstk, *breg;
|
||||
int isint;
|
||||
|
||||
c4 = getcon(4, fn);
|
||||
c8 = getcon(8, fn);
|
||||
c16 = getcon(16, fn);
|
||||
ap = i->arg[0];
|
||||
isint = KBASE(i->cls) == 0;
|
||||
|
||||
/* @b [...]
|
||||
r0 =l add ap, (0 or 4)
|
||||
nr =l loadsw r0
|
||||
r1 =w cultw nr, (48 or 176)
|
||||
jnz r1, @breg, @bstk
|
||||
@breg
|
||||
r0 =l add ap, 16
|
||||
r1 =l loadl r0
|
||||
lreg =l add r1, nr
|
||||
r0 =w add nr, (8 or 16)
|
||||
r1 =l add ap, (0 or 4)
|
||||
storew r0, r1
|
||||
@bstk
|
||||
r0 =l add ap, 8
|
||||
lstk =l loadl r0
|
||||
r1 =l add lstk, 8
|
||||
storel r1, r0
|
||||
@b0
|
||||
%loc =l phi @breg %lreg, @bstk %lstk
|
||||
i->to =(i->cls) load %loc
|
||||
*/
|
||||
|
||||
loc = newtmp("abi", Kl, fn);
|
||||
emit(Oload, i->cls, i->to, loc, R);
|
||||
b0 = split(fn, b);
|
||||
b0->jmp = b->jmp;
|
||||
b0->s1 = b->s1;
|
||||
b0->s2 = b->s2;
|
||||
if (b->s1)
|
||||
chpred(b->s1, b, b0);
|
||||
if (b->s2 && b->s2 != b->s1)
|
||||
chpred(b->s2, b, b0);
|
||||
|
||||
lreg = newtmp("abi", Kl, fn);
|
||||
nr = newtmp("abi", Kl, fn);
|
||||
r0 = newtmp("abi", Kw, fn);
|
||||
r1 = newtmp("abi", Kl, fn);
|
||||
emit(Ostorew, Kw, R, r0, r1);
|
||||
emit(Oadd, Kl, r1, ap, isint ? CON_Z : c4);
|
||||
emit(Oadd, Kw, r0, nr, isint ? c8 : c16);
|
||||
r0 = newtmp("abi", Kl, fn);
|
||||
r1 = newtmp("abi", Kl, fn);
|
||||
emit(Oadd, Kl, lreg, r1, nr);
|
||||
emit(Oload, Kl, r1, r0, R);
|
||||
emit(Oadd, Kl, r0, ap, c16);
|
||||
breg = split(fn, b);
|
||||
breg->jmp.type = Jjmp;
|
||||
breg->s1 = b0;
|
||||
|
||||
lstk = newtmp("abi", Kl, fn);
|
||||
r0 = newtmp("abi", Kl, fn);
|
||||
r1 = newtmp("abi", Kl, fn);
|
||||
emit(Ostorel, Kw, R, r1, r0);
|
||||
emit(Oadd, Kl, r1, lstk, c8);
|
||||
emit(Oload, Kl, lstk, r0, R);
|
||||
emit(Oadd, Kl, r0, ap, c8);
|
||||
bstk = split(fn, b);
|
||||
bstk->jmp.type = Jjmp;
|
||||
bstk->s1 = b0;
|
||||
|
||||
b0->phi = alloc(sizeof *b0->phi);
|
||||
*b0->phi = (Phi){
|
||||
.cls = Kl, .to = loc,
|
||||
.narg = 2,
|
||||
.blk = vnew(2, sizeof b0->phi->blk[0], PFn),
|
||||
.arg = vnew(2, sizeof b0->phi->arg[0], PFn),
|
||||
};
|
||||
b0->phi->blk[0] = bstk;
|
||||
b0->phi->blk[1] = breg;
|
||||
b0->phi->arg[0] = lstk;
|
||||
b0->phi->arg[1] = lreg;
|
||||
r0 = newtmp("abi", Kl, fn);
|
||||
r1 = newtmp("abi", Kw, fn);
|
||||
b->jmp.type = Jjnz;
|
||||
b->jmp.arg = r1;
|
||||
b->s1 = breg;
|
||||
b->s2 = bstk;
|
||||
c = getcon(isint ? 48 : 176, fn);
|
||||
emit(Ocmpw+Ciult, Kw, r1, nr, c);
|
||||
emit(Oloadsw, Kl, nr, r0, R);
|
||||
emit(Oadd, Kl, r0, ap, isint ? CON_Z : c4);
|
||||
}
|
||||
|
||||
static void
|
||||
selvastart(Fn *fn, int fa, Ref ap)
|
||||
{
|
||||
Ref r0, r1;
|
||||
int gp, fp, sp;
|
||||
|
||||
gp = ((fa >> 4) & 15) * 8;
|
||||
fp = 48 + ((fa >> 8) & 15) * 16;
|
||||
sp = fa >> 12;
|
||||
r0 = newtmp("abi", Kl, fn);
|
||||
r1 = newtmp("abi", Kl, fn);
|
||||
emit(Ostorel, Kw, R, r1, r0);
|
||||
emit(Oadd, Kl, r1, TMP(RBP), getcon(-176, fn));
|
||||
emit(Oadd, Kl, r0, ap, getcon(16, fn));
|
||||
r0 = newtmp("abi", Kl, fn);
|
||||
r1 = newtmp("abi", Kl, fn);
|
||||
emit(Ostorel, Kw, R, r1, r0);
|
||||
emit(Oadd, Kl, r1, TMP(RBP), getcon(sp, fn));
|
||||
emit(Oadd, Kl, r0, ap, getcon(8, fn));
|
||||
r0 = newtmp("abi", Kl, fn);
|
||||
emit(Ostorew, Kw, R, getcon(fp, fn), r0);
|
||||
emit(Oadd, Kl, r0, ap, getcon(4, fn));
|
||||
emit(Ostorew, Kw, R, getcon(gp, fn), ap);
|
||||
}
|
||||
|
||||
void
|
||||
amd64_sysv_abi(Fn *fn)
|
||||
{
|
||||
Blk *b;
|
||||
Ins *i, *i0;
|
||||
RAlloc *ral;
|
||||
int n0, n1, ioff, fa;
|
||||
|
||||
for (b=fn->start; b; b=b->link)
|
||||
b->visit = 0;
|
||||
|
||||
/* lower parameters */
|
||||
for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++)
|
||||
if (!ispar(i->op))
|
||||
break;
|
||||
fa = selpar(fn, b->ins, i);
|
||||
n0 = &insb[NIns] - curi;
|
||||
ioff = i - b->ins;
|
||||
n1 = b->nins - ioff;
|
||||
vgrow(&b->ins, n0+n1);
|
||||
icpy(b->ins+n0, b->ins+ioff, n1);
|
||||
icpy(b->ins, curi, n0);
|
||||
b->nins = n0+n1;
|
||||
|
||||
/* lower calls, returns, and vararg instructions */
|
||||
ral = 0;
|
||||
b = fn->start;
|
||||
do {
|
||||
if (!(b = b->link))
|
||||
b = fn->start; /* do it last */
|
||||
if (b->visit)
|
||||
continue;
|
||||
curi = &insb[NIns];
|
||||
selret(b, fn);
|
||||
for (i=&b->ins[b->nins]; i!=b->ins;)
|
||||
switch ((--i)->op) {
|
||||
default:
|
||||
emiti(*i);
|
||||
break;
|
||||
case Ocall:
|
||||
for (i0=i; i0>b->ins; i0--)
|
||||
if (!isarg((i0-1)->op))
|
||||
break;
|
||||
selcall(fn, i0, i, &ral);
|
||||
i = i0;
|
||||
break;
|
||||
case Ovastart:
|
||||
selvastart(fn, fa, i->arg[0]);
|
||||
break;
|
||||
case Ovaarg:
|
||||
selvaarg(fn, b, i);
|
||||
break;
|
||||
case Oarg:
|
||||
case Oargc:
|
||||
die("unreachable");
|
||||
}
|
||||
if (b == fn->start)
|
||||
for (; ral; ral=ral->link)
|
||||
emiti(ral->i);
|
||||
idup(b, curi, &insb[NIns]-curi);
|
||||
} while (b != fn->start);
|
||||
|
||||
if (debug['A']) {
|
||||
fprintf(stderr, "\n> After ABI lowering:\n");
|
||||
printfn(fn, stderr);
|
||||
}
|
||||
}
|
||||
67
src/qbe/amd64/targ.c
Normal file
67
src/qbe/amd64/targ.c
Normal file
@@ -0,0 +1,67 @@
|
||||
#include "all.h"
|
||||
|
||||
Amd64Op amd64_op[NOp] = {
|
||||
#define O(op, t, x) [O##op] =
|
||||
#define X(nm, zf, lf) { nm, zf, lf, },
|
||||
#include "../ops.h"
|
||||
};
|
||||
|
||||
static int
|
||||
amd64_memargs(int op)
|
||||
{
|
||||
return amd64_op[op].nmem;
|
||||
}
|
||||
|
||||
#define AMD64_COMMON \
|
||||
.gpr0 = RAX, \
|
||||
.ngpr = NGPR, \
|
||||
.fpr0 = XMM0, \
|
||||
.nfpr = NFPR, \
|
||||
.rglob = BIT(RBP) | BIT(RSP), \
|
||||
.nrglob = 2, \
|
||||
.memargs = amd64_memargs, \
|
||||
.abi0 = elimsb, \
|
||||
.isel = amd64_isel, \
|
||||
.cansel = 1,
|
||||
|
||||
Target T_amd64_sysv = {
|
||||
.name = "amd64_sysv",
|
||||
.emitfin = elf_emitfin,
|
||||
.asloc = ".L",
|
||||
.abi1 = amd64_sysv_abi,
|
||||
.rsave = amd64_sysv_rsave,
|
||||
.nrsave = {NGPS_SYSV, NFPS},
|
||||
.retregs = amd64_sysv_retregs,
|
||||
.argregs = amd64_sysv_argregs,
|
||||
.emitfn = amd64_sysv_emitfn,
|
||||
AMD64_COMMON
|
||||
};
|
||||
|
||||
Target T_amd64_apple = {
|
||||
.name = "amd64_apple",
|
||||
.apple = 1,
|
||||
.emitfin = macho_emitfin,
|
||||
.asloc = "L",
|
||||
.assym = "_",
|
||||
.abi1 = amd64_sysv_abi,
|
||||
.rsave = amd64_sysv_rsave,
|
||||
.nrsave = {NGPS_SYSV, NFPS},
|
||||
.retregs = amd64_sysv_retregs,
|
||||
.argregs = amd64_sysv_argregs,
|
||||
.emitfn = amd64_sysv_emitfn,
|
||||
AMD64_COMMON
|
||||
};
|
||||
|
||||
Target T_amd64_win = {
|
||||
.name = "amd64_win",
|
||||
.windows = 1,
|
||||
.emitfin = pe_emitfin,
|
||||
.asloc = "L",
|
||||
.abi1 = amd64_winabi_abi,
|
||||
.rsave = amd64_winabi_rsave,
|
||||
.nrsave = {NGPS_WIN, NFPS},
|
||||
.retregs = amd64_winabi_retregs,
|
||||
.argregs = amd64_winabi_argregs,
|
||||
.emitfn = amd64_winabi_emitfn,
|
||||
AMD64_COMMON
|
||||
};
|
||||
763
src/qbe/amd64/winabi.c
Executable file
763
src/qbe/amd64/winabi.c
Executable file
@@ -0,0 +1,763 @@
|
||||
#include "all.h"
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
typedef enum ArgPassStyle {
|
||||
APS_Invalid = 0,
|
||||
APS_Register,
|
||||
APS_InlineOnStack,
|
||||
APS_CopyAndPointerInRegister,
|
||||
APS_CopyAndPointerOnStack,
|
||||
APS_VarargsTag,
|
||||
APS_EnvTag,
|
||||
} ArgPassStyle;
|
||||
|
||||
typedef struct ArgClass {
|
||||
Typ* type;
|
||||
ArgPassStyle style;
|
||||
int align;
|
||||
uint size;
|
||||
int cls;
|
||||
Ref ref;
|
||||
} ArgClass;
|
||||
|
||||
typedef struct ExtraAlloc ExtraAlloc;
|
||||
struct ExtraAlloc {
|
||||
Ins instr;
|
||||
ExtraAlloc* link;
|
||||
};
|
||||
|
||||
#define ALIGN_DOWN(n, a) ((n) & ~((a)-1))
|
||||
#define ALIGN_UP(n, a) ALIGN_DOWN((n) + (a)-1, (a))
|
||||
|
||||
// Number of stack bytes required to be reserved for the callee.
|
||||
#define SHADOW_SPACE_SIZE 32
|
||||
|
||||
int amd64_winabi_rsave[] = {RCX, RDX, R8, R9, R10, R11, RAX, XMM0,
|
||||
XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8,
|
||||
XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, -1};
|
||||
int amd64_winabi_rclob[] = {RBX, R12, R13, R14, R15, RSI, RDI, -1};
|
||||
|
||||
MAKESURE(winabi_arrays_ok,
|
||||
sizeof amd64_winabi_rsave == (NGPS_WIN + NFPS + 1) * sizeof(int) &&
|
||||
sizeof amd64_winabi_rclob == (NCLR_WIN + 1) * sizeof(int));
|
||||
|
||||
// layout of call's second argument (RCall)
|
||||
//
|
||||
// bit 0: rax returned
|
||||
// bit 1: xmm0 returned
|
||||
// bits 23: 0
|
||||
// bits 4567: rcx, rdx, r8, r9 passed
|
||||
// bits 89ab: xmm0,1,2,3 passed
|
||||
// bit c: env call (rax passed)
|
||||
// bits d..1f: 0
|
||||
|
||||
// Decodes the "returned registers" flags of the RCall reference `r`.
//
// Returns the set of registers that hold the call's return value. When `p`
// is non-null, p[0] receives the number of integer return registers and p[1]
// the number of float return registers; each is 0 or 1.
bits amd64_winabi_retregs(Ref r, int p[2]) {
  assert(rtype(r) == RCall);

  bits b = 0;
  int num_int_returns = r.val & 1;
  // Bit 1 is a flag, not a count: shift it down so that a returned XMM0 is
  // reported as one register. Masking with 2 alone yielded a count of 2.
  int num_float_returns = (r.val >> 1) & 1;
  // NOTE(review): XMM0 is reported whenever RAX is not, including when
  // neither flag is set. Kept as in the original; confirm this is intended.
  if (num_int_returns == 1) {
    b |= BIT(RAX);
  } else {
    b |= BIT(XMM0);
  }
  if (p) {
    p[0] = num_int_returns;
    p[1] = num_float_returns;
  }
  return b;
}
|
||||
|
||||
// Returns the number of bits set in `b`.
static uint popcnt(bits b) {
  uint count = 0;
  // Each iteration clears the lowest set bit.
  while (b != 0) {
    b &= b - 1;
    ++count;
  }
  return count;
}
|
||||
|
||||
// Decodes the "passed registers" flags of the RCall reference `r`.
//
// Returns the set of registers carrying arguments, RAX included for env
// calls. When `p` is non-null, p[0] and p[1] receive the number of integer
// and float argument registers in use.
bits amd64_winabi_argregs(Ref r, int p[2]) {
  static const int int_regs[4] = {RCX, RDX, R8, R9};

  assert(rtype(r) == RCall);

  // On SysV, these are counts. Here a count isn't sufficient: we need to know
  // exactly which registers are in use because they're not necessarily
  // contiguous, so each field is a 4-bit mask.
  int int_passed = (r.val >> 4) & 15;
  int float_passed = (r.val >> 8) & 15;
  bool env_param = (r.val >> 12) & 1;

  bits b = env_param ? BIT(RAX) : 0;
  for (int idx = 0; idx < 4; ++idx) {
    if (int_passed & (1 << idx)) {
      b |= BIT(int_regs[idx]);
    }
    if (float_passed & (1 << idx)) {
      b |= BIT(XMM0 + idx);
    }
  }

  if (p) {
    // TODO: The only place this is used is live.c. I'm not sure what should be
    // returned here wrt to using the same counter for int/float regs on win.
    // For now, try the number of registers in use even though they're not
    // contiguous.
    p[0] = popcnt(int_passed);
    p[1] = popcnt(float_passed);
  }
  return b;
}
|
||||
|
||||
typedef struct RegisterUsage {
|
||||
// Counter for both int/float as they're counted together. Only if the bool's
|
||||
// set in regs_passed is the given register *actually* needed for a value
|
||||
// (i.e. needs to be saved, etc.).
|
||||
int num_regs_passed;
|
||||
|
||||
// Indexed first by 0=int, 1=float, use KBASE(cls).
|
||||
// Indexed second by register index in calling convention, so for integer,
|
||||
// 0=RCX, 1=RDX, 2=R8, 3=R9, and for float XMM0, XMM1, XMM2, XMM3.
|
||||
bool regs_passed[2][4];
|
||||
|
||||
bool rax_returned;
|
||||
bool xmm0_returned;
|
||||
|
||||
// This is also used as where the va_start will start for varargs functions
|
||||
// (there's no 'Oparv', so we need to keep track of a count here.)
|
||||
int num_named_args_passed;
|
||||
|
||||
// This is set when classifying the arguments for a call (but not when
|
||||
// classifying the parameters of a function definition).
|
||||
bool is_varargs_call;
|
||||
|
||||
bool has_env;
|
||||
} RegisterUsage;
|
||||
|
||||
// Packs `reg_usage` into the integer stored in a call's RCall argument:
// bit 0 = rax returned, bit 1 = xmm0 returned, bits 4-7 = integer argument
// registers in use, bits 8-11 = float argument registers in use,
// bit 12 = env call.
static int register_usage_to_call_arg_value(RegisterUsage reg_usage) {
  int value = 0;

  value |= reg_usage.rax_returned << 0;
  value |= reg_usage.xmm0_returned << 1;
  for (int idx = 0; idx < 4; ++idx) {
    value |= reg_usage.regs_passed[0][idx] << (4 + idx);
    value |= reg_usage.regs_passed[1][idx] << (8 + idx);
  }
  value |= reg_usage.has_env << 12;
  return value;
}
|
||||
|
||||
// Assigns the argument to a register if there's any left according to the
|
||||
// calling convention, and updates the regs_passed bools. Otherwise marks the
|
||||
// value as needing stack space to be passed.
|
||||
static void assign_register_or_stack(RegisterUsage* reg_usage,
|
||||
ArgClass* arg,
|
||||
bool is_float,
|
||||
bool by_copy) {
|
||||
if (reg_usage->num_regs_passed == 4) {
|
||||
arg->style = by_copy ? APS_CopyAndPointerOnStack : APS_InlineOnStack;
|
||||
} else {
|
||||
reg_usage->regs_passed[is_float][reg_usage->num_regs_passed] = true;
|
||||
++reg_usage->num_regs_passed;
|
||||
arg->style = by_copy ? APS_CopyAndPointerInRegister : APS_Register;
|
||||
}
|
||||
++reg_usage->num_named_args_passed;
|
||||
}
|
||||
|
||||
// Returns true when an aggregate of this type is passed by copy-and-pointer
// rather than directly.
static bool type_is_by_copy(Typ* type) {
  if (type->isdark) {
    return true;
  }
  // Note that only these sizes are passed by register, even though e.g. a
  // 5 byte struct would "fit", it still is passed by copy-and-pointer.
  switch (type->size) {
    case 1:
    case 2:
    case 4:
    case 8:
      return false;
    default:
      return true;
  }
}
|
||||
|
||||
// This function is used for both arguments and parameters.
|
||||
// begin_instr should either point at the first Oarg or Opar, and end_instr
|
||||
// should point past the last one (so to the Ocall for arguments, or to the
|
||||
// first 'real' instruction of the function for parameters).
|
||||
static void classify_arguments(RegisterUsage* reg_usage,
|
||||
Ins* begin_instr,
|
||||
Ins* end_instr,
|
||||
ArgClass* arg_classes,
|
||||
Ref* env) {
|
||||
ArgClass* arg = arg_classes;
|
||||
// For each argument, determine how it will be passed (int, float, stack)
|
||||
// and update the `reg_usage` counts. Additionally, fill out arg_classes for
|
||||
// each argument.
|
||||
for (Ins* instr = begin_instr; instr < end_instr; ++instr, ++arg) {
|
||||
switch (instr->op) {
|
||||
case Oarg:
|
||||
case Opar:
|
||||
assign_register_or_stack(reg_usage, arg, KBASE(instr->cls),
|
||||
/*by_copy=*/false);
|
||||
arg->cls = instr->cls;
|
||||
arg->align = 3;
|
||||
arg->size = 8;
|
||||
break;
|
||||
case Oargc:
|
||||
case Oparc: {
|
||||
int typ_index = instr->arg[0].val;
|
||||
Typ* type = &typ[typ_index];
|
||||
bool by_copy = type_is_by_copy(type);
|
||||
assign_register_or_stack(reg_usage, arg, /*is_float=*/false, by_copy);
|
||||
arg->cls = Kl;
|
||||
if (!by_copy && type->size <= 4) {
|
||||
arg->cls = Kw;
|
||||
}
|
||||
arg->align = 3;
|
||||
arg->size = type->size;
|
||||
break;
|
||||
}
|
||||
case Oarge:
|
||||
*env = instr->arg[0];
|
||||
arg->style = APS_EnvTag;
|
||||
reg_usage->has_env = true;
|
||||
break;
|
||||
case Opare:
|
||||
*env = instr->to;
|
||||
arg->style = APS_EnvTag;
|
||||
reg_usage->has_env = true;
|
||||
break;
|
||||
case Oargv:
|
||||
reg_usage->is_varargs_call = true;
|
||||
arg->style = APS_VarargsTag;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (reg_usage->has_env && reg_usage->is_varargs_call) {
|
||||
die("can't use env with varargs");
|
||||
}
|
||||
|
||||
// During a varargs call, float arguments have to be duplicated to their
|
||||
// associated integer register, so mark them as in-use too.
|
||||
if (reg_usage->is_varargs_call) {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (reg_usage->regs_passed[/*float*/ 1][i]) {
|
||||
reg_usage->regs_passed[/*int*/ 0][i] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// True when `ty` is one of the integer base classes (KBASE(ty) == 0).
// `ty` must be one of the four basic classes, which the assert enforces.
static bool is_integer_type(int ty) {
  assert(ty >= 0 && ty < 4 && "expecting Kw Kl Ks Kd");
  if (KBASE(ty) != 0) {
    return false;
  }
  return true;
}
|
||||
|
||||
static Ref register_for_arg(int cls, int counter) {
|
||||
assert(counter < 4);
|
||||
if (is_integer_type(cls)) {
|
||||
return TMP(amd64_winabi_rsave[counter]);
|
||||
} else {
|
||||
return TMP(XMM0 + counter);
|
||||
}
|
||||
}
|
||||
|
||||
static Ins* lower_call(Fn* func,
|
||||
Blk* block,
|
||||
Ins* call_instr,
|
||||
ExtraAlloc** pextra_alloc) {
|
||||
// Call arguments are instructions. Walk through them to find the end of the
|
||||
// call+args that we need to process (and return the instruction past the body
|
||||
// of the instruction for continuing processing).
|
||||
Ins* instr_past_args = call_instr - 1;
|
||||
for (; instr_past_args >= block->ins; --instr_past_args) {
|
||||
if (!isarg(instr_past_args->op)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ins* earliest_arg_instr = instr_past_args + 1;
|
||||
|
||||
// Don't need an ArgClass for the call itself, so one less than the total
|
||||
// number of instructions we're dealing with.
|
||||
uint num_args = call_instr - earliest_arg_instr;
|
||||
ArgClass* arg_classes = alloc(num_args * sizeof(ArgClass));
|
||||
|
||||
RegisterUsage reg_usage = {0};
|
||||
ArgClass ret_arg_class = {0};
|
||||
|
||||
// Ocall's two arguments are the the function to be called in 0, and, if the
|
||||
// the function returns a non-basic type, then arg[1] is a reference to the
|
||||
// type of the return. req checks if Refs are equal; `R` is 0.
|
||||
bool il_has_struct_return = !req(call_instr->arg[1], R);
|
||||
bool is_struct_return = false;
|
||||
if (il_has_struct_return) {
|
||||
Typ* ret_type = &typ[call_instr->arg[1].val];
|
||||
is_struct_return = type_is_by_copy(ret_type);
|
||||
if (is_struct_return) {
|
||||
assign_register_or_stack(®_usage, &ret_arg_class, /*is_float=*/false,
|
||||
/*by_copy=*/true);
|
||||
}
|
||||
ret_arg_class.size = ret_type->size;
|
||||
}
|
||||
Ref env = R;
|
||||
classify_arguments(®_usage, earliest_arg_instr, call_instr, arg_classes,
|
||||
&env);
|
||||
|
||||
// We now know which arguments are on the stack and which are in registers, so
|
||||
// we can allocate the correct amount of space to stash the stack-located ones
|
||||
// into.
|
||||
uint stack_usage = 0;
|
||||
for (uint i = 0; i < num_args; ++i) {
|
||||
ArgClass* arg = &arg_classes[i];
|
||||
// stack_usage only accounts for pushes that are for values that don't have
|
||||
// enough registers. Large struct copies are alloca'd separately, and then
|
||||
// only have (potentially) 8 bytes to add to stack_usage here.
|
||||
if (arg->style == APS_InlineOnStack) {
|
||||
if (arg->align > 4) {
|
||||
err("win abi cannot pass alignments > 16");
|
||||
}
|
||||
stack_usage += arg->size;
|
||||
} else if (arg->style == APS_CopyAndPointerOnStack) {
|
||||
stack_usage += 8;
|
||||
}
|
||||
}
|
||||
stack_usage = ALIGN_UP(stack_usage, 16);
|
||||
|
||||
// Note that here we're logically 'after' the call (due to emitting
|
||||
// instructions in reverse order), so we're doing a negative stack
|
||||
// allocation to clean up after the call.
|
||||
Ref stack_size_ref =
|
||||
getcon(-(int64_t)(stack_usage + SHADOW_SPACE_SIZE), func);
|
||||
emit(Osalloc, Kl, R, stack_size_ref, R);
|
||||
|
||||
ExtraAlloc* return_pad = NULL;
|
||||
if (is_struct_return) {
|
||||
return_pad = alloc(sizeof(ExtraAlloc));
|
||||
Ref ret_pad_ref = newtmp("abi.ret_pad", Kl, func);
|
||||
return_pad->instr =
|
||||
(Ins){Oalloc8, Kl, ret_pad_ref, {getcon(ret_arg_class.size, func)}};
|
||||
return_pad->link = (*pextra_alloc);
|
||||
*pextra_alloc = return_pad;
|
||||
reg_usage.rax_returned = true;
|
||||
emit(Ocopy, call_instr->cls, call_instr->to, TMP(RAX), R);
|
||||
} else {
|
||||
if (il_has_struct_return) {
|
||||
// In the case that at the IL level, a struct return was specified, but as
|
||||
// far as the calling convention is concerned it's not actually by
|
||||
// pointer, we need to store the return value into an alloca because
|
||||
// subsequent IL will still be treating the function return as a pointer.
|
||||
ExtraAlloc* return_copy = alloc(sizeof(ExtraAlloc));
|
||||
return_copy->instr =
|
||||
(Ins){Oalloc8, Kl, call_instr->to, {getcon(8, func)}};
|
||||
return_copy->link = (*pextra_alloc);
|
||||
*pextra_alloc = return_copy;
|
||||
Ref copy = newtmp("abi.copy", Kl, func);
|
||||
emit(Ostorel, 0, R, copy, call_instr->to);
|
||||
emit(Ocopy, Kl, copy, TMP(RAX), R);
|
||||
reg_usage.rax_returned = true;
|
||||
} else if (is_integer_type(call_instr->cls)) {
|
||||
// Only a basic type returned from the call, integer.
|
||||
emit(Ocopy, call_instr->cls, call_instr->to, TMP(RAX), R);
|
||||
reg_usage.rax_returned = true;
|
||||
} else {
|
||||
// Basic type, floating point.
|
||||
emit(Ocopy, call_instr->cls, call_instr->to, TMP(XMM0), R);
|
||||
reg_usage.xmm0_returned = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Emit the actual call instruction. There's no 'to' value by this point
|
||||
// because we've lowered it into register manipulation (that's the `R`),
|
||||
// arg[0] of the call is the function, and arg[1] is register usage is
|
||||
// documented as above (copied from SysV).
|
||||
emit(Ocall, call_instr->cls, R, call_instr->arg[0],
|
||||
CALL(register_usage_to_call_arg_value(reg_usage)));
|
||||
|
||||
if (!req(R, env)) {
|
||||
// If there's an env arg to be passed, it gets stashed in RAX.
|
||||
emit(Ocopy, Kl, TMP(RAX), env, R);
|
||||
}
|
||||
|
||||
if (reg_usage.is_varargs_call) {
|
||||
// Any float arguments need to be duplicated to integer registers. This is
|
||||
// required by the calling convention so that dumping to shadow space can be
|
||||
// done without a prototype and for varargs.
|
||||
#define DUP_IF_USED(index, floatreg, intreg) \
|
||||
if (reg_usage.regs_passed[/*float*/ 1][index]) { \
|
||||
emit(Ocast, Kl, TMP(intreg), TMP(floatreg), R); \
|
||||
}
|
||||
DUP_IF_USED(0, XMM0, RCX);
|
||||
DUP_IF_USED(1, XMM1, RDX);
|
||||
DUP_IF_USED(2, XMM2, R8);
|
||||
DUP_IF_USED(3, XMM3, R9);
|
||||
#undef DUP_IF_USED
|
||||
}
|
||||
|
||||
int reg_counter = 0;
|
||||
if (is_struct_return) {
|
||||
Ref first_reg = register_for_arg(Kl, reg_counter++);
|
||||
emit(Ocopy, Kl, first_reg, return_pad->instr.to, R);
|
||||
}
|
||||
|
||||
// This is where we actually do the load of values into registers or into
|
||||
// stack slots.
|
||||
Ref arg_stack_slots = newtmp("abi.args", Kl, func);
|
||||
uint slot_offset = SHADOW_SPACE_SIZE;
|
||||
ArgClass* arg = arg_classes;
|
||||
for (Ins* instr = earliest_arg_instr; instr != call_instr; ++instr, ++arg) {
|
||||
switch (arg->style) {
|
||||
case APS_Register: {
|
||||
Ref into = register_for_arg(arg->cls, reg_counter++);
|
||||
if (instr->op == Oargc) {
|
||||
// If this is a small struct being passed by value. The value in the
|
||||
// instruction in this case is a pointer, but it needs to be loaded
|
||||
// into the register.
|
||||
emit(Oload, arg->cls, into, instr->arg[1], R);
|
||||
} else {
|
||||
// Otherwise, a normal value passed in a register.
|
||||
emit(Ocopy, instr->cls, into, instr->arg[0], R);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case APS_InlineOnStack: {
|
||||
Ref slot = newtmp("abi.off", Kl, func);
|
||||
if (instr->op == Oargc) {
|
||||
// This is a small struct, so it's not passed by copy, but the
|
||||
// instruction is a pointer. So we need to copy it into the stack
|
||||
// slot. (And, remember that these are emitted backwards, so store,
|
||||
// then load.)
|
||||
Ref smalltmp = newtmp("abi.smalltmp", arg->cls, func);
|
||||
emit(Ostorel, 0, R, smalltmp, slot);
|
||||
emit(Oload, arg->cls, smalltmp, instr->arg[1], R);
|
||||
} else {
|
||||
// Stash the value into the stack slot.
|
||||
emit(Ostorel, 0, R, instr->arg[0], slot);
|
||||
}
|
||||
emit(Oadd, Kl, slot, arg_stack_slots, getcon(slot_offset, func));
|
||||
slot_offset += arg->size;
|
||||
break;
|
||||
}
|
||||
case APS_CopyAndPointerInRegister:
|
||||
case APS_CopyAndPointerOnStack: {
|
||||
// Alloca a space to copy into, and blit the value from the instr to the
|
||||
// copied location.
|
||||
ExtraAlloc* arg_copy = alloc(sizeof(ExtraAlloc));
|
||||
Ref copy_ref = newtmp("abi.copy", Kl, func);
|
||||
arg_copy->instr =
|
||||
(Ins){Oalloc8, Kl, copy_ref, {getcon(arg->size, func)}};
|
||||
arg_copy->link = (*pextra_alloc);
|
||||
*pextra_alloc = arg_copy;
|
||||
emit(Oblit1, 0, R, INT(arg->size), R);
|
||||
emit(Oblit0, 0, R, instr->arg[1], copy_ref);
|
||||
|
||||
// Now load the pointer into the correct register or stack slot.
|
||||
if (arg->style == APS_CopyAndPointerInRegister) {
|
||||
Ref into = register_for_arg(arg->cls, reg_counter++);
|
||||
emit(Ocopy, Kl, into, copy_ref, R);
|
||||
} else {
|
||||
assert(arg->style == APS_CopyAndPointerOnStack);
|
||||
Ref slot = newtmp("abi.off", Kl, func);
|
||||
emit(Ostorel, 0, R, copy_ref, slot);
|
||||
emit(Oadd, Kl, slot, arg_stack_slots, getcon(slot_offset, func));
|
||||
slot_offset += 8;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case APS_EnvTag:
|
||||
case APS_VarargsTag:
|
||||
// Nothing to do here, see right before the call for reg dupe.
|
||||
break;
|
||||
case APS_Invalid:
|
||||
die("unreachable");
|
||||
}
|
||||
}
|
||||
|
||||
if (stack_usage) {
|
||||
// The last (first in call order) thing we do is allocate the the stack
|
||||
// space we're going to fill with temporaries.
|
||||
emit(Osalloc, Kl, arg_stack_slots,
|
||||
getcon(stack_usage + SHADOW_SPACE_SIZE, func), R);
|
||||
} else {
|
||||
// When there's no usage for temporaries, we can add this into the other
|
||||
// alloca, but otherwise emit it separately (not storing into a reference)
|
||||
// so that it doesn't get removed later for being useless.
|
||||
emit(Osalloc, Kl, R, getcon(SHADOW_SPACE_SIZE, func), R);
|
||||
}
|
||||
|
||||
return instr_past_args;
|
||||
}
|
||||
|
||||
// Rewrites a value-returning jump at the end of `block` into a plain Jret0
// preceded by the moves that place the value in RAX or XMM0. The jump's
// argument is replaced with the encoded register usage. Blocks that do not
// end in a return, or already end in Jret0, are left untouched.
static void lower_block_return(Fn* func, Blk* block) {
  const int kind = block->jmp.type;
  if (kind == Jret0 || !isret(kind)) {
    return;
  }

  // Remember the returned value, then demote the jump to a void return; the
  // value is now carried by the register moves emitted below.
  Ref value = block->jmp.arg;
  block->jmp.type = Jret0;

  RegisterUsage usage = {0};

  if (kind != Jretc) {
    // Basic type: the class is encoded as an offset from Jretw.
    int cls = kind - Jretw;
    if (!is_integer_type(cls)) {
      emit(Ocopy, cls, TMP(XMM0), value, R);
      usage.xmm0_returned = true;
    } else {
      emit(Ocopy, cls, TMP(RAX), value, R);
      usage.rax_returned = true;
    }
  } else {
    Typ* ret_type = &typ[func->retty];
    if (!type_is_by_copy(ret_type)) {
      // Register-sized aggregate: load its bits straight into RAX.
      emit(Oload, Kl, TMP(RAX), value, R);
    } else {
      // By-pointer aggregate: blit into the buffer referenced by func->retr
      // and hand that pointer back in RAX (emitted in reverse order).
      assert(rtype(func->retr) == RTmp);
      emit(Ocopy, Kl, TMP(RAX), func->retr, R);
      emit(Oblit1, 0, R, INT(ret_type->size), R);
      emit(Oblit0, 0, R, value, func->retr);
    }
    usage.rax_returned = true;
  }

  block->jmp.arg = CALL(register_usage_to_call_arg_value(usage));
}
|
||||
|
||||
static void lower_vastart(Fn* func,
|
||||
RegisterUsage* param_reg_usage,
|
||||
Ref valist) {
|
||||
assert(func->vararg);
|
||||
// In varargs functions:
|
||||
// 1. the int registers are already dumped to the shadow stack space;
|
||||
// 2. any parameters passed in floating point registers have
|
||||
// been duplicated to the integer registers
|
||||
// 3. we ensure (later) that for varargs functions we're always using an rbp
|
||||
// frame pointer.
|
||||
// So, the ... argument is just indexed past rbp by the number of named values
|
||||
// that were actually passed.
|
||||
|
||||
Ref offset = newtmp("abi.vastart", Kl, func);
|
||||
emit(Ostorel, 0, R, offset, valist);
|
||||
|
||||
// *8 for sizeof(u64), +16 because the return address and rbp have been pushed
|
||||
// by the time we get to the body of the function.
|
||||
emit(Oadd, Kl, offset, TMP(RBP),
|
||||
getcon(param_reg_usage->num_named_args_passed * 8 + 16, func));
|
||||
}
|
||||
|
||||
// Lowers a vaarg. The va_list is treated as a pointer to the next argument
// slot: read the current pointer, load the argument through it, then write
// back the pointer advanced by 8. The emits below are in reverse of that
// execution order, as everywhere in this pass.
static void lower_vaarg(Fn* func, Ins* vaarg_instr) {
  Ref advanced = newtmp("abi.vaarg.inc", Kl, func);
  Ref current = newtmp("abi.vaarg.ptr", Kl, func);

  // Executes last: store the advanced pointer back into the va_list.
  emit(Ostorel, 0, R, advanced, vaarg_instr->arg[0]);
  emit(Oadd, Kl, advanced, current, getcon(8, func));
  // Fetch the argument itself into the instruction's destination.
  emit(Oload, vaarg_instr->cls, vaarg_instr->to, current, R);
  // Executes first: read the current slot pointer out of the va_list.
  emit(Oload, Kl, current, vaarg_instr->arg[0], R);
}
|
||||
|
||||
static void lower_args_for_block(Fn* func,
|
||||
Blk* block,
|
||||
RegisterUsage* param_reg_usage,
|
||||
ExtraAlloc** pextra_alloc) {
|
||||
// global temporary buffer used by emit. Reset to the end, and predecremented
|
||||
// when adding to it.
|
||||
curi = &insb[NIns];
|
||||
|
||||
lower_block_return(func, block);
|
||||
|
||||
if (block->nins) {
|
||||
// Work backwards through the instructions, either copying them unchanged,
|
||||
// or modifying as necessary.
|
||||
for (Ins* instr = &block->ins[block->nins - 1]; instr >= block->ins;) {
|
||||
switch (instr->op) {
|
||||
case Ocall:
|
||||
instr = lower_call(func, block, instr, pextra_alloc);
|
||||
break;
|
||||
case Ovastart:
|
||||
lower_vastart(func, param_reg_usage, instr->arg[0]);
|
||||
--instr;
|
||||
break;
|
||||
case Ovaarg:
|
||||
lower_vaarg(func, instr);
|
||||
--instr;
|
||||
break;
|
||||
case Oarg:
|
||||
case Oargc:
|
||||
die("unreachable");
|
||||
default:
|
||||
emiti(*instr);
|
||||
--instr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This it the start block, which is processed last. Add any allocas that
|
||||
// other blocks needed.
|
||||
bool is_start_block = block == func->start;
|
||||
if (is_start_block) {
|
||||
for (ExtraAlloc* ea = *pextra_alloc; ea; ea = ea->link) {
|
||||
emiti(ea->instr);
|
||||
}
|
||||
}
|
||||
|
||||
// emit/emiti add instructions from the end to the beginning of the temporary
|
||||
// global buffer. dup the final version into the final block storage.
|
||||
block->nins = &insb[NIns] - curi;
|
||||
idup(block, curi, block->nins);
|
||||
}
|
||||
|
||||
// Returns a pointer to the first instruction in `start_block` that is not a
// parameter instruction, or one past the last instruction if all of them are.
static Ins* find_end_of_func_parameters(Blk* start_block) {
  Ins* limit = &start_block->ins[start_block->nins];
  Ins* cur = start_block->ins;
  while (cur < limit && ispar(cur->op)) {
    ++cur;
  }
  return cur;
}
|
||||
|
||||
// Copy from registers/stack into values.
|
||||
static RegisterUsage lower_func_parameters(Fn* func) {
|
||||
// This is half-open, so end points after the last Opar.
|
||||
Blk* start_block = func->start;
|
||||
Ins* start_of_params = start_block->ins;
|
||||
Ins* end_of_params = find_end_of_func_parameters(start_block);
|
||||
|
||||
size_t num_params = end_of_params - start_of_params;
|
||||
ArgClass* arg_classes = alloc(num_params * sizeof(ArgClass));
|
||||
ArgClass arg_ret = {0};
|
||||
|
||||
// global temporary buffer used by emit. Reset to the end, and predecremented
|
||||
// when adding to it.
|
||||
curi = &insb[NIns];
|
||||
|
||||
int reg_counter = 0;
|
||||
RegisterUsage reg_usage = {0};
|
||||
if (func->retty >= 0) {
|
||||
bool by_copy = type_is_by_copy(&typ[func->retty]);
|
||||
if (by_copy) {
|
||||
assign_register_or_stack(®_usage, &arg_ret, /*is_float=*/false,
|
||||
by_copy);
|
||||
Ref ret_ref = newtmp("abi.ret", Kl, func);
|
||||
emit(Ocopy, Kl, ret_ref, TMP(RCX), R);
|
||||
func->retr = ret_ref;
|
||||
++reg_counter;
|
||||
}
|
||||
}
|
||||
Ref env = R;
|
||||
classify_arguments(®_usage, start_of_params, end_of_params, arg_classes,
|
||||
&env);
|
||||
func->reg = amd64_winabi_argregs(
|
||||
CALL(register_usage_to_call_arg_value(reg_usage)), NULL);
|
||||
|
||||
// Copy from the registers or stack slots into the named parameters. Depending
|
||||
// on how they're passed, they either need to be copied or loaded.
|
||||
ArgClass* arg = arg_classes;
|
||||
uint slot_offset = SHADOW_SPACE_SIZE / 4 + 4;
|
||||
for (Ins* instr = start_of_params; instr < end_of_params; ++instr, ++arg) {
|
||||
switch (arg->style) {
|
||||
case APS_Register: {
|
||||
Ref from = register_for_arg(arg->cls, reg_counter++);
|
||||
// If it's a struct at the IL level, we need to copy the register into
|
||||
// an alloca so we have something to point at (same for InlineOnStack).
|
||||
if (instr->op == Oparc) {
|
||||
arg->ref = newtmp("abi", Kl, func);
|
||||
emit(Ostorel, 0, R, arg->ref, instr->to);
|
||||
emit(Ocopy, instr->cls, arg->ref, from, R);
|
||||
emit(Oalloc8, Kl, instr->to, getcon(arg->size, func), R);
|
||||
} else {
|
||||
emit(Ocopy, instr->cls, instr->to, from, R);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case APS_InlineOnStack:
|
||||
if (instr->op == Oparc) {
|
||||
arg->ref = newtmp("abi", Kl, func);
|
||||
emit(Ostorel, 0, R, arg->ref, instr->to);
|
||||
emit(Ocopy, instr->cls, arg->ref, SLOT(-slot_offset), R);
|
||||
emit(Oalloc8, Kl, instr->to, getcon(arg->size, func), R);
|
||||
} else {
|
||||
emit(Ocopy, Kl, instr->to, SLOT(-slot_offset), R);
|
||||
}
|
||||
slot_offset += 2;
|
||||
break;
|
||||
case APS_CopyAndPointerOnStack:
|
||||
emit(Oload, Kl, instr->to, SLOT(-slot_offset), R);
|
||||
slot_offset += 2;
|
||||
break;
|
||||
case APS_CopyAndPointerInRegister: {
|
||||
// Because this has to be a copy (that we own), it is sufficient to just
|
||||
// copy the register to the target.
|
||||
Ref from = register_for_arg(Kl, reg_counter++);
|
||||
emit(Ocopy, Kl, instr->to, from, R);
|
||||
break;
|
||||
}
|
||||
case APS_EnvTag:
|
||||
break;
|
||||
case APS_VarargsTag:
|
||||
case APS_Invalid:
|
||||
die("unreachable");
|
||||
}
|
||||
}
|
||||
|
||||
// If there was an `env`, it was passed in RAX, so copy it into the env ref.
|
||||
if (!req(R, env)) {
|
||||
emit(Ocopy, Kl, env, TMP(RAX), R);
|
||||
}
|
||||
|
||||
int num_created_instrs = &insb[NIns] - curi;
|
||||
int num_other_after_instrs = (int)(start_block->nins - num_params);
|
||||
int new_total_instrs = num_other_after_instrs + num_created_instrs;
|
||||
Ins* new_instrs = vnew(new_total_instrs, sizeof(Ins), PFn);
|
||||
Ins* instr_p = icpy(new_instrs, curi, num_created_instrs);
|
||||
icpy(instr_p, end_of_params, num_other_after_instrs);
|
||||
start_block->nins = new_total_instrs;
|
||||
start_block->ins = new_instrs;
|
||||
|
||||
return reg_usage;
|
||||
}
|
||||
|
||||
// The main job of this function is to lower generic instructions into the
|
||||
// specific details of how arguments are passed, and parameters are
|
||||
// interpreted for win x64. A useful reference is
|
||||
// https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention .
|
||||
//
|
||||
// Some of the major differences from SysV if you're comparing the code
|
||||
// (non-exhaustive):
|
||||
// - only 4 int and 4 float regs are used
|
||||
// - when an int register is assigned a value, its associated float register is
|
||||
// left unused (and vice versa). i.e. there's only one counter as you assign
|
||||
// arguments to registers.
|
||||
// - any structs that aren't 1/2/4/8 bytes in size are passed by pointer, not
|
||||
// by copying them into the stack. So e.g. if you pass something like
|
||||
// `struct { void*, int64_t }` by value, it first needs to be copied to
|
||||
// another alloca (in order to maintain value semantics at the language
|
||||
// level), then the pointer to that copy is treated as a regular integer
|
||||
// argument (which then itself may *also* be copied to the stack in the case
|
||||
// there's no integer register remaining.)
|
||||
// - when calling a varargs functions, floating point values must be duplicated
|
||||
// integer registers. Along with the above restrictions, this makes varargs
|
||||
// handling simpler for the callee than SysV.
|
||||
void amd64_winabi_abi(Fn* func) {
|
||||
// The first thing to do is lower incoming parameters to this function.
|
||||
RegisterUsage param_reg_usage = lower_func_parameters(func);
|
||||
|
||||
// This is the second larger part of the job. We walk all blocks, and rewrite
|
||||
// instructions returns, calls, and handling of varargs into their win x64
|
||||
// specific versions. Any other instructions are just passed through unchanged
|
||||
// by using `emiti`.
|
||||
|
||||
// Skip over the entry block, and do it at the end so that our later
|
||||
// modifications can add allocations to the start block. In particular, we
|
||||
// need to add stack allocas for copies when structs are passed or returned by
|
||||
// value.
|
||||
ExtraAlloc* extra_alloc = NULL;
|
||||
for (Blk* block = func->start->link; block; block = block->link) {
|
||||
lower_args_for_block(func, block, ¶m_reg_usage, &extra_alloc);
|
||||
}
|
||||
lower_args_for_block(func, func->start, ¶m_reg_usage, &extra_alloc);
|
||||
|
||||
if (debug['A']) {
|
||||
fprintf(stderr, "\n> After ABI lowering:\n");
|
||||
printfn(func, stderr);
|
||||
}
|
||||
}
|
||||
852
src/qbe/arm64/abi.c
Normal file
852
src/qbe/arm64/abi.c
Normal file
@@ -0,0 +1,852 @@
|
||||
#include "all.h"
|
||||
|
||||
typedef struct Abi Abi;
|
||||
typedef struct Class Class;
|
||||
typedef struct Insl Insl;
|
||||
typedef struct Params Params;
|
||||
|
||||
enum {
|
||||
Cstk = 1, /* pass on the stack */
|
||||
Cptr = 2, /* replaced by a pointer */
|
||||
};
|
||||
|
||||
struct Class {
|
||||
char class;
|
||||
char ishfa;
|
||||
struct {
|
||||
char base;
|
||||
uchar size;
|
||||
} hfa;
|
||||
uint size;
|
||||
uint align;
|
||||
Typ *t;
|
||||
uchar nreg;
|
||||
uchar ngp;
|
||||
uchar nfp;
|
||||
int reg[4];
|
||||
int cls[4];
|
||||
};
|
||||
|
||||
struct Insl {
|
||||
Ins i;
|
||||
Insl *link;
|
||||
};
|
||||
|
||||
struct Params {
|
||||
uint ngp;
|
||||
uint nfp;
|
||||
uint stk;
|
||||
};
|
||||
|
||||
static int gpreg[12] = {R0, R1, R2, R3, R4, R5, R6, R7};
|
||||
static int fpreg[12] = {V0, V1, V2, V3, V4, V5, V6, V7};
|
||||
static int store[] = {
|
||||
[Kw] = Ostorew, [Kl] = Ostorel,
|
||||
[Ks] = Ostores, [Kd] = Ostored
|
||||
};
|
||||
|
||||
/* layout of call's second argument (RCall)
|
||||
*
|
||||
* 13
|
||||
* 29 14 | 9 5 2 0
|
||||
* |0.00|x|x|xxxx|xxxx|xxx|xx| range
|
||||
* | | | | | ` gp regs returned (0..2)
|
||||
* | | | | ` fp regs returned (0..4)
|
||||
* | | | ` gp regs passed (0..8)
|
||||
* | | ` fp regs passed (0..8)
|
||||
* | ` indirect result register x8 used (0..1)
|
||||
* ` env pointer passed in x9 (0..1)
|
||||
*/
|
||||
|
||||
static int
|
||||
isfloatv(Typ *t, char *cls)
|
||||
{
|
||||
Field *f;
|
||||
uint n;
|
||||
|
||||
for (n=0; n<t->nunion; n++)
|
||||
for (f=t->fields[n]; f->type != FEnd; f++)
|
||||
switch (f->type) {
|
||||
case Fs:
|
||||
if (*cls == Kd)
|
||||
return 0;
|
||||
*cls = Ks;
|
||||
break;
|
||||
case Fd:
|
||||
if (*cls == Ks)
|
||||
return 0;
|
||||
*cls = Kd;
|
||||
break;
|
||||
case FTyp:
|
||||
if (isfloatv(&typ[f->len], cls))
|
||||
break;
|
||||
/* fall through */
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
typclass(Class *c, Typ *t, int *gp, int *fp)
|
||||
{
|
||||
uint64_t sz, hfasz;
|
||||
uint n;
|
||||
|
||||
sz = (t->size + 7) & -8;
|
||||
c->t = t;
|
||||
c->class = 0;
|
||||
c->ngp = 0;
|
||||
c->nfp = 0;
|
||||
c->align = 8;
|
||||
|
||||
if (t->align > 3)
|
||||
err("alignments larger than 8 are not supported");
|
||||
|
||||
c->size = sz;
|
||||
c->hfa.base = Kx;
|
||||
c->ishfa = isfloatv(t, &c->hfa.base);
|
||||
hfasz = t->size/(KWIDE(c->hfa.base) ? 8 : 4);
|
||||
c->ishfa &= !t->isdark && hfasz <= 4;
|
||||
c->hfa.size = hfasz;
|
||||
|
||||
if (c->ishfa) {
|
||||
for (n=0; n<hfasz; n++, c->nfp++) {
|
||||
c->reg[n] = *fp++;
|
||||
c->cls[n] = c->hfa.base;
|
||||
}
|
||||
c->nreg = n;
|
||||
}
|
||||
else if (t->isdark || sz > 16 || sz == 0) {
|
||||
/* large structs are replaced by a
|
||||
* pointer to some caller-allocated
|
||||
* memory */
|
||||
c->class |= Cptr;
|
||||
c->size = 8;
|
||||
c->ngp = 1;
|
||||
*c->reg = *gp;
|
||||
*c->cls = Kl;
|
||||
}
|
||||
else {
|
||||
for (n=0; n<sz/8; n++, c->ngp++) {
|
||||
c->reg[n] = *gp++;
|
||||
c->cls[n] = Kl;
|
||||
}
|
||||
c->nreg = n;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
sttmps(Ref tmp[], int cls[], uint nreg, Ref mem, Fn *fn)
|
||||
{
|
||||
uint n;
|
||||
uint64_t off;
|
||||
Ref r;
|
||||
|
||||
assert(nreg <= 4);
|
||||
off = 0;
|
||||
for (n=0; n<nreg; n++) {
|
||||
tmp[n] = newtmp("abi", cls[n], fn);
|
||||
r = newtmp("abi", Kl, fn);
|
||||
emit(store[cls[n]], 0, R, tmp[n], r);
|
||||
emit(Oadd, Kl, r, mem, getcon(off, fn));
|
||||
off += KWIDE(cls[n]) ? 8 : 4;
|
||||
}
|
||||
}
|
||||
|
||||
/* todo, may read out of bounds */
|
||||
static void
|
||||
ldregs(int reg[], int cls[], int n, Ref mem, Fn *fn)
|
||||
{
|
||||
int i;
|
||||
uint64_t off;
|
||||
Ref r;
|
||||
|
||||
off = 0;
|
||||
for (i=0; i<n; i++) {
|
||||
r = newtmp("abi", Kl, fn);
|
||||
emit(Oload, cls[i], TMP(reg[i]), r, R);
|
||||
emit(Oadd, Kl, r, mem, getcon(off, fn));
|
||||
off += KWIDE(cls[i]) ? 8 : 4;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
selret(Blk *b, Fn *fn)
|
||||
{
|
||||
int j, k, cty;
|
||||
Ref r;
|
||||
Class cr;
|
||||
|
||||
j = b->jmp.type;
|
||||
|
||||
if (!isret(j) || j == Jret0)
|
||||
return;
|
||||
|
||||
r = b->jmp.arg;
|
||||
b->jmp.type = Jret0;
|
||||
|
||||
if (j == Jretc) {
|
||||
typclass(&cr, &typ[fn->retty], gpreg, fpreg);
|
||||
if (cr.class & Cptr) {
|
||||
assert(rtype(fn->retr) == RTmp);
|
||||
emit(Oblit1, 0, R, INT(cr.t->size), R);
|
||||
emit(Oblit0, 0, R, r, fn->retr);
|
||||
cty = 0;
|
||||
} else {
|
||||
ldregs(cr.reg, cr.cls, cr.nreg, r, fn);
|
||||
cty = (cr.nfp << 2) | cr.ngp;
|
||||
}
|
||||
} else {
|
||||
k = j - Jretw;
|
||||
if (KBASE(k) == 0) {
|
||||
emit(Ocopy, k, TMP(R0), r, R);
|
||||
cty = 1;
|
||||
} else {
|
||||
emit(Ocopy, k, TMP(V0), r, R);
|
||||
cty = 1 << 2;
|
||||
}
|
||||
}
|
||||
|
||||
b->jmp.arg = CALL(cty);
|
||||
}
|
||||
|
||||
static int
|
||||
argsclass(Ins *i0, Ins *i1, Class *carg)
|
||||
{
|
||||
int va, envc, ngp, nfp, *gp, *fp;
|
||||
Class *c;
|
||||
Ins *i;
|
||||
|
||||
va = 0;
|
||||
envc = 0;
|
||||
gp = gpreg;
|
||||
fp = fpreg;
|
||||
ngp = 8;
|
||||
nfp = 8;
|
||||
for (i=i0, c=carg; i<i1; i++, c++)
|
||||
switch (i->op) {
|
||||
case Oargsb:
|
||||
case Oargub:
|
||||
case Oparsb:
|
||||
case Oparub:
|
||||
c->size = 1;
|
||||
goto Scalar;
|
||||
case Oargsh:
|
||||
case Oarguh:
|
||||
case Oparsh:
|
||||
case Oparuh:
|
||||
c->size = 2;
|
||||
goto Scalar;
|
||||
case Opar:
|
||||
case Oarg:
|
||||
c->size = 8;
|
||||
if (T.apple && !KWIDE(i->cls))
|
||||
c->size = 4;
|
||||
Scalar:
|
||||
c->align = c->size;
|
||||
*c->cls = i->cls;
|
||||
if (va) {
|
||||
c->class |= Cstk;
|
||||
c->size = 8;
|
||||
c->align = 8;
|
||||
break;
|
||||
}
|
||||
if (KBASE(i->cls) == 0 && ngp > 0) {
|
||||
ngp--;
|
||||
*c->reg = *gp++;
|
||||
break;
|
||||
}
|
||||
if (KBASE(i->cls) == 1 && nfp > 0) {
|
||||
nfp--;
|
||||
*c->reg = *fp++;
|
||||
break;
|
||||
}
|
||||
c->class |= Cstk;
|
||||
break;
|
||||
case Oparc:
|
||||
case Oargc:
|
||||
typclass(c, &typ[i->arg[0].val], gp, fp);
|
||||
if (c->ngp <= ngp) {
|
||||
if (c->nfp <= nfp) {
|
||||
ngp -= c->ngp;
|
||||
nfp -= c->nfp;
|
||||
gp += c->ngp;
|
||||
fp += c->nfp;
|
||||
break;
|
||||
} else
|
||||
nfp = 0;
|
||||
} else
|
||||
ngp = 0;
|
||||
c->class |= Cstk;
|
||||
break;
|
||||
case Opare:
|
||||
case Oarge:
|
||||
*c->reg = R9;
|
||||
*c->cls = Kl;
|
||||
envc = 1;
|
||||
break;
|
||||
case Oargv:
|
||||
va = T.apple != 0;
|
||||
break;
|
||||
default:
|
||||
die("unreachable");
|
||||
}
|
||||
|
||||
return envc << 14 | (gp-gpreg) << 5 | (fp-fpreg) << 9;
|
||||
}
|
||||
|
||||
bits
|
||||
arm64_retregs(Ref r, int p[2])
|
||||
{
|
||||
bits b;
|
||||
int ngp, nfp;
|
||||
|
||||
assert(rtype(r) == RCall);
|
||||
ngp = r.val & 3;
|
||||
nfp = (r.val >> 2) & 7;
|
||||
if (p) {
|
||||
p[0] = ngp;
|
||||
p[1] = nfp;
|
||||
}
|
||||
b = 0;
|
||||
while (ngp--)
|
||||
b |= BIT(R0+ngp);
|
||||
while (nfp--)
|
||||
b |= BIT(V0+nfp);
|
||||
return b;
|
||||
}
|
||||
|
||||
bits
|
||||
arm64_argregs(Ref r, int p[2])
|
||||
{
|
||||
bits b;
|
||||
int ngp, nfp, x8, x9;
|
||||
|
||||
assert(rtype(r) == RCall);
|
||||
ngp = (r.val >> 5) & 15;
|
||||
nfp = (r.val >> 9) & 15;
|
||||
x8 = (r.val >> 13) & 1;
|
||||
x9 = (r.val >> 14) & 1;
|
||||
if (p) {
|
||||
p[0] = ngp + x8 + x9;
|
||||
p[1] = nfp;
|
||||
}
|
||||
b = 0;
|
||||
while (ngp--)
|
||||
b |= BIT(R0+ngp);
|
||||
while (nfp--)
|
||||
b |= BIT(V0+nfp);
|
||||
return b | ((bits)x8 << R8) | ((bits)x9 << R9);
|
||||
}
|
||||
|
||||
static void
|
||||
stkblob(Ref r, Class *c, Fn *fn, Insl **ilp)
|
||||
{
|
||||
Insl *il;
|
||||
int al;
|
||||
uint64_t sz;
|
||||
|
||||
il = alloc(sizeof *il);
|
||||
al = c->t->align - 2; /* NAlign == 3 */
|
||||
if (al < 0)
|
||||
al = 0;
|
||||
sz = c->class & Cptr ? c->t->size : c->size;
|
||||
il->i = (Ins){Oalloc+al, Kl, r, {getcon(sz, fn)}};
|
||||
il->link = *ilp;
|
||||
*ilp = il;
|
||||
}
|
||||
|
||||
static uint
|
||||
align(uint x, uint al)
|
||||
{
|
||||
return (x + al-1) & -al;
|
||||
}
|
||||
|
||||
static void
|
||||
selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
|
||||
{
|
||||
Ins *i;
|
||||
Class *ca, *c, cr;
|
||||
int op, cty;
|
||||
uint n, stk, off;;
|
||||
Ref r, rstk, tmp[4];
|
||||
|
||||
ca = alloc((i1-i0) * sizeof ca[0]);
|
||||
cty = argsclass(i0, i1, ca);
|
||||
|
||||
stk = 0;
|
||||
for (i=i0, c=ca; i<i1; i++, c++) {
|
||||
if (c->class & Cptr) {
|
||||
i->arg[0] = newtmp("abi", Kl, fn);
|
||||
stkblob(i->arg[0], c, fn, ilp);
|
||||
i->op = Oarg;
|
||||
}
|
||||
if (c->class & Cstk) {
|
||||
stk = align(stk, c->align);
|
||||
stk += c->size;
|
||||
}
|
||||
}
|
||||
stk = align(stk, 16);
|
||||
rstk = getcon(stk, fn);
|
||||
if (stk)
|
||||
emit(Oadd, Kl, TMP(SP), TMP(SP), rstk);
|
||||
|
||||
if (!req(i1->arg[1], R)) {
|
||||
typclass(&cr, &typ[i1->arg[1].val], gpreg, fpreg);
|
||||
stkblob(i1->to, &cr, fn, ilp);
|
||||
cty |= (cr.nfp << 2) | cr.ngp;
|
||||
if (cr.class & Cptr) {
|
||||
/* spill & rega expect calls to be
|
||||
* followed by copies from regs,
|
||||
* so we emit a dummy
|
||||
*/
|
||||
cty |= 1 << 13 | 1;
|
||||
emit(Ocopy, Kw, R, TMP(R0), R);
|
||||
} else {
|
||||
sttmps(tmp, cr.cls, cr.nreg, i1->to, fn);
|
||||
for (n=0; n<cr.nreg; n++) {
|
||||
r = TMP(cr.reg[n]);
|
||||
emit(Ocopy, cr.cls[n], tmp[n], r, R);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (KBASE(i1->cls) == 0) {
|
||||
emit(Ocopy, i1->cls, i1->to, TMP(R0), R);
|
||||
cty |= 1;
|
||||
} else {
|
||||
emit(Ocopy, i1->cls, i1->to, TMP(V0), R);
|
||||
cty |= 1 << 2;
|
||||
}
|
||||
}
|
||||
|
||||
emit(Ocall, 0, R, i1->arg[0], CALL(cty));
|
||||
|
||||
if (cty & (1 << 13))
|
||||
/* struct return argument */
|
||||
emit(Ocopy, Kl, TMP(R8), i1->to, R);
|
||||
|
||||
for (i=i0, c=ca; i<i1; i++, c++) {
|
||||
if ((c->class & Cstk) != 0)
|
||||
continue;
|
||||
if (i->op == Oarg || i->op == Oarge || isargbh(i->op))
|
||||
emit(Ocopy, *c->cls, TMP(*c->reg), i->arg[0], R);
|
||||
if (i->op == Oargc)
|
||||
ldregs(c->reg, c->cls, c->nreg, i->arg[1], fn);
|
||||
}
|
||||
|
||||
/* populate the stack */
|
||||
off = 0;
|
||||
for (i=i0, c=ca; i<i1; i++, c++) {
|
||||
if ((c->class & Cstk) == 0)
|
||||
continue;
|
||||
off = align(off, c->align);
|
||||
r = newtmp("abi", Kl, fn);
|
||||
if (i->op == Oarg || isargbh(i->op)) {
|
||||
switch (c->size) {
|
||||
case 1: op = Ostoreb; break;
|
||||
case 2: op = Ostoreh; break;
|
||||
case 4:
|
||||
case 8: op = store[*c->cls]; break;
|
||||
default: die("unreachable");
|
||||
}
|
||||
emit(op, 0, R, i->arg[0], r);
|
||||
} else {
|
||||
assert(i->op == Oargc);
|
||||
emit(Oblit1, 0, R, INT(c->size), R);
|
||||
emit(Oblit0, 0, R, i->arg[1], r);
|
||||
}
|
||||
emit(Oadd, Kl, r, TMP(SP), getcon(off, fn));
|
||||
off += c->size;
|
||||
}
|
||||
if (stk)
|
||||
emit(Osub, Kl, TMP(SP), TMP(SP), rstk);
|
||||
|
||||
for (i=i0, c=ca; i<i1; i++, c++)
|
||||
if (c->class & Cptr) {
|
||||
emit(Oblit1, 0, R, INT(c->t->size), R);
|
||||
emit(Oblit0, 0, R, i->arg[1], i->arg[0]);
|
||||
}
|
||||
}
|
||||
|
||||
static Params
|
||||
selpar(Fn *fn, Ins *i0, Ins *i1)
|
||||
{
|
||||
Class *ca, *c, cr;
|
||||
Insl *il;
|
||||
Ins *i;
|
||||
int op, n, cty;
|
||||
uint off;
|
||||
Ref r, tmp[16], *t;
|
||||
|
||||
ca = alloc((i1-i0) * sizeof ca[0]);
|
||||
curi = &insb[NIns];
|
||||
|
||||
cty = argsclass(i0, i1, ca);
|
||||
fn->reg = arm64_argregs(CALL(cty), 0);
|
||||
|
||||
il = 0;
|
||||
t = tmp;
|
||||
for (i=i0, c=ca; i<i1; i++, c++) {
|
||||
if (i->op != Oparc || (c->class & (Cptr|Cstk)))
|
||||
continue;
|
||||
sttmps(t, c->cls, c->nreg, i->to, fn);
|
||||
stkblob(i->to, c, fn, &il);
|
||||
t += c->nreg;
|
||||
}
|
||||
for (; il; il=il->link)
|
||||
emiti(il->i);
|
||||
|
||||
if (fn->retty >= 0) {
|
||||
typclass(&cr, &typ[fn->retty], gpreg, fpreg);
|
||||
if (cr.class & Cptr) {
|
||||
fn->retr = newtmp("abi", Kl, fn);
|
||||
emit(Ocopy, Kl, fn->retr, TMP(R8), R);
|
||||
fn->reg |= BIT(R8);
|
||||
}
|
||||
}
|
||||
|
||||
t = tmp;
|
||||
off = 0;
|
||||
for (i=i0, c=ca; i<i1; i++, c++)
|
||||
if (i->op == Oparc && !(c->class & Cptr)) {
|
||||
if (c->class & Cstk) {
|
||||
off = align(off, c->align);
|
||||
fn->tmp[i->to.val].slot = -(off+2);
|
||||
off += c->size;
|
||||
} else
|
||||
for (n=0; n<c->nreg; n++) {
|
||||
r = TMP(c->reg[n]);
|
||||
emit(Ocopy, c->cls[n], *t++, r, R);
|
||||
}
|
||||
} else if (c->class & Cstk) {
|
||||
off = align(off, c->align);
|
||||
if (isparbh(i->op))
|
||||
op = Oloadsb + (i->op - Oparsb);
|
||||
else
|
||||
op = Oload;
|
||||
emit(op, *c->cls, i->to, SLOT(-(off+2)), R);
|
||||
off += c->size;
|
||||
} else {
|
||||
emit(Ocopy, *c->cls, i->to, TMP(*c->reg), R);
|
||||
}
|
||||
|
||||
return (Params){
|
||||
.stk = align(off, 8),
|
||||
.ngp = (cty >> 5) & 15,
|
||||
.nfp = (cty >> 9) & 15
|
||||
};
|
||||
}
|
||||
|
||||
static Blk *
|
||||
split(Fn *fn, Blk *b)
|
||||
{
|
||||
Blk *bn;
|
||||
|
||||
++fn->nblk;
|
||||
bn = newblk();
|
||||
idup(bn, curi, &insb[NIns]-curi);
|
||||
curi = &insb[NIns];
|
||||
bn->visit = ++b->visit;
|
||||
strf(bn->name, "%s.%d", b->name, b->visit);
|
||||
bn->loop = b->loop;
|
||||
bn->link = b->link;
|
||||
b->link = bn;
|
||||
return bn;
|
||||
}
|
||||
|
||||
static void
|
||||
chpred(Blk *b, Blk *bp, Blk *bp1)
|
||||
{
|
||||
Phi *p;
|
||||
uint a;
|
||||
|
||||
for (p=b->phi; p; p=p->link) {
|
||||
for (a=0; p->blk[a]!=bp; a++)
|
||||
assert(a+1<p->narg);
|
||||
p->blk[a] = bp1;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
apple_selvaarg(Fn *fn, Blk *b, Ins *i)
|
||||
{
|
||||
Ref ap, stk, stk8, c8;
|
||||
|
||||
(void)b;
|
||||
c8 = getcon(8, fn);
|
||||
ap = i->arg[0];
|
||||
stk8 = newtmp("abi", Kl, fn);
|
||||
stk = newtmp("abi", Kl, fn);
|
||||
|
||||
emit(Ostorel, 0, R, stk8, ap);
|
||||
emit(Oadd, Kl, stk8, stk, c8);
|
||||
emit(Oload, i->cls, i->to, stk, R);
|
||||
emit(Oload, Kl, stk, ap, R);
|
||||
}
|
||||
|
||||
static void
|
||||
arm64_selvaarg(Fn *fn, Blk *b, Ins *i)
|
||||
{
|
||||
Ref loc, lreg, lstk, nr, r0, r1, c8, c16, c24, c28, ap;
|
||||
Blk *b0, *bstk, *breg;
|
||||
int isgp;
|
||||
|
||||
c8 = getcon(8, fn);
|
||||
c16 = getcon(16, fn);
|
||||
c24 = getcon(24, fn);
|
||||
c28 = getcon(28, fn);
|
||||
ap = i->arg[0];
|
||||
isgp = KBASE(i->cls) == 0;
|
||||
|
||||
/* @b [...]
|
||||
r0 =l add ap, (24 or 28)
|
||||
nr =l loadsw r0
|
||||
r1 =w csltw nr, 0
|
||||
jnz r1, @breg, @bstk
|
||||
@breg
|
||||
r0 =l add ap, (8 or 16)
|
||||
r1 =l loadl r0
|
||||
lreg =l add r1, nr
|
||||
r0 =w add nr, (8 or 16)
|
||||
r1 =l add ap, (24 or 28)
|
||||
storew r0, r1
|
||||
@bstk
|
||||
lstk =l loadl ap
|
||||
r0 =l add lstk, 8
|
||||
storel r0, ap
|
||||
@b0
|
||||
%loc =l phi @breg %lreg, @bstk %lstk
|
||||
i->to =(i->cls) load %loc
|
||||
*/
|
||||
|
||||
loc = newtmp("abi", Kl, fn);
|
||||
emit(Oload, i->cls, i->to, loc, R);
|
||||
b0 = split(fn, b);
|
||||
b0->jmp = b->jmp;
|
||||
b0->s1 = b->s1;
|
||||
b0->s2 = b->s2;
|
||||
if (b->s1)
|
||||
chpred(b->s1, b, b0);
|
||||
if (b->s2 && b->s2 != b->s1)
|
||||
chpred(b->s2, b, b0);
|
||||
|
||||
lreg = newtmp("abi", Kl, fn);
|
||||
nr = newtmp("abi", Kl, fn);
|
||||
r0 = newtmp("abi", Kw, fn);
|
||||
r1 = newtmp("abi", Kl, fn);
|
||||
emit(Ostorew, Kw, R, r0, r1);
|
||||
emit(Oadd, Kl, r1, ap, isgp ? c24 : c28);
|
||||
emit(Oadd, Kw, r0, nr, isgp ? c8 : c16);
|
||||
r0 = newtmp("abi", Kl, fn);
|
||||
r1 = newtmp("abi", Kl, fn);
|
||||
emit(Oadd, Kl, lreg, r1, nr);
|
||||
emit(Oload, Kl, r1, r0, R);
|
||||
emit(Oadd, Kl, r0, ap, isgp ? c8 : c16);
|
||||
breg = split(fn, b);
|
||||
breg->jmp.type = Jjmp;
|
||||
breg->s1 = b0;
|
||||
|
||||
lstk = newtmp("abi", Kl, fn);
|
||||
r0 = newtmp("abi", Kl, fn);
|
||||
emit(Ostorel, Kw, R, r0, ap);
|
||||
emit(Oadd, Kl, r0, lstk, c8);
|
||||
emit(Oload, Kl, lstk, ap, R);
|
||||
bstk = split(fn, b);
|
||||
bstk->jmp.type = Jjmp;
|
||||
bstk->s1 = b0;
|
||||
|
||||
b0->phi = alloc(sizeof *b0->phi);
|
||||
*b0->phi = (Phi){
|
||||
.cls = Kl, .to = loc,
|
||||
.narg = 2,
|
||||
.blk = vnew(2, sizeof b0->phi->blk[0], PFn),
|
||||
.arg = vnew(2, sizeof b0->phi->arg[0], PFn),
|
||||
};
|
||||
b0->phi->blk[0] = bstk;
|
||||
b0->phi->blk[1] = breg;
|
||||
b0->phi->arg[0] = lstk;
|
||||
b0->phi->arg[1] = lreg;
|
||||
r0 = newtmp("abi", Kl, fn);
|
||||
r1 = newtmp("abi", Kw, fn);
|
||||
b->jmp.type = Jjnz;
|
||||
b->jmp.arg = r1;
|
||||
b->s1 = breg;
|
||||
b->s2 = bstk;
|
||||
emit(Ocmpw+Cislt, Kw, r1, nr, CON_Z);
|
||||
emit(Oloadsw, Kl, nr, r0, R);
|
||||
emit(Oadd, Kl, r0, ap, isgp ? c24 : c28);
|
||||
}
|
||||
|
||||
static void
|
||||
apple_selvastart(Fn *fn, Params p, Ref ap)
|
||||
{
|
||||
Ref off, stk, arg;
|
||||
|
||||
off = getcon(p.stk, fn);
|
||||
stk = newtmp("abi", Kl, fn);
|
||||
arg = newtmp("abi", Kl, fn);
|
||||
|
||||
emit(Ostorel, 0, R, arg, ap);
|
||||
emit(Oadd, Kl, arg, stk, off);
|
||||
emit(Oaddr, Kl, stk, SLOT(-1), R);
|
||||
}
|
||||
|
||||
static void
|
||||
arm64_selvastart(Fn *fn, Params p, Ref ap)
|
||||
{
|
||||
Ref r0, r1, rsave;
|
||||
|
||||
rsave = newtmp("abi", Kl, fn);
|
||||
|
||||
r0 = newtmp("abi", Kl, fn);
|
||||
emit(Ostorel, Kw, R, r0, ap);
|
||||
emit(Oadd, Kl, r0, rsave, getcon(p.stk + 192, fn));
|
||||
|
||||
r0 = newtmp("abi", Kl, fn);
|
||||
r1 = newtmp("abi", Kl, fn);
|
||||
emit(Ostorel, Kw, R, r1, r0);
|
||||
emit(Oadd, Kl, r1, rsave, getcon(64, fn));
|
||||
emit(Oadd, Kl, r0, ap, getcon(8, fn));
|
||||
|
||||
r0 = newtmp("abi", Kl, fn);
|
||||
r1 = newtmp("abi", Kl, fn);
|
||||
emit(Ostorel, Kw, R, r1, r0);
|
||||
emit(Oadd, Kl, r1, rsave, getcon(192, fn));
|
||||
emit(Oaddr, Kl, rsave, SLOT(-1), R);
|
||||
emit(Oadd, Kl, r0, ap, getcon(16, fn));
|
||||
|
||||
r0 = newtmp("abi", Kl, fn);
|
||||
emit(Ostorew, Kw, R, getcon((p.ngp-8)*8, fn), r0);
|
||||
emit(Oadd, Kl, r0, ap, getcon(24, fn));
|
||||
|
||||
r0 = newtmp("abi", Kl, fn);
|
||||
emit(Ostorew, Kw, R, getcon((p.nfp-8)*16, fn), r0);
|
||||
emit(Oadd, Kl, r0, ap, getcon(28, fn));
|
||||
}
|
||||
|
||||
void
|
||||
arm64_abi(Fn *fn)
|
||||
{
|
||||
Blk *b;
|
||||
Ins *i, *i0;
|
||||
Insl *il;
|
||||
int n0, n1, ioff;
|
||||
Params p;
|
||||
|
||||
for (b=fn->start; b; b=b->link)
|
||||
b->visit = 0;
|
||||
|
||||
/* lower parameters */
|
||||
for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++)
|
||||
if (!ispar(i->op))
|
||||
break;
|
||||
p = selpar(fn, b->ins, i);
|
||||
n0 = &insb[NIns] - curi;
|
||||
ioff = i - b->ins;
|
||||
n1 = b->nins - ioff;
|
||||
vgrow(&b->ins, n0+n1);
|
||||
icpy(b->ins+n0, b->ins+ioff, n1);
|
||||
icpy(b->ins, curi, n0);
|
||||
b->nins = n0+n1;
|
||||
|
||||
/* lower calls, returns, and vararg instructions */
|
||||
il = 0;
|
||||
b = fn->start;
|
||||
do {
|
||||
if (!(b = b->link))
|
||||
b = fn->start; /* do it last */
|
||||
if (b->visit)
|
||||
continue;
|
||||
curi = &insb[NIns];
|
||||
selret(b, fn);
|
||||
for (i=&b->ins[b->nins]; i!=b->ins;)
|
||||
switch ((--i)->op) {
|
||||
default:
|
||||
emiti(*i);
|
||||
break;
|
||||
case Ocall:
|
||||
for (i0=i; i0>b->ins; i0--)
|
||||
if (!isarg((i0-1)->op))
|
||||
break;
|
||||
selcall(fn, i0, i, &il);
|
||||
i = i0;
|
||||
break;
|
||||
case Ovastart:
|
||||
if (T.apple)
|
||||
apple_selvastart(fn, p, i->arg[0]);
|
||||
else
|
||||
arm64_selvastart(fn, p, i->arg[0]);
|
||||
break;
|
||||
case Ovaarg:
|
||||
if (T.apple)
|
||||
apple_selvaarg(fn, b, i);
|
||||
else
|
||||
arm64_selvaarg(fn, b, i);
|
||||
break;
|
||||
case Oarg:
|
||||
case Oargc:
|
||||
die("unreachable");
|
||||
}
|
||||
if (b == fn->start)
|
||||
for (; il; il=il->link)
|
||||
emiti(il->i);
|
||||
idup(b, curi, &insb[NIns]-curi);
|
||||
} while (b != fn->start);
|
||||
|
||||
if (debug['A']) {
|
||||
fprintf(stderr, "\n> After ABI lowering:\n");
|
||||
printfn(fn, stderr);
|
||||
}
|
||||
}
|
||||
|
||||
/* abi0 for apple target; introduces
|
||||
* necessary sign extensions in calls
|
||||
* and returns
|
||||
*/
|
||||
void
|
||||
apple_extsb(Fn *fn)
|
||||
{
|
||||
Blk *b;
|
||||
Ins *i0, *i1, *i;
|
||||
int j, op;
|
||||
Ref r;
|
||||
|
||||
for (b=fn->start; b; b=b->link) {
|
||||
curi = &insb[NIns];
|
||||
j = b->jmp.type;
|
||||
if (isretbh(j)) {
|
||||
r = newtmp("abi", Kw, fn);
|
||||
op = Oextsb + (j - Jretsb);
|
||||
emit(op, Kw, r, b->jmp.arg, R);
|
||||
b->jmp.arg = r;
|
||||
b->jmp.type = Jretw;
|
||||
}
|
||||
for (i=&b->ins[b->nins]; i>b->ins;) {
|
||||
emiti(*--i);
|
||||
if (i->op != Ocall)
|
||||
continue;
|
||||
for (i0=i1=i; i0>b->ins; i0--)
|
||||
if (!isarg((i0-1)->op))
|
||||
break;
|
||||
for (i=i1; i>i0;) {
|
||||
emiti(*--i);
|
||||
if (isargbh(i->op)) {
|
||||
i->to = newtmp("abi", Kl, fn);
|
||||
curi->arg[0] = i->to;
|
||||
}
|
||||
}
|
||||
for (i=i1; i>i0;)
|
||||
if (isargbh((--i)->op)) {
|
||||
op = Oextsb + (i->op - Oargsb);
|
||||
emit(op, Kw, i->to, i->arg[0], R);
|
||||
}
|
||||
}
|
||||
idup(b, curi, &insb[NIns]-curi);
|
||||
}
|
||||
|
||||
if (debug['A']) {
|
||||
fprintf(stderr, "\n> After Apple pre-ABI:\n");
|
||||
printfn(fn, stderr);
|
||||
}
|
||||
}
|
||||
38
src/qbe/arm64/all.h
Normal file
38
src/qbe/arm64/all.h
Normal file
@@ -0,0 +1,38 @@
|
||||
#include "../all.h"
|
||||
|
||||
enum Arm64Reg {
|
||||
R0 = RXX + 1,
|
||||
R1, R2, R3, R4, R5, R6, R7,
|
||||
R8, R9, R10, R11, R12, R13, R14, R15,
|
||||
IP0, IP1, R18, R19, R20, R21, R22, R23,
|
||||
R24, R25, R26, R27, R28, FP, LR, SP,
|
||||
|
||||
V0, V1, V2, V3, V4, V5, V6, V7,
|
||||
V8, V9, V10, V11, V12, V13, V14, V15,
|
||||
V16, V17, V18, V19, V20, V21, V22, V23,
|
||||
V24, V25, V26, V27, V28, V29, V30, /* V31, */
|
||||
|
||||
NFPR = V30 - V0 + 1,
|
||||
NGPR = SP - R0 + 1,
|
||||
NGPS = R18 - R0 + 1 /* LR */ + 1,
|
||||
NFPS = (V7 - V0 + 1) + (V30 - V16 + 1),
|
||||
NCLR = (R28 - R19 + 1) + (V15 - V8 + 1),
|
||||
};
|
||||
MAKESURE(reg_not_tmp, V30 < (int)Tmp0);
|
||||
|
||||
/* targ.c */
|
||||
extern int arm64_rsave[];
|
||||
extern int arm64_rclob[];
|
||||
|
||||
/* abi.c */
|
||||
bits arm64_retregs(Ref, int[2]);
|
||||
bits arm64_argregs(Ref, int[2]);
|
||||
void arm64_abi(Fn *);
|
||||
void apple_extsb(Fn *);
|
||||
|
||||
/* isel.c */
|
||||
int arm64_logimm(uint64_t, int);
|
||||
void arm64_isel(Fn *);
|
||||
|
||||
/* emit.c */
|
||||
void arm64_emitfn(Fn *, FILE *);
|
||||
679
src/qbe/arm64/emit.c
Normal file
679
src/qbe/arm64/emit.c
Normal file
@@ -0,0 +1,679 @@
|
||||
#include "all.h"
|
||||
|
||||
typedef struct E E;
|
||||
|
||||
struct E {
|
||||
FILE *f;
|
||||
Fn *fn;
|
||||
uint64_t frame;
|
||||
uint padding;
|
||||
};
|
||||
|
||||
/* X-macro mapping each comparison code to its arm64 condition
 * suffix.  Integer comparisons come first; floating-point ones are
 * offset by NCmpI.  Used to build both the "cset" entries of omap[]
 * and the branch-condition table in arm64_emitfn().
 */
#define CMP(X) \
	X(Cieq,       "eq") \
	X(Cine,       "ne") \
	X(Cisge,      "ge") \
	X(Cisgt,      "gt") \
	X(Cisle,      "le") \
	X(Cislt,      "lt") \
	X(Ciuge,      "cs") \
	X(Ciugt,      "hi") \
	X(Ciule,      "ls") \
	X(Ciult,      "cc") \
	X(NCmpI+Cfeq, "eq") \
	X(NCmpI+Cfge, "ge") \
	X(NCmpI+Cfgt, "gt") \
	X(NCmpI+Cfle, "ls") \
	X(NCmpI+Cflt, "mi") \
	X(NCmpI+Cfne, "ne") \
	X(NCmpI+Cfo,  "vc") \
	X(NCmpI+Cfuo, "vs")
|
||||
|
||||
enum {
|
||||
Ki = -1, /* matches Kw and Kl */
|
||||
Ka = -2, /* matches all classes */
|
||||
};
|
||||
|
||||
static struct {
|
||||
short op;
|
||||
short cls;
|
||||
char *fmt;
|
||||
} omap[] = {
|
||||
{ Oadd, Ki, "add %=, %0, %1" },
|
||||
{ Oadd, Ka, "fadd %=, %0, %1" },
|
||||
{ Osub, Ki, "sub %=, %0, %1" },
|
||||
{ Osub, Ka, "fsub %=, %0, %1" },
|
||||
{ Oneg, Ki, "neg %=, %0" },
|
||||
{ Oneg, Ka, "fneg %=, %0" },
|
||||
{ Oand, Ki, "and %=, %0, %1" },
|
||||
{ Oor, Ki, "orr %=, %0, %1" },
|
||||
{ Oxor, Ki, "eor %=, %0, %1" },
|
||||
{ Osar, Ki, "asr %=, %0, %1" },
|
||||
{ Oshr, Ki, "lsr %=, %0, %1" },
|
||||
{ Oshl, Ki, "lsl %=, %0, %1" },
|
||||
{ Omul, Ki, "mul %=, %0, %1" },
|
||||
{ Omul, Ka, "fmul %=, %0, %1" },
|
||||
{ Odiv, Ki, "sdiv %=, %0, %1" },
|
||||
{ Odiv, Ka, "fdiv %=, %0, %1" },
|
||||
{ Oudiv, Ki, "udiv %=, %0, %1" },
|
||||
{ Orem, Ki, "sdiv %?, %0, %1\n\tmsub\t%=, %?, %1, %0" },
|
||||
{ Ourem, Ki, "udiv %?, %0, %1\n\tmsub\t%=, %?, %1, %0" },
|
||||
{ Ocopy, Ki, "mov %=, %0" },
|
||||
{ Ocopy, Ka, "fmov %=, %0" },
|
||||
{ Oswap, Ki, "mov %?, %0\n\tmov\t%0, %1\n\tmov\t%1, %?" },
|
||||
{ Oswap, Ka, "fmov %?, %0\n\tfmov\t%0, %1\n\tfmov\t%1, %?" },
|
||||
{ Ostoreb, Kw, "strb %W0, %M1" },
|
||||
{ Ostoreh, Kw, "strh %W0, %M1" },
|
||||
{ Ostorew, Kw, "str %W0, %M1" },
|
||||
{ Ostorel, Kw, "str %L0, %M1" },
|
||||
{ Ostores, Kw, "str %S0, %M1" },
|
||||
{ Ostored, Kw, "str %D0, %M1" },
|
||||
{ Oloadsb, Ki, "ldrsb %=, %M0" },
|
||||
{ Oloadub, Ki, "ldrb %W=, %M0" },
|
||||
{ Oloadsh, Ki, "ldrsh %=, %M0" },
|
||||
{ Oloaduh, Ki, "ldrh %W=, %M0" },
|
||||
{ Oloadsw, Kw, "ldr %=, %M0" },
|
||||
{ Oloadsw, Kl, "ldrsw %=, %M0" },
|
||||
{ Oloaduw, Ki, "ldr %W=, %M0" },
|
||||
{ Oload, Ka, "ldr %=, %M0" },
|
||||
{ Oextsb, Ki, "sxtb %=, %W0" },
|
||||
{ Oextub, Ki, "uxtb %W=, %W0" },
|
||||
{ Oextsh, Ki, "sxth %=, %W0" },
|
||||
{ Oextuh, Ki, "uxth %W=, %W0" },
|
||||
{ Oextsw, Ki, "sxtw %L=, %W0" },
|
||||
{ Oextuw, Ki, "mov %W=, %W0" },
|
||||
{ Oexts, Kd, "fcvt %=, %S0" },
|
||||
{ Otruncd, Ks, "fcvt %=, %D0" },
|
||||
{ Ocast, Kw, "fmov %=, %S0" },
|
||||
{ Ocast, Kl, "fmov %=, %D0" },
|
||||
{ Ocast, Ks, "fmov %=, %W0" },
|
||||
{ Ocast, Kd, "fmov %=, %L0" },
|
||||
{ Ostosi, Ka, "fcvtzs %=, %S0" },
|
||||
{ Ostoui, Ka, "fcvtzu %=, %S0" },
|
||||
{ Odtosi, Ka, "fcvtzs %=, %D0" },
|
||||
{ Odtoui, Ka, "fcvtzu %=, %D0" },
|
||||
{ Oswtof, Ka, "scvtf %=, %W0" },
|
||||
{ Ouwtof, Ka, "ucvtf %=, %W0" },
|
||||
{ Osltof, Ka, "scvtf %=, %L0" },
|
||||
{ Oultof, Ka, "ucvtf %=, %L0" },
|
||||
{ Ocall, Kw, "blr %L0" },
|
||||
|
||||
{ Oacmp, Ki, "cmp %0, %1" },
|
||||
{ Oacmn, Ki, "cmn %0, %1" },
|
||||
{ Oafcmp, Ka, "fcmpe %0, %1" },
|
||||
|
||||
#define X(c, str) \
|
||||
{ Oflag+c, Ki, "cset %=, " str },
|
||||
CMP(X)
|
||||
#undef X
|
||||
{ NOp, 0, 0 }
|
||||
};
|
||||
|
||||
enum {
|
||||
V31 = 0x1fffffff, /* local name for V31 */
|
||||
};
|
||||
|
||||
static char *
|
||||
rname(int r, int k)
|
||||
{
|
||||
static char buf[4];
|
||||
|
||||
if (r == SP) {
|
||||
assert(k == Kl);
|
||||
sprintf(buf, "sp");
|
||||
}
|
||||
else if (R0 <= r && r <= LR)
|
||||
switch (k) {
|
||||
default: die("invalid class");
|
||||
case Kw: sprintf(buf, "w%d", r-R0); break;
|
||||
case Kx:
|
||||
case Kl: sprintf(buf, "x%d", r-R0); break;
|
||||
}
|
||||
else if (V0 <= r && r <= V30)
|
||||
switch (k) {
|
||||
default: die("invalid class");
|
||||
case Ks: sprintf(buf, "s%d", r-V0); break;
|
||||
case Kx:
|
||||
case Kd: sprintf(buf, "d%d", r-V0); break;
|
||||
}
|
||||
else if (r == V31)
|
||||
switch (k) {
|
||||
default: die("invalid class");
|
||||
case Ks: sprintf(buf, "s31"); break;
|
||||
case Kd: sprintf(buf, "d31"); break;
|
||||
}
|
||||
else
|
||||
die("invalid register");
|
||||
return buf;
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
slot(Ref r, E *e)
|
||||
{
|
||||
int s;
|
||||
|
||||
s = rsval(r);
|
||||
if (s == -1)
|
||||
return 16 + e->frame;
|
||||
if (s < 0) {
|
||||
if (e->fn->vararg && !T.apple)
|
||||
return 16 + e->frame + 192 - (s+2);
|
||||
else
|
||||
return 16 + e->frame - (s+2);
|
||||
} else
|
||||
return 16 + e->padding + 4 * s;
|
||||
}
|
||||
|
||||
static void
|
||||
emitf(char *s, Ins *i, E *e)
|
||||
{
|
||||
Ref r;
|
||||
int k, c;
|
||||
Con *pc;
|
||||
uint64_t n;
|
||||
uint sp;
|
||||
|
||||
fputc('\t', e->f);
|
||||
|
||||
sp = 0;
|
||||
for (;;) {
|
||||
k = i->cls;
|
||||
while ((c = *s++) != '%')
|
||||
if (c == ' ' && !sp) {
|
||||
fputc('\t', e->f);
|
||||
sp = 1;
|
||||
} else if (!c) {
|
||||
fputc('\n', e->f);
|
||||
return;
|
||||
} else
|
||||
fputc(c, e->f);
|
||||
Switch:
|
||||
switch ((c = *s++)) {
|
||||
default:
|
||||
die("invalid escape");
|
||||
case 'W':
|
||||
k = Kw;
|
||||
goto Switch;
|
||||
case 'L':
|
||||
k = Kl;
|
||||
goto Switch;
|
||||
case 'S':
|
||||
k = Ks;
|
||||
goto Switch;
|
||||
case 'D':
|
||||
k = Kd;
|
||||
goto Switch;
|
||||
case '?':
|
||||
if (KBASE(k) == 0)
|
||||
fputs(rname(IP1, k), e->f);
|
||||
else
|
||||
fputs(rname(V31, k), e->f);
|
||||
break;
|
||||
case '=':
|
||||
case '0':
|
||||
r = c == '=' ? i->to : i->arg[0];
|
||||
assert(isreg(r) || req(r, TMP(V31)));
|
||||
fputs(rname(r.val, k), e->f);
|
||||
break;
|
||||
case '1':
|
||||
r = i->arg[1];
|
||||
switch (rtype(r)) {
|
||||
default:
|
||||
die("invalid second argument");
|
||||
case RTmp:
|
||||
assert(isreg(r));
|
||||
fputs(rname(r.val, k), e->f);
|
||||
break;
|
||||
case RCon:
|
||||
pc = &e->fn->con[r.val];
|
||||
n = pc->bits.i;
|
||||
assert(pc->type == CBits);
|
||||
if (n >> 24) {
|
||||
assert(arm64_logimm(n, k));
|
||||
fprintf(e->f, "#%"PRIu64, n);
|
||||
} else if (n & 0xfff000) {
|
||||
assert(!(n & ~0xfff000ull));
|
||||
fprintf(e->f, "#%"PRIu64", lsl #12",
|
||||
n>>12);
|
||||
} else {
|
||||
assert(!(n & ~0xfffull));
|
||||
fprintf(e->f, "#%"PRIu64, n);
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 'M':
|
||||
c = *s++;
|
||||
assert(c == '0' || c == '1' || c == '=');
|
||||
r = c == '=' ? i->to : i->arg[c - '0'];
|
||||
switch (rtype(r)) {
|
||||
default:
|
||||
die("todo (arm emit): unhandled ref");
|
||||
case RTmp:
|
||||
assert(isreg(r));
|
||||
fprintf(e->f, "[%s]", rname(r.val, Kl));
|
||||
break;
|
||||
case RSlot:
|
||||
fprintf(e->f, "[x29, %"PRIu64"]", slot(r, e));
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
loadaddr(Con *c, char *rn, E *e)
|
||||
{
|
||||
char *p, *l, *s;
|
||||
|
||||
switch (c->sym.type) {
|
||||
default:
|
||||
die("unreachable");
|
||||
case SGlo:
|
||||
if (T.apple)
|
||||
s = "\tadrp\tR, S@pageO\n"
|
||||
"\tadd\tR, R, S@pageoffO\n";
|
||||
else
|
||||
s = "\tadrp\tR, SO\n"
|
||||
"\tadd\tR, R, #:lo12:SO\n";
|
||||
break;
|
||||
case SThr:
|
||||
if (T.apple)
|
||||
s = "\tadrp\tR, S@tlvppage\n"
|
||||
"\tldr\tR, [R, S@tlvppageoff]\n";
|
||||
else
|
||||
s = "\tmrs\tR, tpidr_el0\n"
|
||||
"\tadd\tR, R, #:tprel_hi12:SO, lsl #12\n"
|
||||
"\tadd\tR, R, #:tprel_lo12_nc:SO\n";
|
||||
break;
|
||||
}
|
||||
|
||||
l = str(c->sym.id);
|
||||
p = l[0] == '"' ? "" : T.assym;
|
||||
for (; *s; s++)
|
||||
switch (*s) {
|
||||
default:
|
||||
fputc(*s, e->f);
|
||||
break;
|
||||
case 'R':
|
||||
fputs(rn, e->f);
|
||||
break;
|
||||
case 'S':
|
||||
fputs(p, e->f);
|
||||
fputs(l, e->f);
|
||||
break;
|
||||
case 'O':
|
||||
if (c->bits.i)
|
||||
/* todo, handle large offsets */
|
||||
fprintf(e->f, "+%"PRIi64, c->bits.i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
loadcon(Con *c, int r, int k, E *e)
|
||||
{
|
||||
char *rn;
|
||||
int64_t n;
|
||||
int w, sh;
|
||||
|
||||
w = KWIDE(k);
|
||||
rn = rname(r, k);
|
||||
n = c->bits.i;
|
||||
if (c->type == CAddr) {
|
||||
rn = rname(r, Kl);
|
||||
loadaddr(c, rn, e);
|
||||
return;
|
||||
}
|
||||
assert(c->type == CBits);
|
||||
if (!w)
|
||||
n = (int32_t)n;
|
||||
if ((n | 0xffff) == -1 || arm64_logimm(n, k)) {
|
||||
fprintf(e->f, "\tmov\t%s, #%"PRIi64"\n", rn, n);
|
||||
} else {
|
||||
fprintf(e->f, "\tmov\t%s, #%d\n",
|
||||
rn, (int)(n & 0xffff));
|
||||
for (sh=16; n>>=16; sh+=16) {
|
||||
if ((!w && sh == 32) || sh == 64)
|
||||
break;
|
||||
fprintf(e->f, "\tmovk\t%s, #0x%x, lsl #%d\n",
|
||||
rn, (uint)(n & 0xffff), sh);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void emitins(Ins *, E *);
|
||||
|
||||
static int
|
||||
fixarg(Ref *pr, int sz, int t, E *e)
|
||||
{
|
||||
Ins *i;
|
||||
Ref r;
|
||||
uint64_t s;
|
||||
|
||||
r = *pr;
|
||||
if (rtype(r) == RSlot) {
|
||||
s = slot(r, e);
|
||||
if (s > sz * 4095u) {
|
||||
if (t < 0)
|
||||
return 1;
|
||||
i = &(Ins){Oaddr, Kl, TMP(t), {r}};
|
||||
emitins(i, e);
|
||||
*pr = TMP(t);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
emitins(Ins *i, E *e)
|
||||
{
|
||||
char *l, *p, *rn;
|
||||
uint64_t s;
|
||||
int o, t;
|
||||
Ref r;
|
||||
Con *c;
|
||||
|
||||
switch (i->op) {
|
||||
default:
|
||||
if (isload(i->op))
|
||||
fixarg(&i->arg[0], loadsz(i), IP1, e);
|
||||
if (isstore(i->op)) {
|
||||
t = T.apple ? -1 : R18;
|
||||
if (fixarg(&i->arg[1], storesz(i), t, e)) {
|
||||
if (req(i->arg[0], TMP(IP1))) {
|
||||
fprintf(e->f,
|
||||
"\tfmov\t%c31, %c17\n",
|
||||
"ds"[i->cls == Kw],
|
||||
"xw"[i->cls == Kw]);
|
||||
i->arg[0] = TMP(V31);
|
||||
i->op = Ostores + (i->cls-Kw);
|
||||
}
|
||||
fixarg(&i->arg[1], storesz(i), IP1, e);
|
||||
}
|
||||
}
|
||||
Table:
|
||||
/* most instructions are just pulled out of
|
||||
* the table omap[], some special cases are
|
||||
* detailed below */
|
||||
for (o=0;; o++) {
|
||||
/* this linear search should really be a binary
|
||||
* search */
|
||||
if (omap[o].op == NOp)
|
||||
die("no match for %s(%c)",
|
||||
optab[i->op].name, "wlsd"[i->cls]);
|
||||
if (omap[o].op == i->op)
|
||||
if (omap[o].cls == i->cls || omap[o].cls == Ka
|
||||
|| (omap[o].cls == Ki && KBASE(i->cls) == 0))
|
||||
break;
|
||||
}
|
||||
emitf(omap[o].fmt, i, e);
|
||||
break;
|
||||
case Onop:
|
||||
break;
|
||||
case Ocopy:
|
||||
if (req(i->to, i->arg[0]))
|
||||
break;
|
||||
if (rtype(i->to) == RSlot) {
|
||||
r = i->to;
|
||||
if (!isreg(i->arg[0])) {
|
||||
i->to = TMP(IP1);
|
||||
emitins(i, e);
|
||||
i->arg[0] = i->to;
|
||||
}
|
||||
i->op = Ostorew + i->cls;
|
||||
i->cls = Kw;
|
||||
i->arg[1] = r;
|
||||
emitins(i, e);
|
||||
break;
|
||||
}
|
||||
assert(isreg(i->to));
|
||||
switch (rtype(i->arg[0])) {
|
||||
case RCon:
|
||||
c = &e->fn->con[i->arg[0].val];
|
||||
loadcon(c, i->to.val, i->cls, e);
|
||||
break;
|
||||
case RSlot:
|
||||
i->op = Oload;
|
||||
emitins(i, e);
|
||||
break;
|
||||
default:
|
||||
assert(i->to.val != IP1);
|
||||
goto Table;
|
||||
}
|
||||
break;
|
||||
case Oaddr:
|
||||
assert(rtype(i->arg[0]) == RSlot);
|
||||
rn = rname(i->to.val, Kl);
|
||||
s = slot(i->arg[0], e);
|
||||
if (s <= 4095)
|
||||
fprintf(e->f, "\tadd\t%s, x29, #%"PRIu64"\n", rn, s);
|
||||
else if (s <= 65535)
|
||||
fprintf(e->f,
|
||||
"\tmov\t%s, #%"PRIu64"\n"
|
||||
"\tadd\t%s, x29, %s\n",
|
||||
rn, s, rn, rn
|
||||
);
|
||||
else
|
||||
fprintf(e->f,
|
||||
"\tmov\t%s, #%"PRIu64"\n"
|
||||
"\tmovk\t%s, #%"PRIu64", lsl #16\n"
|
||||
"\tadd\t%s, x29, %s\n",
|
||||
rn, s & 0xFFFF, rn, s >> 16, rn, rn
|
||||
);
|
||||
break;
|
||||
case Ocall:
|
||||
if (rtype(i->arg[0]) != RCon)
|
||||
goto Table;
|
||||
c = &e->fn->con[i->arg[0].val];
|
||||
if (c->type != CAddr
|
||||
|| c->sym.type != SGlo
|
||||
|| c->bits.i)
|
||||
die("invalid call argument");
|
||||
l = str(c->sym.id);
|
||||
p = l[0] == '"' ? "" : T.assym;
|
||||
fprintf(e->f, "\tbl\t%s%s\n", p, l);
|
||||
break;
|
||||
case Osalloc:
|
||||
emitf("sub sp, sp, %0", i, e);
|
||||
if (!req(i->to, R))
|
||||
emitf("mov %=, sp", i, e);
|
||||
break;
|
||||
case Odbgloc:
|
||||
emitdbgloc(i->arg[0].val, i->arg[1].val, e->f);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
framelayout(E *e)
|
||||
{
|
||||
int *r;
|
||||
uint o;
|
||||
uint64_t f;
|
||||
|
||||
for (o=0, r=arm64_rclob; *r>=0; r++)
|
||||
o += 1 & (e->fn->reg >> *r);
|
||||
f = e->fn->slot;
|
||||
f = (f + 3) & -4;
|
||||
o += o & 1;
|
||||
e->padding = 4*(f-e->fn->slot);
|
||||
e->frame = 4*f + 8*o;
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
Stack-frame layout:
|
||||
|
||||
+=============+
|
||||
| varargs |
|
||||
| save area |
|
||||
+-------------+
|
||||
| callee-save | ^
|
||||
| registers | |
|
||||
+-------------+ |
|
||||
| ... | |
|
||||
| spill slots | |
|
||||
| ... | | e->frame
|
||||
+-------------+ |
|
||||
| ... | |
|
||||
| locals | |
|
||||
| ... | |
|
||||
+-------------+ |
|
||||
| e->padding | v
|
||||
+-------------+
|
||||
| saved x29 |
|
||||
| saved x30 |
|
||||
+=============+ <- x29
|
||||
|
||||
*/
|
||||
|
||||
void
|
||||
arm64_emitfn(Fn *fn, FILE *out)
|
||||
{
|
||||
static char *ctoa[] = {
|
||||
#define X(c, s) [c] = s,
|
||||
CMP(X)
|
||||
#undef X
|
||||
};
|
||||
static int id0;
|
||||
int s, n, c, lbl, *r;
|
||||
uint64_t o;
|
||||
Blk *b, *t;
|
||||
Ins *i;
|
||||
E *e;
|
||||
|
||||
e = &(E){.f = out, .fn = fn};
|
||||
if (T.apple)
|
||||
e->fn->lnk.align = 4;
|
||||
emitfnlnk(e->fn->name, &e->fn->lnk, e->f);
|
||||
fputs("\thint\t#34\n", e->f);
|
||||
framelayout(e);
|
||||
|
||||
if (e->fn->vararg && !T.apple) {
|
||||
for (n=7; n>=0; n--)
|
||||
fprintf(e->f, "\tstr\tq%d, [sp, -16]!\n", n);
|
||||
for (n=7; n>=0; n-=2)
|
||||
fprintf(e->f, "\tstp\tx%d, x%d, [sp, -16]!\n", n-1, n);
|
||||
}
|
||||
|
||||
if (e->frame + 16 <= 512)
|
||||
fprintf(e->f,
|
||||
"\tstp\tx29, x30, [sp, -%"PRIu64"]!\n",
|
||||
e->frame + 16
|
||||
);
|
||||
else if (e->frame <= 4095)
|
||||
fprintf(e->f,
|
||||
"\tsub\tsp, sp, #%"PRIu64"\n"
|
||||
"\tstp\tx29, x30, [sp, -16]!\n",
|
||||
e->frame
|
||||
);
|
||||
else if (e->frame <= 65535)
|
||||
fprintf(e->f,
|
||||
"\tmov\tx16, #%"PRIu64"\n"
|
||||
"\tsub\tsp, sp, x16\n"
|
||||
"\tstp\tx29, x30, [sp, -16]!\n",
|
||||
e->frame
|
||||
);
|
||||
else
|
||||
fprintf(e->f,
|
||||
"\tmov\tx16, #%"PRIu64"\n"
|
||||
"\tmovk\tx16, #%"PRIu64", lsl #16\n"
|
||||
"\tsub\tsp, sp, x16\n"
|
||||
"\tstp\tx29, x30, [sp, -16]!\n",
|
||||
e->frame & 0xFFFF, e->frame >> 16
|
||||
);
|
||||
fputs("\tmov\tx29, sp\n", e->f);
|
||||
s = (e->frame - e->padding) / 4;
|
||||
for (r=arm64_rclob; *r>=0; r++)
|
||||
if (e->fn->reg & BIT(*r)) {
|
||||
s -= 2;
|
||||
i = &(Ins){.arg = {TMP(*r), SLOT(s)}};
|
||||
i->op = *r >= V0 ? Ostored : Ostorel;
|
||||
emitins(i, e);
|
||||
}
|
||||
|
||||
for (lbl=0, b=e->fn->start; b; b=b->link) {
|
||||
if (lbl || b->npred > 1)
|
||||
fprintf(e->f, "%s%d:\n", T.asloc, id0+b->id);
|
||||
for (i=b->ins; i!=&b->ins[b->nins]; i++)
|
||||
emitins(i, e);
|
||||
lbl = 1;
|
||||
switch (b->jmp.type) {
|
||||
case Jhlt:
|
||||
fprintf(e->f, "\tbrk\t#1000\n");
|
||||
break;
|
||||
case Jret0:
|
||||
s = (e->frame - e->padding) / 4;
|
||||
for (r=arm64_rclob; *r>=0; r++)
|
||||
if (e->fn->reg & BIT(*r)) {
|
||||
s -= 2;
|
||||
i = &(Ins){Oload, 0, TMP(*r), {SLOT(s)}};
|
||||
i->cls = *r >= V0 ? Kd : Kl;
|
||||
emitins(i, e);
|
||||
}
|
||||
if (e->fn->dynalloc)
|
||||
fputs("\tmov sp, x29\n", e->f);
|
||||
o = e->frame + 16;
|
||||
if (e->fn->vararg && !T.apple)
|
||||
o += 192;
|
||||
if (o <= 504)
|
||||
fprintf(e->f,
|
||||
"\tldp\tx29, x30, [sp], %"PRIu64"\n",
|
||||
o
|
||||
);
|
||||
else if (o - 16 <= 4095)
|
||||
fprintf(e->f,
|
||||
"\tldp\tx29, x30, [sp], 16\n"
|
||||
"\tadd\tsp, sp, #%"PRIu64"\n",
|
||||
o - 16
|
||||
);
|
||||
else if (o - 16 <= 65535)
|
||||
fprintf(e->f,
|
||||
"\tldp\tx29, x30, [sp], 16\n"
|
||||
"\tmov\tx16, #%"PRIu64"\n"
|
||||
"\tadd\tsp, sp, x16\n",
|
||||
o - 16
|
||||
);
|
||||
else
|
||||
fprintf(e->f,
|
||||
"\tldp\tx29, x30, [sp], 16\n"
|
||||
"\tmov\tx16, #%"PRIu64"\n"
|
||||
"\tmovk\tx16, #%"PRIu64", lsl #16\n"
|
||||
"\tadd\tsp, sp, x16\n",
|
||||
(o - 16) & 0xFFFF, (o - 16) >> 16
|
||||
);
|
||||
fprintf(e->f, "\tret\n");
|
||||
break;
|
||||
case Jjmp:
|
||||
Jmp:
|
||||
if (b->s1 != b->link)
|
||||
fprintf(e->f,
|
||||
"\tb\t%s%d\n",
|
||||
T.asloc, id0+b->s1->id
|
||||
);
|
||||
else
|
||||
lbl = 0;
|
||||
break;
|
||||
default:
|
||||
c = b->jmp.type - Jjf;
|
||||
if (c < 0 || c > NCmp)
|
||||
die("unhandled jump %d", b->jmp.type);
|
||||
if (b->link == b->s2) {
|
||||
t = b->s1;
|
||||
b->s1 = b->s2;
|
||||
b->s2 = t;
|
||||
} else
|
||||
c = cmpneg(c);
|
||||
fprintf(e->f,
|
||||
"\tb%s\t%s%d\n",
|
||||
ctoa[c], T.asloc, id0+b->s2->id
|
||||
);
|
||||
goto Jmp;
|
||||
}
|
||||
}
|
||||
id0 += e->fn->nblk;
|
||||
if (!T.apple)
|
||||
elf_emitfnfin(fn->name, out);
|
||||
}
|
||||
316
src/qbe/arm64/isel.c
Normal file
316
src/qbe/arm64/isel.c
Normal file
@@ -0,0 +1,316 @@
|
||||
#include "all.h"
|
||||
|
||||
/* Classification of a constant as an immediate operand, as
 * computed by imm().  The three "p" kinds describe a non-negative
 * value and the three "n" kinds a negative one; imm() relies on
 * the lo12/hi12/lo24 kinds being consecutive within each group.
 */
enum Imm {
	Iother, /* none of the forms below */
	Iplo12, /* value fits in bits 0-11 */
	Iphi12, /* value fits in bits 12-23 */
	Iplo24, /* value fits in bits 0-23 */
	Inlo12, /* negated value fits in bits 0-11 */
	Inhi12, /* negated value fits in bits 12-23 */
	Inlo24  /* negated value fits in bits 0-23 */
};
|
||||
|
||||
static enum Imm
|
||||
imm(Con *c, int k, int64_t *pn)
|
||||
{
|
||||
int64_t n;
|
||||
int i;
|
||||
|
||||
if (c->type != CBits)
|
||||
return Iother;
|
||||
n = c->bits.i;
|
||||
if (k == Kw)
|
||||
n = (int32_t)n;
|
||||
i = Iplo12;
|
||||
if (n < 0) {
|
||||
i = Inlo12;
|
||||
n = -(uint64_t)n;
|
||||
}
|
||||
*pn = n;
|
||||
if ((n & 0x000fff) == n)
|
||||
return i;
|
||||
if ((n & 0xfff000) == n)
|
||||
return i + 1;
|
||||
if ((n & 0xffffff) == n)
|
||||
return i + 2;
|
||||
return Iother;
|
||||
}
|
||||
|
||||
int
|
||||
arm64_logimm(uint64_t x, int k)
|
||||
{
|
||||
uint64_t n;
|
||||
|
||||
if (k == Kw)
|
||||
x = (x & 0xffffffff) | x << 32;
|
||||
if (x & 1)
|
||||
x = ~x;
|
||||
if (x == 0)
|
||||
return 0;
|
||||
if (x == 0xaaaaaaaaaaaaaaaa)
|
||||
return 1;
|
||||
n = x & 0xf;
|
||||
if (0x1111111111111111 * n == x)
|
||||
goto Check;
|
||||
n = x & 0xff;
|
||||
if (0x0101010101010101 * n == x)
|
||||
goto Check;
|
||||
n = x & 0xffff;
|
||||
if (0x0001000100010001 * n == x)
|
||||
goto Check;
|
||||
n = x & 0xffffffff;
|
||||
if (0x0000000100000001 * n == x)
|
||||
goto Check;
|
||||
n = x;
|
||||
Check:
|
||||
return (n & (n + (n & -n))) == 0;
|
||||
}
|
||||
|
||||
static void
|
||||
fixarg(Ref *pr, int k, int phi, Fn *fn)
|
||||
{
|
||||
char buf[32];
|
||||
Con *c, cc;
|
||||
Ref r0, r1, r2, r3;
|
||||
int s, n;
|
||||
|
||||
r0 = *pr;
|
||||
switch (rtype(r0)) {
|
||||
case RCon:
|
||||
c = &fn->con[r0.val];
|
||||
if (T.apple
|
||||
&& c->type == CAddr
|
||||
&& c->sym.type == SThr) {
|
||||
r1 = newtmp("isel", Kl, fn);
|
||||
*pr = r1;
|
||||
if (c->bits.i) {
|
||||
r2 = newtmp("isel", Kl, fn);
|
||||
cc = (Con){.type = CBits};
|
||||
cc.bits.i = c->bits.i;
|
||||
r3 = newcon(&cc, fn);
|
||||
emit(Oadd, Kl, r1, r2, r3);
|
||||
r1 = r2;
|
||||
}
|
||||
emit(Ocopy, Kl, r1, TMP(R0), R);
|
||||
r1 = newtmp("isel", Kl, fn);
|
||||
r2 = newtmp("isel", Kl, fn);
|
||||
emit(Ocall, 0, R, r1, CALL(33));
|
||||
emit(Ocopy, Kl, TMP(R0), r2, R);
|
||||
emit(Oload, Kl, r1, r2, R);
|
||||
cc = *c;
|
||||
cc.bits.i = 0;
|
||||
r3 = newcon(&cc, fn);
|
||||
emit(Ocopy, Kl, r2, r3, R);
|
||||
break;
|
||||
}
|
||||
if (KBASE(k) == 0 && phi)
|
||||
return;
|
||||
r1 = newtmp("isel", k, fn);
|
||||
if (KBASE(k) == 0) {
|
||||
emit(Ocopy, k, r1, r0, R);
|
||||
} else {
|
||||
n = stashbits(c->bits.i, KWIDE(k) ? 8 : 4);
|
||||
vgrow(&fn->con, ++fn->ncon);
|
||||
c = &fn->con[fn->ncon-1];
|
||||
sprintf(buf, "\"%sfp%d\"", T.asloc, n);
|
||||
*c = (Con){.type = CAddr};
|
||||
c->sym.id = intern(buf);
|
||||
r2 = newtmp("isel", Kl, fn);
|
||||
emit(Oload, k, r1, r2, R);
|
||||
emit(Ocopy, Kl, r2, CON(c-fn->con), R);
|
||||
}
|
||||
*pr = r1;
|
||||
break;
|
||||
case RTmp:
|
||||
s = fn->tmp[r0.val].slot;
|
||||
if (s == -1)
|
||||
break;
|
||||
r1 = newtmp("isel", Kl, fn);
|
||||
emit(Oaddr, Kl, r1, SLOT(s), R);
|
||||
*pr = r1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
selcmp(Ref arg[2], int k, Fn *fn)
|
||||
{
|
||||
Ref r, *iarg;
|
||||
Con *c;
|
||||
int swap, cmp, fix;
|
||||
int64_t n;
|
||||
|
||||
if (KBASE(k) == 1) {
|
||||
emit(Oafcmp, k, R, arg[0], arg[1]);
|
||||
iarg = curi->arg;
|
||||
fixarg(&iarg[0], k, 0, fn);
|
||||
fixarg(&iarg[1], k, 0, fn);
|
||||
return 0;
|
||||
}
|
||||
swap = rtype(arg[0]) == RCon;
|
||||
if (swap) {
|
||||
r = arg[1];
|
||||
arg[1] = arg[0];
|
||||
arg[0] = r;
|
||||
}
|
||||
fix = 1;
|
||||
cmp = Oacmp;
|
||||
r = arg[1];
|
||||
if (rtype(r) == RCon) {
|
||||
c = &fn->con[r.val];
|
||||
switch (imm(c, k, &n)) {
|
||||
default:
|
||||
break;
|
||||
case Iplo12:
|
||||
case Iphi12:
|
||||
fix = 0;
|
||||
break;
|
||||
case Inlo12:
|
||||
case Inhi12:
|
||||
cmp = Oacmn;
|
||||
r = getcon(n, fn);
|
||||
fix = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
emit(cmp, k, R, arg[0], r);
|
||||
iarg = curi->arg;
|
||||
fixarg(&iarg[0], k, 0, fn);
|
||||
if (fix)
|
||||
fixarg(&iarg[1], k, 0, fn);
|
||||
return swap;
|
||||
}
|
||||
|
||||
static int
|
||||
callable(Ref r, Fn *fn)
|
||||
{
|
||||
Con *c;
|
||||
|
||||
if (rtype(r) == RTmp)
|
||||
return 1;
|
||||
if (rtype(r) == RCon) {
|
||||
c = &fn->con[r.val];
|
||||
if (c->type == CAddr)
|
||||
if (c->bits.i == 0)
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
sel(Ins i, Fn *fn)
|
||||
{
|
||||
Ref *iarg;
|
||||
Ins *i0;
|
||||
int ck, cc;
|
||||
|
||||
if (INRANGE(i.op, Oalloc, Oalloc1)) {
|
||||
i0 = curi - 1;
|
||||
salloc(i.to, i.arg[0], fn);
|
||||
fixarg(&i0->arg[0], Kl, 0, fn);
|
||||
return;
|
||||
}
|
||||
if (iscmp(i.op, &ck, &cc)) {
|
||||
emit(Oflag, i.cls, i.to, R, R);
|
||||
i0 = curi;
|
||||
if (selcmp(i.arg, ck, fn))
|
||||
i0->op += cmpop(cc);
|
||||
else
|
||||
i0->op += cc;
|
||||
return;
|
||||
}
|
||||
if (i.op == Ocall)
|
||||
if (callable(i.arg[0], fn)) {
|
||||
emiti(i);
|
||||
return;
|
||||
}
|
||||
if (i.op != Onop) {
|
||||
emiti(i);
|
||||
iarg = curi->arg; /* fixarg() can change curi */
|
||||
fixarg(&iarg[0], argcls(&i, 0), 0, fn);
|
||||
fixarg(&iarg[1], argcls(&i, 1), 0, fn);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
seljmp(Blk *b, Fn *fn)
|
||||
{
|
||||
Ref r;
|
||||
Ins *i, *ir;
|
||||
int ck, cc, use;
|
||||
|
||||
if (b->jmp.type == Jret0
|
||||
|| b->jmp.type == Jjmp
|
||||
|| b->jmp.type == Jhlt)
|
||||
return;
|
||||
assert(b->jmp.type == Jjnz);
|
||||
r = b->jmp.arg;
|
||||
use = -1;
|
||||
b->jmp.arg = R;
|
||||
ir = 0;
|
||||
i = &b->ins[b->nins];
|
||||
while (i > b->ins)
|
||||
if (req((--i)->to, r)) {
|
||||
use = fn->tmp[r.val].nuse;
|
||||
ir = i;
|
||||
break;
|
||||
}
|
||||
if (ir && use == 1
|
||||
&& iscmp(ir->op, &ck, &cc)) {
|
||||
if (selcmp(ir->arg, ck, fn))
|
||||
cc = cmpop(cc);
|
||||
b->jmp.type = Jjf + cc;
|
||||
*ir = (Ins){.op = Onop};
|
||||
}
|
||||
else {
|
||||
selcmp((Ref[]){r, CON_Z}, Kw, fn);
|
||||
b->jmp.type = Jjfine;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
arm64_isel(Fn *fn)
|
||||
{
|
||||
Blk *b, **sb;
|
||||
Ins *i;
|
||||
Phi *p;
|
||||
uint n, al;
|
||||
int64_t sz;
|
||||
|
||||
/* assign slots to fast allocs */
|
||||
b = fn->start;
|
||||
/* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
|
||||
for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2)
|
||||
for (i=b->ins; i<&b->ins[b->nins]; i++)
|
||||
if (i->op == al) {
|
||||
if (rtype(i->arg[0]) != RCon)
|
||||
break;
|
||||
sz = fn->con[i->arg[0].val].bits.i;
|
||||
if (sz < 0 || sz >= INT_MAX-15)
|
||||
err("invalid alloc size %"PRId64, sz);
|
||||
sz = (sz + n-1) & -n;
|
||||
sz /= 4;
|
||||
fn->tmp[i->to.val].slot = fn->slot;
|
||||
fn->slot += sz;
|
||||
*i = (Ins){.op = Onop};
|
||||
}
|
||||
|
||||
for (b=fn->start; b; b=b->link) {
|
||||
curi = &insb[NIns];
|
||||
for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
|
||||
for (p=(*sb)->phi; p; p=p->link) {
|
||||
for (n=0; p->blk[n] != b; n++)
|
||||
assert(n+1 < p->narg);
|
||||
fixarg(&p->arg[n], p->cls, 1, fn);
|
||||
}
|
||||
seljmp(b, fn);
|
||||
for (i=&b->ins[b->nins]; i!=b->ins;)
|
||||
sel(*--i, fn);
|
||||
idup(b, curi, &insb[NIns]-curi);
|
||||
}
|
||||
|
||||
if (debug['I']) {
|
||||
fprintf(stderr, "\n> After instruction selection:\n");
|
||||
printfn(fn, stderr);
|
||||
}
|
||||
}
|
||||
69
src/qbe/arm64/targ.c
Normal file
69
src/qbe/arm64/targ.c
Normal file
@@ -0,0 +1,69 @@
|
||||
#include "all.h"
|
||||
|
||||
int arm64_rsave[] = {
|
||||
R0, R1, R2, R3, R4, R5, R6, R7,
|
||||
R8, R9, R10, R11, R12, R13, R14, R15,
|
||||
IP0, IP1, R18, LR,
|
||||
V0, V1, V2, V3, V4, V5, V6, V7,
|
||||
V16, V17, V18, V19, V20, V21, V22, V23,
|
||||
V24, V25, V26, V27, V28, V29, V30,
|
||||
-1
|
||||
};
|
||||
int arm64_rclob[] = {
|
||||
R19, R20, R21, R22, R23, R24, R25, R26,
|
||||
R27, R28,
|
||||
V8, V9, V10, V11, V12, V13, V14, V15,
|
||||
-1
|
||||
};
|
||||
|
||||
/* Bit set of FP, SP, IP1 and R18; installed as the .rglob field of
 * both arm64 targets below (four registers, matching .nrglob). */
#define RGLOB (BIT(FP) | BIT(SP) | BIT(IP1) | BIT(R18))
|
||||
|
||||
/* Target hook installed as .memargs: always answers 0, whatever
 * the operation. */
static int
arm64_memargs(int op)
{
	return (void)op, 0;
}
|
||||
|
||||
/* Target fields shared by every arm64 flavor.  The expansion ends
 * with a comma so it can be placed anywhere inside a Target
 * initializer. */
#define ARM64_COMMON \
	/* register file */ \
	.gpr0 = R0, \
	.ngpr = NGPR, \
	.fpr0 = V0, \
	.nfpr = NFPR, \
	.rglob = RGLOB, \
	.nrglob = 4, \
	.rsave = arm64_rsave, \
	.nrsave = {NGPS, NFPS}, \
	/* hooks */ \
	.retregs = arm64_retregs, \
	.argregs = arm64_argregs, \
	.memargs = arm64_memargs, \
	.isel = arm64_isel, \
	.abi1 = arm64_abi, \
	.emitfn = arm64_emitfn, \
	.cansel = 0,
|
||||
|
||||
Target T_arm64 = {
|
||||
.name = "arm64",
|
||||
.abi0 = elimsb,
|
||||
.emitfin = elf_emitfin,
|
||||
.asloc = ".L",
|
||||
ARM64_COMMON
|
||||
};
|
||||
|
||||
Target T_arm64_apple = {
|
||||
.name = "arm64_apple",
|
||||
.apple = 1,
|
||||
.abi0 = apple_extsb,
|
||||
.emitfin = macho_emitfin,
|
||||
.asloc = "L",
|
||||
.assym = "_",
|
||||
ARM64_COMMON
|
||||
};
|
||||
|
||||
MAKESURE(globals_are_not_arguments,
|
||||
(RGLOB & (BIT(R8+1) - 1)) == 0
|
||||
);
|
||||
MAKESURE(arrays_size_ok,
|
||||
sizeof arm64_rsave == (NGPS+NFPS+1) * sizeof(int) &&
|
||||
sizeof arm64_rclob == (NCLR+1) * sizeof(int)
|
||||
);
|
||||
567
src/qbe/cfg.c
Normal file
567
src/qbe/cfg.c
Normal file
@@ -0,0 +1,567 @@
|
||||
#include "all.h"
|
||||
|
||||
Blk *
|
||||
newblk()
|
||||
{
|
||||
static Blk z;
|
||||
Blk *b;
|
||||
|
||||
b = alloc(sizeof *b);
|
||||
*b = z;
|
||||
b->ins = vnew(0, sizeof b->ins[0], PFn);
|
||||
b->pred = vnew(0, sizeof b->pred[0], PFn);
|
||||
return b;
|
||||
}
|
||||
|
||||
static void
|
||||
fixphis(Fn *f)
|
||||
{
|
||||
Blk *b, *bp;
|
||||
Phi *p;
|
||||
uint n, n0;
|
||||
|
||||
for (b=f->start; b; b=b->link) {
|
||||
assert(b->id < f->nblk);
|
||||
for (p=b->phi; p; p=p->link) {
|
||||
for (n=n0=0; n<p->narg; n++) {
|
||||
bp = p->blk[n];
|
||||
if (bp->id != -1u)
|
||||
if (bp->s1 == b || bp->s2 == b) {
|
||||
p->blk[n0] = bp;
|
||||
p->arg[n0] = p->arg[n];
|
||||
n0++;
|
||||
}
|
||||
}
|
||||
assert(n0 > 0);
|
||||
p->narg = n0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
addpred(Blk *bp, Blk *b)
|
||||
{
|
||||
vgrow(&b->pred, ++b->npred);
|
||||
b->pred[b->npred-1] = bp;
|
||||
}
|
||||
|
||||
void
|
||||
fillpreds(Fn *f)
|
||||
{
|
||||
Blk *b;
|
||||
|
||||
for (b=f->start; b; b=b->link)
|
||||
b->npred = 0;
|
||||
for (b=f->start; b; b=b->link) {
|
||||
if (b->s1)
|
||||
addpred(b, b->s1);
|
||||
if (b->s2 && b->s2 != b->s1)
|
||||
addpred(b, b->s2);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
porec(Blk *b, uint *npo)
|
||||
{
|
||||
Blk *s1, *s2;
|
||||
|
||||
if (!b || b->id != -1u)
|
||||
return;
|
||||
b->id = 0; /* marker */
|
||||
s1 = b->s1;
|
||||
s2 = b->s2;
|
||||
if (s1 && s2 && s1->loop > s2->loop) {
|
||||
s1 = b->s2;
|
||||
s2 = b->s1;
|
||||
}
|
||||
porec(s1, npo);
|
||||
porec(s2, npo);
|
||||
b->id = (*npo)++;
|
||||
}
|
||||
|
||||
static void
|
||||
fillrpo(Fn *f)
|
||||
{
|
||||
Blk *b, **p;
|
||||
|
||||
for (b=f->start; b; b=b->link)
|
||||
b->id = -1u;
|
||||
f->nblk = 0;
|
||||
porec(f->start, &f->nblk);
|
||||
vgrow(&f->rpo, f->nblk);
|
||||
for (p=&f->start; (b=*p);) {
|
||||
if (b->id == -1u) {
|
||||
*p = b->link;
|
||||
} else {
|
||||
b->id = f->nblk-b->id-1;
|
||||
f->rpo[b->id] = b;
|
||||
p = &b->link;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* fill rpo, preds; prune dead blks */
|
||||
void
|
||||
fillcfg(Fn *f)
|
||||
{
|
||||
fillrpo(f);
|
||||
fillpreds(f);
|
||||
fixphis(f);
|
||||
}
|
||||
|
||||
/* for dominators computation, read
|
||||
* "A Simple, Fast Dominance Algorithm"
|
||||
* by K. Cooper, T. Harvey, and K. Kennedy.
|
||||
*/
|
||||
|
||||
/* Meet of b1 and b2 in the dominator tree under construction: each
 * side walks up its idom chain while its id is the larger one until
 * both meet.  A null b1 yields b2. */
static Blk *
inter(Blk *b1, Blk *b2)
{
	if (b1 == 0)
		return b2;
	while (b1 != b2) {
		while (b1->id > b2->id) {
			b1 = b1->idom;
			assert(b1);
		}
		while (b2->id > b1->id) {
			b2 = b2->idom;
			assert(b2);
		}
	}
	return b1;
}
|
||||
|
||||
void
|
||||
filldom(Fn *fn)
|
||||
{
|
||||
Blk *b, *d;
|
||||
int ch;
|
||||
uint n, p;
|
||||
|
||||
for (b=fn->start; b; b=b->link) {
|
||||
b->idom = 0;
|
||||
b->dom = 0;
|
||||
b->dlink = 0;
|
||||
}
|
||||
do {
|
||||
ch = 0;
|
||||
for (n=1; n<fn->nblk; n++) {
|
||||
b = fn->rpo[n];
|
||||
d = 0;
|
||||
for (p=0; p<b->npred; p++)
|
||||
if (b->pred[p]->idom
|
||||
|| b->pred[p] == fn->start)
|
||||
d = inter(d, b->pred[p]);
|
||||
if (d != b->idom) {
|
||||
ch++;
|
||||
b->idom = d;
|
||||
}
|
||||
}
|
||||
} while (ch);
|
||||
for (b=fn->start; b; b=b->link)
|
||||
if ((d=b->idom)) {
|
||||
assert(d != b);
|
||||
b->dlink = d->dom;
|
||||
d->dom = b;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
sdom(Blk *b1, Blk *b2)
|
||||
{
|
||||
assert(b1 && b2);
|
||||
if (b1 == b2)
|
||||
return 0;
|
||||
while (b2->id > b1->id)
|
||||
b2 = b2->idom;
|
||||
return b1 == b2;
|
||||
}
|
||||
|
||||
int
|
||||
dom(Blk *b1, Blk *b2)
|
||||
{
|
||||
return b1 == b2 || sdom(b1, b2);
|
||||
}
|
||||
|
||||
static void
|
||||
addfron(Blk *a, Blk *b)
|
||||
{
|
||||
uint n;
|
||||
|
||||
for (n=0; n<a->nfron; n++)
|
||||
if (a->fron[n] == b)
|
||||
return;
|
||||
if (!a->nfron)
|
||||
a->fron = vnew(++a->nfron, sizeof a->fron[0], PFn);
|
||||
else
|
||||
vgrow(&a->fron, ++a->nfron);
|
||||
a->fron[a->nfron-1] = b;
|
||||
}
|
||||
|
||||
/* fill the dominance frontier */
|
||||
void
|
||||
fillfron(Fn *fn)
|
||||
{
|
||||
Blk *a, *b;
|
||||
|
||||
for (b=fn->start; b; b=b->link)
|
||||
b->nfron = 0;
|
||||
for (b=fn->start; b; b=b->link) {
|
||||
if (b->s1)
|
||||
for (a=b; !sdom(a, b->s1); a=a->idom)
|
||||
addfron(a, b->s1);
|
||||
if (b->s2)
|
||||
for (a=b; !sdom(a, b->s2); a=a->idom)
|
||||
addfron(a, b->s2);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
loopmark(Blk *hd, Blk *b, void f(Blk *, Blk *))
|
||||
{
|
||||
uint p;
|
||||
|
||||
if (b->id < hd->id || b->visit == hd->id)
|
||||
return;
|
||||
b->visit = hd->id;
|
||||
f(hd, b);
|
||||
for (p=0; p<b->npred; ++p)
|
||||
loopmark(hd, b->pred[p], f);
|
||||
}
|
||||
|
||||
void
|
||||
loopiter(Fn *fn, void f(Blk *, Blk *))
|
||||
{
|
||||
uint n, p;
|
||||
Blk *b;
|
||||
|
||||
for (b=fn->start; b; b=b->link)
|
||||
b->visit = -1u;
|
||||
for (n=0; n<fn->nblk; ++n) {
|
||||
b = fn->rpo[n];
|
||||
for (p=0; p<b->npred; ++p)
|
||||
if (b->pred[p]->id >= n)
|
||||
loopmark(b, b->pred[p], f);
|
||||
}
|
||||
}
|
||||
|
||||
/* dominator tree depth */
|
||||
void
|
||||
filldepth(Fn *fn)
|
||||
{
|
||||
Blk *b, *d;
|
||||
int depth;
|
||||
|
||||
for (b=fn->start; b; b=b->link)
|
||||
b->depth = -1;
|
||||
|
||||
fn->start->depth = 0;
|
||||
|
||||
for (b=fn->start; b; b=b->link) {
|
||||
if (b->depth != -1)
|
||||
continue;
|
||||
depth = 1;
|
||||
for (d=b->idom; d->depth==-1; d=d->idom)
|
||||
depth++;
|
||||
depth += d->depth;
|
||||
b->depth = depth;
|
||||
for (d=b->idom; d->depth==-1; d=d->idom)
|
||||
d->depth = --depth;
|
||||
}
|
||||
}
|
||||
|
||||
/* least common ancestor in dom tree */
|
||||
Blk *
|
||||
lca(Blk *b1, Blk *b2)
|
||||
{
|
||||
if (!b1)
|
||||
return b2;
|
||||
if (!b2)
|
||||
return b1;
|
||||
while (b1->depth > b2->depth)
|
||||
b1 = b1->idom;
|
||||
while (b2->depth > b1->depth)
|
||||
b2 = b2->idom;
|
||||
while (b1 != b2) {
|
||||
b1 = b1->idom;
|
||||
b2 = b2->idom;
|
||||
}
|
||||
return b1;
|
||||
}
|
||||
|
||||
void
|
||||
multloop(Blk *hd, Blk *b)
|
||||
{
|
||||
(void)hd;
|
||||
b->loop *= 10;
|
||||
}
|
||||
|
||||
void
|
||||
fillloop(Fn *fn)
|
||||
{
|
||||
Blk *b;
|
||||
|
||||
for (b=fn->start; b; b=b->link)
|
||||
b->loop = 1;
|
||||
loopiter(fn, multloop);
|
||||
}
|
||||
|
||||
static void
|
||||
uffind(Blk **pb, Blk **uf)
|
||||
{
|
||||
Blk **pb1;
|
||||
|
||||
pb1 = &uf[(*pb)->id];
|
||||
if (*pb1) {
|
||||
uffind(pb1, uf);
|
||||
*pb = *pb1;
|
||||
}
|
||||
}
|
||||
|
||||
/* requires rpo and no phis, breaks cfg */
|
||||
void
|
||||
simpljmp(Fn *fn)
|
||||
{
|
||||
|
||||
Blk **uf; /* union-find */
|
||||
Blk **p, *b, *ret;
|
||||
|
||||
ret = newblk();
|
||||
ret->id = fn->nblk++;
|
||||
ret->jmp.type = Jret0;
|
||||
uf = emalloc(fn->nblk * sizeof uf[0]);
|
||||
for (b=fn->start; b; b=b->link) {
|
||||
assert(!b->phi);
|
||||
if (b->jmp.type == Jret0) {
|
||||
b->jmp.type = Jjmp;
|
||||
b->s1 = ret;
|
||||
}
|
||||
if (b->nins == 0)
|
||||
if (b->jmp.type == Jjmp) {
|
||||
uffind(&b->s1, uf);
|
||||
if (b->s1 != b)
|
||||
uf[b->id] = b->s1;
|
||||
}
|
||||
}
|
||||
for (p=&fn->start; (b=*p); p=&b->link) {
|
||||
if (b->s1)
|
||||
uffind(&b->s1, uf);
|
||||
if (b->s2)
|
||||
uffind(&b->s2, uf);
|
||||
if (b->s1 && b->s1 == b->s2) {
|
||||
b->jmp.type = Jjmp;
|
||||
b->s2 = 0;
|
||||
}
|
||||
}
|
||||
*p = ret;
|
||||
free(uf);
|
||||
}
|
||||
|
||||
static int
|
||||
reachrec(Blk *b, Blk *to)
|
||||
{
|
||||
if (b == to)
|
||||
return 1;
|
||||
if (!b || b->visit)
|
||||
return 0;
|
||||
|
||||
b->visit = 1;
|
||||
if (reachrec(b->s1, to))
|
||||
return 1;
|
||||
if (reachrec(b->s2, to))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Blk.visit needs to be clear at entry */
|
||||
int
|
||||
reaches(Fn *fn, Blk *b, Blk *to)
|
||||
{
|
||||
int r;
|
||||
|
||||
assert(to);
|
||||
r = reachrec(b, to);
|
||||
for (b=fn->start; b; b=b->link)
|
||||
b->visit = 0;
|
||||
return r;
|
||||
}
|
||||
|
||||
/* can b reach 'to' not through excl
|
||||
* Blk.visit needs to be clear at entry */
|
||||
int
|
||||
reachesnotvia(Fn *fn, Blk *b, Blk *to, Blk *excl)
|
||||
{
|
||||
excl->visit = 1;
|
||||
return reaches(fn, b, to);
|
||||
}
|
||||
|
||||
int
|
||||
ifgraph(Blk *ifb, Blk **pthenb, Blk **pelseb, Blk **pjoinb)
|
||||
{
|
||||
Blk *s1, *s2, **t;
|
||||
|
||||
if (ifb->jmp.type != Jjnz)
|
||||
return 0;
|
||||
|
||||
s1 = ifb->s1;
|
||||
s2 = ifb->s2;
|
||||
if (s1->id > s2->id) {
|
||||
s1 = ifb->s2;
|
||||
s2 = ifb->s1;
|
||||
t = pthenb;
|
||||
pthenb = pelseb;
|
||||
pelseb = t;
|
||||
}
|
||||
if (s1 == s2)
|
||||
return 0;
|
||||
|
||||
if (s1->jmp.type != Jjmp || s1->npred != 1)
|
||||
return 0;
|
||||
|
||||
if (s1->s1 == s2) {
|
||||
/* if-then / if-else */
|
||||
if (s2->npred != 2)
|
||||
return 0;
|
||||
*pthenb = s1;
|
||||
*pelseb = ifb;
|
||||
*pjoinb = s2;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (s2->jmp.type != Jjmp || s2->npred != 1)
|
||||
return 0;
|
||||
if (s1->s1 != s2->s1 || s1->s1->npred != 2)
|
||||
return 0;
|
||||
|
||||
assert(s1->s1 != ifb);
|
||||
*pthenb = s1;
|
||||
*pelseb = s2;
|
||||
*pjoinb = s1->s1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
typedef struct Jmp Jmp;
|
||||
|
||||
struct Jmp {
|
||||
int type;
|
||||
Ref arg;
|
||||
Blk *s1, *s2;
|
||||
};
|
||||
|
||||
static int
|
||||
jmpeq(Jmp *a, Jmp *b)
|
||||
{
|
||||
return a->type == b->type && req(a->arg, b->arg)
|
||||
&& a->s1 == b->s1 && a->s2 == b->s2;
|
||||
}
|
||||
|
||||
static int
|
||||
jmpnophi(Jmp *j)
|
||||
{
|
||||
if (j->s1 && j->s1->phi)
|
||||
return 0;
|
||||
if (j->s2 && j->s2->phi)
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* require cfg rpo, breaks use */
|
||||
void
|
||||
simplcfg(Fn *fn)
|
||||
{
|
||||
Ins cpy, *i;
|
||||
Blk *b, *bb, **pb;
|
||||
Jmp *jmp, *j, *jj;
|
||||
Phi *p;
|
||||
int *empty, done;
|
||||
uint n;
|
||||
|
||||
if (debug['C']) {
|
||||
fprintf(stderr, "\n> Before CFG simplification:\n");
|
||||
printfn(fn, stderr);
|
||||
}
|
||||
|
||||
cpy = (Ins){.op = Ocopy};
|
||||
for (b=fn->start; b; b=b->link)
|
||||
if (b->npred == 1) {
|
||||
bb = b->pred[0];
|
||||
for (p=b->phi; p; p=p->link) {
|
||||
cpy.cls = p->cls;
|
||||
cpy.to = p->to;
|
||||
cpy.arg[0] = phiarg(p, bb);
|
||||
addins(&bb->ins, &bb->nins, &cpy);
|
||||
}
|
||||
b->phi = 0;
|
||||
}
|
||||
|
||||
jmp = emalloc(fn->nblk * sizeof jmp[0]);
|
||||
empty = emalloc(fn->nblk * sizeof empty[0]);
|
||||
for (b=fn->start; b; b=b->link) {
|
||||
jmp[b->id].type = b->jmp.type;
|
||||
jmp[b->id].arg = b->jmp.arg;
|
||||
jmp[b->id].s1 = b->s1;
|
||||
jmp[b->id].s2 = b->s2;
|
||||
empty[b->id] = !b->phi;
|
||||
for (i=b->ins; i<&b->ins[b->nins]; i++)
|
||||
if (i->op != Onop && i->op != Odbgloc) {
|
||||
empty[b->id] = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
do {
|
||||
done = 1;
|
||||
for (b=fn->start; b; b=b->link) {
|
||||
if (b->id == -1u)
|
||||
continue;
|
||||
j = &jmp[b->id];
|
||||
if (j->type == Jjmp && j->s1->npred == 1) {
|
||||
assert(!j->s1->phi);
|
||||
addbins(&b->ins, &b->nins, j->s1);
|
||||
empty[b->id] &= empty[j->s1->id];
|
||||
jj = &jmp[j->s1->id];
|
||||
pb = (Blk*[]){jj->s1, jj->s2, 0};
|
||||
for (; (bb=*pb); pb++)
|
||||
for (p=bb->phi; p; p=p->link) {
|
||||
n = phiargn(p, j->s1);
|
||||
p->blk[n] = b;
|
||||
}
|
||||
j->s1->id = -1u;
|
||||
*j = *jj;
|
||||
done = 0;
|
||||
}
|
||||
else if (j->type == Jjnz
|
||||
&& empty[j->s1->id] && empty[j->s2->id]
|
||||
&& jmpeq(&jmp[j->s1->id], &jmp[j->s2->id])
|
||||
&& jmpnophi(&jmp[j->s1->id])) {
|
||||
*j = jmp[j->s1->id];
|
||||
done = 0;
|
||||
}
|
||||
}
|
||||
} while (!done);
|
||||
|
||||
for (b=fn->start; b; b=b->link)
|
||||
if (b->id != -1u) {
|
||||
j = &jmp[b->id];
|
||||
b->jmp.type = j->type;
|
||||
b->jmp.arg = j->arg;
|
||||
b->s1 = j->s1;
|
||||
b->s2 = j->s2;
|
||||
assert(!j->s1 || j->s1->id != -1u);
|
||||
assert(!j->s2 || j->s2->id != -1u);
|
||||
}
|
||||
|
||||
fillcfg(fn);
|
||||
free(empty);
|
||||
free(jmp);
|
||||
|
||||
if (debug['C']) {
|
||||
fprintf(stderr, "\n> After CFG simplification:\n");
|
||||
printfn(fn, stderr);
|
||||
}
|
||||
}
|
||||
18
src/qbe/config.h
Normal file
18
src/qbe/config.h
Normal file
@@ -0,0 +1,18 @@
|
||||
/* Auto-generated default target for QBE.
   The qbe_backend.c #ifdef chain handles all common platforms;
   this file is only reached by the #else fallback.

   Deftgt is chosen from the host architecture first, then from
   the host operating system. */
#if defined(__aarch64__)
#  if defined(__APPLE__)
#    define Deftgt T_arm64_apple
#  else
#    define Deftgt T_arm64
#  endif
#elif defined(__x86_64__)
#  if defined(__APPLE__)
#    define Deftgt T_amd64_apple
#  elif defined(_WIN32)
#    define Deftgt T_amd64_win
#  else
#    define Deftgt T_amd64_sysv
#  endif
#elif defined(__riscv) && __riscv_xlen == 64
#  define Deftgt T_rv64
#else
#  error "unsupported target for QBE"
#endif
|
||||
408
src/qbe/copy.c
Normal file
408
src/qbe/copy.c
Normal file
@@ -0,0 +1,408 @@
|
||||
#include "all.h"
|
||||
|
||||
/* Properties of one extension instruction, see ext(). */
typedef struct Ext {
	char zext; /* non-zero for the zero-extending variants */
	char nopw; /* is a no-op if arg width is <= nopw */
	char usew; /* uses only the low usew bits of arg */
} Ext;
|
||||
|
||||
static int
|
||||
ext(Ins *i, Ext *e)
|
||||
{
|
||||
static Ext tbl[] = {
|
||||
/*extsb*/ {0, 7, 8},
|
||||
/*extub*/ {1, 8, 8},
|
||||
/*extsh*/ {0, 15, 16},
|
||||
/*extuh*/ {1, 16, 16},
|
||||
/*extsw*/ {0, 31, 32},
|
||||
/*extuw*/ {1, 32, 32},
|
||||
};
|
||||
|
||||
if (!isext(i->op))
|
||||
return 0;
|
||||
*e = tbl[i->op - Oextsb];
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Number of bits needed to represent v: the position of its highest
 * set bit plus one, or 0 when v is 0.  Found by binary search on the
 * shift amount. */
static int
bitwidth(uint64_t v)
{
	int width, step;

	width = 0;
	for (step=32; step>=1; step/=2)
		if (v >> step) {
			width += step;
			v >>= step;
		}
	/* v is now 0 or 1 */
	return width + v;
}
|
||||
|
||||
/* no more than w bits are used */
|
||||
static int
|
||||
usewidthle(Fn *fn, Ref r, int w)
|
||||
{
|
||||
Ext e;
|
||||
Tmp *t;
|
||||
Use *u;
|
||||
Phi *p;
|
||||
Ins *i;
|
||||
Ref rc;
|
||||
int64_t v;
|
||||
int b;
|
||||
|
||||
assert(rtype(r) == RTmp);
|
||||
t = &fn->tmp[r.val];
|
||||
for (u=t->use; u<&t->use[t->nuse]; u++) {
|
||||
switch (u->type) {
|
||||
case UPhi:
|
||||
p = u->u.phi;
|
||||
/* during gvn, phi nodes may be
|
||||
* replaced by other temps; in
|
||||
* this case, the replaced phi
|
||||
* uses are added to the
|
||||
* replacement temp uses and
|
||||
* Phi.to is set to R */
|
||||
if (p->visit || req(p->to, R))
|
||||
continue;
|
||||
p->visit = 1;
|
||||
b = usewidthle(fn, p->to, w);
|
||||
p->visit = 0;
|
||||
if (b)
|
||||
continue;
|
||||
break;
|
||||
case UIns:
|
||||
i = u->u.ins;
|
||||
assert(i != 0);
|
||||
if (i->op == Ocopy)
|
||||
if (usewidthle(fn, i->to, w))
|
||||
continue;
|
||||
if (ext(i, &e)) {
|
||||
if (e.usew <= w)
|
||||
continue;
|
||||
if (usewidthle(fn, i->to, w))
|
||||
continue;
|
||||
}
|
||||
if (i->op == Oand) {
|
||||
if (req(r, i->arg[0]))
|
||||
rc = i->arg[1];
|
||||
else {
|
||||
assert(req(r, i->arg[1]));
|
||||
rc = i->arg[0];
|
||||
}
|
||||
if (isconbits(fn, rc, &v)
|
||||
&& bitwidth(v) <= w)
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Smaller of two ints. */
static int
min(int v1, int v2)
{
	if (v1 < v2)
		return v1;
	return v2;
}
|
||||
|
||||
/* is the ref narrower than w bits */
|
||||
static int
|
||||
defwidthle(Fn *fn, Ref r, int w)
|
||||
{
|
||||
Ext e;
|
||||
Tmp *t;
|
||||
Phi *p;
|
||||
Ins *i;
|
||||
uint n;
|
||||
int64_t v;
|
||||
int x;
|
||||
|
||||
if (isconbits(fn, r, &v)
|
||||
&& bitwidth(v) <= w)
|
||||
return 1;
|
||||
if (rtype(r) != RTmp)
|
||||
return 0;
|
||||
t = &fn->tmp[r.val];
|
||||
if (t->cls != Kw)
|
||||
return 0;
|
||||
|
||||
if (!t->def) {
|
||||
/* phi def */
|
||||
for (p=fn->rpo[t->bid]->phi; p; p=p->link)
|
||||
if (req(p->to, r))
|
||||
break;
|
||||
assert(p);
|
||||
if (p->visit)
|
||||
return 1;
|
||||
p->visit = 1;
|
||||
for (n=0; n<p->narg; n++)
|
||||
if (!defwidthle(fn, p->arg[n], w)) {
|
||||
p->visit = 0;
|
||||
return 0;
|
||||
}
|
||||
p->visit = 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
i = t->def;
|
||||
if (i->op == Ocopy)
|
||||
return defwidthle(fn, i->arg[0], w);
|
||||
if (i->op == Oshr || i->op == Osar) {
|
||||
if (isconbits(fn, i->arg[1], &v))
|
||||
if (0 < v && v <= 32) {
|
||||
if (i->op == Oshr && w+v >= 32)
|
||||
return 1;
|
||||
if (w < 32) {
|
||||
if (i->op == Osar)
|
||||
w = min(31, w+v);
|
||||
else
|
||||
w = min(32, w+v);
|
||||
}
|
||||
}
|
||||
return defwidthle(fn, i->arg[0], w);
|
||||
}
|
||||
if (iscmp(i->op, &x, &x))
|
||||
return w >= 1;
|
||||
if (i->op == Oand) {
|
||||
if (defwidthle(fn, i->arg[0], w)
|
||||
|| defwidthle(fn, i->arg[1], w))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
if (i->op == Oor || i->op == Oxor) {
|
||||
if (defwidthle(fn, i->arg[0], w)
|
||||
&& defwidthle(fn, i->arg[1], w))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
if (ext(i, &e)) {
|
||||
if (e.zext && e.usew <= w)
|
||||
return 1;
|
||||
w = min(w, e.nopw);
|
||||
return defwidthle(fn, i->arg[0], w);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
isw1(Fn *fn, Ref r)
|
||||
{
|
||||
return defwidthle(fn, r, 1);
|
||||
}
|
||||
|
||||
/* insert early extub/extuh instructions
|
||||
* for pars used only narrowly; this
|
||||
* helps factoring extensions out of
|
||||
* loops
|
||||
*
|
||||
* needs use; breaks use
|
||||
*/
|
||||
void
|
||||
narrowpars(Fn *fn)
|
||||
{
|
||||
Blk *b;
|
||||
int loop;
|
||||
Ins ext, *i, *ins;
|
||||
uint npar, nins;
|
||||
Ref r;
|
||||
|
||||
/* only useful for functions with loops */
|
||||
loop = 0;
|
||||
for (b=fn->start; b; b=b->link)
|
||||
if (b->loop > 1) {
|
||||
loop = 1;
|
||||
break;
|
||||
}
|
||||
if (!loop)
|
||||
return;
|
||||
|
||||
b = fn->start;
|
||||
|
||||
npar = 0;
|
||||
for (i=b->ins; i<&b->ins[b->nins]; i++) {
|
||||
if (!ispar(i->op))
|
||||
break;
|
||||
npar++;
|
||||
}
|
||||
if (npar == 0)
|
||||
return;
|
||||
|
||||
nins = b->nins + npar;
|
||||
ins = vnew(nins, sizeof ins[0], PFn);
|
||||
icpy(ins, b->ins, npar);
|
||||
icpy(ins + 2*npar, b->ins+npar, b->nins-npar);
|
||||
b->ins = ins;
|
||||
b->nins = nins;
|
||||
|
||||
for (i=b->ins; i<&b->ins[b->nins]; i++) {
|
||||
if (!ispar(i->op))
|
||||
break;
|
||||
ext = (Ins){.op = Onop};
|
||||
if (i->cls == Kw)
|
||||
if (usewidthle(fn, i->to, 16)) {
|
||||
ext.op = Oextuh;
|
||||
if (usewidthle(fn, i->to, 8))
|
||||
ext.op = Oextub;
|
||||
r = newtmp("vw", i->cls, fn);
|
||||
ext.cls = i->cls;
|
||||
ext.to = i->to;
|
||||
ext.arg[0] = r;
|
||||
i->to = r;
|
||||
}
|
||||
*(i+npar) = ext;
|
||||
}
|
||||
}
|
||||
|
||||
Ref
|
||||
copyref(Fn *fn, Blk *b, Ins *i)
|
||||
{
|
||||
/* which extensions are copies for a given
|
||||
* argument width */
|
||||
static bits extcpy[] = {
|
||||
[WFull] = 0,
|
||||
[Wsb] = BIT(Wsb) | BIT(Wsh) | BIT(Wsw),
|
||||
[Wub] = BIT(Wub) | BIT(Wuh) | BIT(Wuw),
|
||||
[Wsh] = BIT(Wsh) | BIT(Wsw),
|
||||
[Wuh] = BIT(Wuh) | BIT(Wuw),
|
||||
[Wsw] = BIT(Wsw),
|
||||
[Wuw] = BIT(Wuw),
|
||||
};
|
||||
Ext e;
|
||||
Tmp *t;
|
||||
int64_t v;
|
||||
int w, z;
|
||||
|
||||
if (i->op == Ocopy)
|
||||
return i->arg[0];
|
||||
|
||||
/* op identity value */
|
||||
if (optab[i->op].hasid
|
||||
&& KBASE(i->cls) == 0 /* integer only - fp NaN! */
|
||||
&& req(i->arg[1], con01[optab[i->op].idval])
|
||||
&& (!optab[i->op].cmpeqwl || isw1(fn, i->arg[0])))
|
||||
return i->arg[0];
|
||||
|
||||
/* idempotent op with identical args */
|
||||
if (optab[i->op].idemp
|
||||
&& req(i->arg[0], i->arg[1]))
|
||||
return i->arg[0];
|
||||
|
||||
/* integer cmp with identical args */
|
||||
if ((optab[i->op].cmpeqwl || optab[i->op].cmplgtewl)
|
||||
&& req(i->arg[0], i->arg[1]))
|
||||
return con01[optab[i->op].eqval];
|
||||
|
||||
/* cmpeq/ne 0 with 0/non-0 inference */
|
||||
if (optab[i->op].cmpeqwl
|
||||
&& req(i->arg[1], CON_Z)
|
||||
&& zeroval(fn, b, i->arg[0], argcls(i, 0), &z))
|
||||
return con01[optab[i->op].eqval^z^1];
|
||||
|
||||
/* redundant and mask */
|
||||
if (i->op == Oand
|
||||
&& isconbits(fn, i->arg[1], &v)
|
||||
&& (v > 0 && ((v+1) & v) == 0)
|
||||
&& defwidthle(fn, i->arg[0], bitwidth(v)))
|
||||
return i->arg[0];
|
||||
|
||||
if (i->cls == Kw
|
||||
&& (i->op == Oextsw || i->op == Oextuw))
|
||||
return i->arg[0];
|
||||
|
||||
if (ext(i, &e) && rtype(i->arg[0]) == RTmp) {
|
||||
t = &fn->tmp[i->arg[0].val];
|
||||
assert(KBASE(t->cls) == 0);
|
||||
|
||||
/* do not break typing by returning
|
||||
* a narrower temp */
|
||||
if (KWIDE(i->cls) > KWIDE(t->cls))
|
||||
return R;
|
||||
|
||||
w = Wsb + (i->op - Oextsb);
|
||||
if (BIT(w) & extcpy[t->width])
|
||||
return i->arg[0];
|
||||
|
||||
/* avoid eliding extensions of params
|
||||
* inserted in the start block; their
|
||||
* point is to make further extensions
|
||||
* redundant */
|
||||
if ((!t->def || !ispar(t->def->op))
|
||||
&& usewidthle(fn, i->to, e.usew))
|
||||
return i->arg[0];
|
||||
|
||||
if (defwidthle(fn, i->arg[0], e.nopw))
|
||||
return i->arg[0];
|
||||
}
|
||||
|
||||
return R;
|
||||
}
|
||||
|
||||
static int
|
||||
phieq(Phi *pa, Phi *pb)
|
||||
{
|
||||
Ref r;
|
||||
uint n;
|
||||
|
||||
assert(pa->narg == pb->narg);
|
||||
for (n=0; n<pa->narg; n++) {
|
||||
r = phiarg(pb, pa->blk[n]);
|
||||
if (!req(pa->arg[n], r))
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
Ref
|
||||
phicopyref(Fn *fn, Blk *b, Phi *p)
|
||||
{
|
||||
Blk *d, **s;
|
||||
Phi *p1;
|
||||
uint n, c;
|
||||
|
||||
/* identical args */
|
||||
for (n=0; n<p->narg-1; n++)
|
||||
if (!req(p->arg[n], p->arg[n+1]))
|
||||
break;
|
||||
if (n == p->narg-1)
|
||||
return p->arg[n];
|
||||
|
||||
/* same as a previous phi */
|
||||
for (p1=b->phi; p1!=p; p1=p1->link) {
|
||||
assert(p1);
|
||||
if (phieq(p1, p))
|
||||
return p1->to;
|
||||
}
|
||||
|
||||
/* can be replaced by a
|
||||
* dominating jnz arg */
|
||||
d = b->idom;
|
||||
if (p->narg != 2
|
||||
|| d->jmp.type != Jjnz
|
||||
|| !isw1(fn, d->jmp.arg))
|
||||
return R;
|
||||
|
||||
s = (Blk*[]){0, 0};
|
||||
for (n=0; n<2; n++)
|
||||
for (c=0; c<2; c++)
|
||||
if (req(p->arg[n], con01[c]))
|
||||
s[c] = p->blk[n];
|
||||
|
||||
/* if s1 ends with a jnz on either b
|
||||
* or s2; the inference below is wrong
|
||||
* without the jump type checks */
|
||||
if (d->s1 == s[1] && d->s2 == s[0]
|
||||
&& d->s1->jmp.type == Jjmp
|
||||
&& d->s2->jmp.type == Jjmp)
|
||||
return d->jmp.arg;
|
||||
|
||||
return R;
|
||||
}
|
||||
141
src/qbe/doc/abi.txt
Normal file
141
src/qbe/doc/abi.txt
Normal file
@@ -0,0 +1,141 @@
|
||||
==================
|
||||
System V ABI AMD64
|
||||
==================
|
||||
|
||||
|
||||
This document describes concisely the subset of the amd64
|
||||
ABI as it is implemented in QBE. The subset can handle
|
||||
correctly arbitrary standard C-like structs containing
|
||||
float and integer types. Structs that have unaligned
|
||||
members are also supported through opaque types, see
|
||||
the IL description document for more information about
|
||||
them.
|
||||
|
||||
|
||||
- ABI Subset Implemented
|
||||
------------------------
|
||||
|
||||
Data classes of interest as defined by the ABI:
|
||||
* INTEGER
|
||||
* SSE
|
||||
* MEMORY
|
||||
|
||||
|
||||
~ Classification
|
||||
|
||||
1. The size of each argument gets rounded up to eightbytes.
|
||||
(It keeps the stack always 8 bytes aligned.)
|
||||
2. _Bool, char, short, int, long, long long and pointers
|
||||
are in the INTEGER class. In the context of QBE, it
|
||||
means that 'l' and 'w' are in the INTEGER class.
|
||||
3. float and double are in the SSE class. In the context
|
||||
of QBE, it means that 's' and 'd' are in the SSE class.
|
||||
4. If the size of an object is larger than two eightbytes
|
||||
or if it contains unaligned fields, it has class MEMORY.
|
||||
In the context of QBE, those are big aggregate types
|
||||
and opaque types.
|
||||
5. Otherwise, recursively classify fields and determine
|
||||
the class of the two eightbytes using the classes of
|
||||
their components. If any is INTEGER the result is
|
||||
INTEGER, otherwise the result is SSE.
|
||||
|
||||
~ Passing
|
||||
|
||||
* Classify arguments in order.
|
||||
* INTEGER arguments use in order `%rdi` `%rsi` `%rdx`
|
||||
`%rcx` `%r8` `%r9`.
|
||||
* SSE arguments use in order `%xmm0` - `%xmm7`.
|
||||
* MEMORY arguments get passed on the stack. They are "pushed"
|
||||
in the right-to-left order, so from the callee's
|
||||
point of view, the left-most argument appears first
|
||||
on the stack.
|
||||
* When we run out of registers for an aggregate, revert
|
||||
the assignment for the first eightbytes and pass it
|
||||
on the stack.
|
||||
* When all registers are taken, write arguments on the
|
||||
stack from right to left.
|
||||
* When calling a variadic function, %al stores the number
|
||||
of vector registers used to pass arguments (it must be
|
||||
an upper bound and does not have to be exact).
|
||||
* Registers `%rbx`, `%r12` - `%r15` are callee-save.
|
||||
|
||||
~ Returning
|
||||
|
||||
* Classify the return type.
|
||||
* Use `%rax` and `%rdx` in order for INTEGER return
|
||||
values.
|
||||
* Use `%xmm0` and `%xmm1` in order for SSE return values.
|
||||
* If the return value's class is MEMORY, the first
|
||||
argument of the function `%rdi` was a pointer to an
|
||||
area big enough to fit the return value. The function
|
||||
writes the return value there and returns the address
|
||||
(that was in `%rdi`) in `%rax`.
|
||||
|
||||
|
||||
- Alignment on the Stack
|
||||
------------------------
|
||||
|
||||
The ABI is unclear on the alignment requirement of the
|
||||
stack. What must be ensured is that, right before
|
||||
executing a 'call' instruction, the stack pointer `%rsp`
|
||||
is aligned on 16 bytes. On entry of the called
|
||||
function, the stack pointer is 8 modulo 16. Since most
|
||||
functions will have a prelude pushing `%rbp`, the frame
|
||||
pointer, upon entry of the body code of the function is
|
||||
also aligned on 16 bytes (== 0 mod 16).
|
||||
|
||||
Here is a diagram of the stack layout after a call from
|
||||
g() to f().
|
||||
|
||||
| |
|
||||
| g() locals |
|
||||
+-------------+
|
||||
^ | | \
|
||||
| | stack arg 2 | '
|
||||
| |xxxxxxxxxxxxx| | f()'s MEMORY
|
||||
growing | +-------------+ | arguments
|
||||
addresses | | stack arg 1 | ,
|
||||
| |xxxxxxxxxxxxx| /
|
||||
| +-------------+ -> 0 mod 16
|
||||
| | ret addr |
|
||||
+-------------+
|
||||
| saved %rbp |
|
||||
+-------------+ -> f()'s %rbp
|
||||
| f() locals | 0 mod 16
|
||||
| ... |
|
||||
-> %rsp
|
||||
|
||||
Legend:
|
||||
* `xxxxx` Optional padding.
|
||||
|
||||
|
||||
- Remarks
|
||||
---------
|
||||
|
||||
* A struct can be returned in registers in one of three
|
||||
ways. Either `%rax`, `%rdx` are used, or `%xmm0`,
|
||||
`%xmm1`, or finally `%rax`, `%xmm0`. The last case
|
||||
happens when a struct is returned with one half
|
||||
classified as INTEGER and the other as SSE. This
|
||||
is a consequence of the <@Returning> section above.
|
||||
|
||||
* The size of the arguments area of the stack needs to
|
||||
be computed first, then arguments are packed starting
|
||||
from the bottom of the argument area, respecting
|
||||
alignment constraints. The ABI mentions "pushing"
|
||||
arguments in right-to-left order, but I think it's a
|
||||
mistaken view because of the alignment constraints.
|
||||
|
||||
Example: If three 8-byte MEMORY arguments are passed
|
||||
to the callee and the caller's stack pointer is 16 bytes
|
||||
aligned, the layout will be like this.
|
||||
|
||||
+-------------+
|
||||
|xxxxxxxxxxxxx| padding
|
||||
| stack arg 3 |
|
||||
| stack arg 2 |
|
||||
| stack arg 1 |
|
||||
+-------------+ -> 0 mod 16
|
||||
|
||||
The padding must not be at the end of the stack area.
|
||||
A "pushing" logic would put it at the end.
|
||||
1196
src/qbe/doc/il.txt
Normal file
1196
src/qbe/doc/il.txt
Normal file
File diff suppressed because it is too large
Load Diff
98
src/qbe/doc/llvm.txt
Normal file
98
src/qbe/doc/llvm.txt
Normal file
@@ -0,0 +1,98 @@
|
||||
===========
|
||||
QBE vs LLVM
|
||||
===========
|
||||
|
||||
|
||||
Both QBE and LLVM are compiler backends using an SSA
|
||||
representation. This document will explain why LLVM
|
||||
does not make QBE a redundant project. Obviously,
|
||||
everything that follows is biased, because it is written by me.
|
||||
|
||||
- Scope
|
||||
-------
|
||||
|
||||
QBE is a much smaller scale project with different goals
|
||||
than LLVM.
|
||||
|
||||
* QBE is for amateur language designers.
|
||||
|
||||
It does not address all the problems faced when
|
||||
conceiving an industry-grade language. If you are
|
||||
toying with some language ideas, using LLVM will
|
||||
be like hauling your backpack with a truck, but
|
||||
using QBE will feel more like riding a bicycle.
|
||||
|
||||
* QBE is about the first 70%, not the last 30%.
|
||||
|
||||
It attempts to pinpoint, in the extremely vast
|
||||
compilation literature, the optimizations that get
|
||||
you 70% of the performance in 10% of the code of
|
||||
full blown compilers.
|
||||
|
||||
For example, copy propagation on SSA form is
|
||||
implemented in 160 lines of code in QBE!
|
||||
|
||||
* QBE is extremely hackable.
|
||||
|
||||
First, it is, and will remain, a small project
|
||||
(less than 8 kloc). Second, it is programmed in
|
||||
non-fancy C99 without any dependencies. Third,
|
||||
it is able to dump the IL and debug information in
|
||||
a uniform format after each pass.
|
||||
|
||||
On my Core 2 Duo machine, QBE compiles in half a
|
||||
second (without optimizations).
|
||||
|
||||
- Features
|
||||
----------
|
||||
|
||||
LLVM is definitely more packed with features, but there
|
||||
are a few things provided in QBE to consider.
|
||||
|
||||
* LLVM does NOT provide full C compatibility for you.
|
||||
|
||||
In more technical terms, any language that provides
|
||||
good C compatibility and uses LLVM as a backend
|
||||
needs to reimplement large chunks of the ABI in
|
||||
its frontend! This well known issue in the LLVM
|
||||
community causes a great deal of duplication
|
||||
and bugs.
|
||||
|
||||
Implementing a complete C ABI (with struct arguments
|
||||
and returns) is incredibly tricky, and not really
|
||||
a lot of fun. QBE provides you with IL operations
|
||||
to call in (and be called by) C with no pain.
|
||||
Moreover the ABI implementation in QBE has been
|
||||
thoroughly tested by fuzzing and manual tests.
|
||||
|
||||
* LLVM IL is more cluttered with memory operations.
|
||||
|
||||
Implementing SSA construction is hard. To save its
|
||||
users from having to implement it, LLVM provides
|
||||
stack slots. This means that one increment of
|
||||
a variable `v` will be composed of three LLVM
|
||||
instructions: one load, one add, and one store.
|
||||
|
||||
QBE provides simple non-SSA temporaries, so
|
||||
incrementing `v` is simply done with one instruction
|
||||
`%v =w add %v, 1`.
|
||||
|
||||
This could seem cosmetic, but dividing the size of
|
||||
the IL by three makes it easier for the frontend
|
||||
writers to spot bugs in the generated code.
|
||||
|
||||
* LLVM IL is more cluttered with type annotations and
|
||||
casts.
|
||||
|
||||
For the sake of advanced optimizations and
|
||||
correctness, LLVM has complex IL types. However,
|
||||
only a few types are really first class and many
|
||||
operations of source languages require casts to be
|
||||
compiled.
|
||||
|
||||
Because QBE makes a much lighter use of types, the
|
||||
IL is more readable and shorter. It can of course be
|
||||
argued back that the correctness of QBE is jeopardized,
|
||||
but remember that, in practice, the large amount
|
||||
of casts necessary in LLVM IL is undermining the
|
||||
overall effectiveness of the type system.
|
||||
15
src/qbe/doc/native_win.txt
Normal file
15
src/qbe/doc/native_win.txt
Normal file
@@ -0,0 +1,15 @@
|
||||
There is an experimental amd64_win (native Windows ABI and calling
|
||||
convention).
|
||||
|
||||
In tree, this is currently only tested via cross-compilation from a
|
||||
Linux host, and using wine to run the tests.
|
||||
|
||||
You'll need something like:
|
||||
|
||||
sudo apt install mingw-w64 dos2unix wine
|
||||
|
||||
and then
|
||||
|
||||
make check-amd64_win
|
||||
|
||||
should pass.
|
||||
20
src/qbe/doc/rv64.txt
Normal file
20
src/qbe/doc/rv64.txt
Normal file
@@ -0,0 +1,20 @@
|
||||
=========
|
||||
RISC-V 64
|
||||
=========
|
||||
|
||||
- Known issues
|
||||
--------------
|
||||
|
||||
ABI with structs containing floats is not yet supported.
|
||||
|
||||
- Possible improvements
|
||||
-----------------------
|
||||
|
||||
rv64_isel() could turn compare used only with jnz into b{lt,ge}[u].
|
||||
|
||||
- Helpful links
|
||||
---------------
|
||||
|
||||
RISC-V spec: https://github.com/riscv/riscv-isa-manual/releases/latest/download/riscv-spec.pdf
|
||||
ASM manual: https://github.com/riscv-non-isa/riscv-asm-manual/blob/master/riscv-asm.md
|
||||
ABI: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc
|
||||
23
src/qbe/doc/win.txt
Normal file
23
src/qbe/doc/win.txt
Normal file
@@ -0,0 +1,23 @@
|
||||
===================
|
||||
Windows Quick Start
|
||||
===================
|
||||
|
||||
Only 64-bit versions of Windows are supported. To compile
|
||||
this software you will need to get a normal UNIX toolchain.
|
||||
There are several ways to get one, but I will only describe
|
||||
how I did it.
|
||||
|
||||
1. Download and install [@1 MSYS2] (the x86_64 version).
|
||||
2. In an MSYS2 terminal, run the following command.
|
||||
|
||||
pacman -S git make mingw-w64-x86_64-gcc mingw-w64-x86_64-gdb
|
||||
|
||||
3. Restart the MSYS2 terminal.
|
||||
4. In the new terminal, clone QBE.
|
||||
|
||||
git clone git://c9x.me/qbe.git
|
||||
|
||||
5. Compile using `make`.
|
||||
|
||||
|
||||
[1] http://www.msys2.org
|
||||
271
src/qbe/emit.c
Normal file
271
src/qbe/emit.c
Normal file
@@ -0,0 +1,271 @@
|
||||
#include "all.h"
|
||||
|
||||
enum {
|
||||
SecText,
|
||||
SecData,
|
||||
SecBss,
|
||||
};
|
||||
|
||||
void
|
||||
emitlnk(char *n, Lnk *l, int s, FILE *f)
|
||||
{
|
||||
static char *sec[2][3] = {
|
||||
[0][SecText] = ".text",
|
||||
[0][SecData] = ".data",
|
||||
[0][SecBss] = ".bss",
|
||||
[1][SecText] = ".abort \"unreachable\"",
|
||||
[1][SecData] = ".section .tdata,\"awT\"",
|
||||
[1][SecBss] = ".section .tbss,\"awT\"",
|
||||
};
|
||||
char *pfx, *sfx;
|
||||
|
||||
pfx = n[0] == '"' ? "" : T.assym;
|
||||
sfx = "";
|
||||
if (T.apple && l->thread) {
|
||||
l->sec = "__DATA";
|
||||
l->secf = "__thread_data,thread_local_regular";
|
||||
sfx = "$tlv$init";
|
||||
fputs(
|
||||
".section __DATA,__thread_vars,"
|
||||
"thread_local_variables\n",
|
||||
f
|
||||
);
|
||||
fprintf(f, "%s%s:\n", pfx, n);
|
||||
fprintf(f,
|
||||
"\t.quad __tlv_bootstrap\n"
|
||||
"\t.quad 0\n"
|
||||
"\t.quad %s%s%s\n\n",
|
||||
pfx, n, sfx
|
||||
);
|
||||
}
|
||||
if (l->sec) {
|
||||
fprintf(f, ".section %s", l->sec);
|
||||
if (l->secf)
|
||||
fprintf(f, ",%s", l->secf);
|
||||
} else
|
||||
fputs(sec[l->thread != 0][s], f);
|
||||
fputc('\n', f);
|
||||
if (l->align)
|
||||
fprintf(f, ".balign %d\n", l->align);
|
||||
if (l->export)
|
||||
fprintf(f, ".globl %s%s\n", pfx, n);
|
||||
fprintf(f, "%s%s%s:\n", pfx, n, sfx);
|
||||
}
|
||||
|
||||
/* Emit the linkage header of function n to f; this is
 * emitlnk() with the text section as the default section. */
void
emitfnlnk(char *n, Lnk *l, FILE *f)
{
	emitlnk(n, l, SecText, f);
}
|
||||
|
||||
/* Emit one item d of a data definition to f.  The function is
 * called once per item and keeps state between calls in the
 * static 'zero':
 *   zero >= 0   only zero bytes (DZ) have been seen so far in the
 *               current definition; zero is their running total
 *               and no header has been written yet
 *   zero == -1  a non-zero item was seen; the header was written
 *               with the data section as default and items are
 *               now emitted directly
 * A definition that reaches DEnd without leaving the first state
 * is emitted as a single .fill with the bss section as default,
 * or as a .comm symbol when its linkage is marked common. */
void
emitdat(Dat *d, FILE *f)
{
	/* assembler directive and value mask, indexed by item type */
	static struct {
		char decl[8];
		int64_t mask;
	} di[] = {
		[DB] = {"\t.byte", 0xffL},
		[DH] = {"\t.short", 0xffffL},
		[DW] = {"\t.int", 0xffffffffL},
		[DL] = {"\t.quad", -1L},
	};
	static int64_t zero;
	char *p;

	switch (d->type) {
	case DStart:
		/* new definition: nothing emitted yet */
		zero = 0;
		break;
	case DEnd:
		if (d->lnk->common) {
			/* common symbols may only contain zeros */
			if (zero == -1)
				die("invalid common data definition");
			/* quoted names skip the target symbol prefix */
			p = d->name[0] == '"' ? "" : T.assym;
			fprintf(f, ".comm %s%s,%"PRId64,
				p, d->name, zero);
			if (d->lnk->align)
				fprintf(f, ",%d", d->lnk->align);
			fputc('\n', f);
		}
		else if (zero != -1) {
			/* all-zero definition */
			emitlnk(d->name, d->lnk, SecBss, f);
			fprintf(f, "\t.fill %"PRId64",1,0\n", zero);
		}
		break;
	case DZ:
		/* accumulate while the header is still pending,
		 * otherwise emit the padding right away */
		if (zero != -1)
			zero += d->u.num;
		else
			fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num);
		break;
	default:
		if (zero != -1) {
			/* first non-zero item: write the header and
			 * flush the zero bytes accumulated so far */
			emitlnk(d->name, d->lnk, SecData, f);
			if (zero > 0)
				fprintf(f, "\t.fill %"PRId64",1,0\n", zero);
			zero = -1;
		}
		if (d->isstr) {
			if (d->type != DB)
				err("strings only supported for 'b' currently");
			fprintf(f, "\t.ascii %s\n", d->u.str);
		}
		else if (d->isref) {
			/* symbol reference with a signed offset */
			p = d->u.ref.name[0] == '"' ? "" : T.assym;
			fprintf(f, "%s %s%s%+"PRId64"\n",
				di[d->type].decl, p, d->u.ref.name,
				d->u.ref.off);
		}
		else {
			/* plain number, masked by the item type's mask */
			fprintf(f, "%s %"PRId64"\n",
				di[d->type].decl,
				d->u.num & di[d->type].mask);
		}
		break;
	}
}
|
||||
|
||||
typedef struct Asmbits Asmbits;
|
||||
|
||||
/* One entry of the list of constants recorded by stashbits()
 * and written out by emitfin(). */
struct Asmbits {
	bits n;        /* raw bit pattern of the constant */
	int size;      /* size in bytes: 4, 8 or 16 */
	Asmbits *link; /* next entry, 0 at the end of the list */
};
|
||||
|
||||
static Asmbits *stash;
|
||||
|
||||
int
|
||||
stashbits(bits n, int size)
|
||||
{
|
||||
Asmbits **pb, *b;
|
||||
int i;
|
||||
|
||||
assert(size == 4 || size == 8 || size == 16);
|
||||
for (pb=&stash, i=0; (b=*pb); pb=&b->link, i++)
|
||||
if (size <= b->size && b->n == n)
|
||||
return i;
|
||||
b = emalloc(sizeof *b);
|
||||
b->n = n;
|
||||
b->size = size;
|
||||
b->link = 0;
|
||||
*pb = b;
|
||||
return i;
|
||||
}
|
||||
|
||||
static void
|
||||
emitfin(FILE *f, char *sec[3])
|
||||
{
|
||||
Asmbits *b;
|
||||
int lg, i;
|
||||
union { int32_t i; float f; } u;
|
||||
|
||||
if (!stash)
|
||||
return;
|
||||
fprintf(f, "/* floating point constants */\n");
|
||||
for (lg=4; lg>=2; lg--)
|
||||
for (b=stash, i=0; b; b=b->link, i++) {
|
||||
if (b->size == (1<<lg)) {
|
||||
fprintf(f,
|
||||
".section %s\n"
|
||||
".p2align %d\n"
|
||||
"%sfp%d:",
|
||||
sec[lg-2], lg, T.asloc, i
|
||||
);
|
||||
if (lg == 4)
|
||||
fprintf(f,
|
||||
"\n\t.quad %"PRId64
|
||||
"\n\t.quad 0\n\n",
|
||||
(int64_t)b->n);
|
||||
else if (lg == 3)
|
||||
fprintf(f,
|
||||
"\n\t.quad %"PRId64
|
||||
" /* %f */\n\n",
|
||||
(int64_t)b->n,
|
||||
*(double *)&b->n);
|
||||
else if (lg == 2) {
|
||||
u.i = b->n;
|
||||
fprintf(f,
|
||||
"\n\t.int %"PRId32
|
||||
" /* %f */\n\n",
|
||||
u.i, (double)u.f);
|
||||
}
|
||||
}
|
||||
}
|
||||
while ((b=stash)) {
|
||||
stash = b->link;
|
||||
free(b);
|
||||
}
|
||||
}
|
||||
|
||||
/* ELF end-of-file output: flush the stashed constants into
 * .rodata, then add the .note.GNU-stack section directive. */
void
elf_emitfin(FILE *f)
{
	static char *rodata[3] = { ".rodata", ".rodata", ".rodata" };

	emitfin(f, rodata);
	fputs(".section .note.GNU-stack,\"\",@progbits\n", f);
}
|
||||
|
||||
/* Emit the ELF .type and .size directives that close
 * the definition of function fn. */
void
elf_emitfnfin(char *fn, FILE *f)
{
	fprintf(f,
		".type %s, @function\n"
		".size %s, .-%s\n",
		fn, fn, fn
	);
}
|
||||
|
||||
/* Mach-O end-of-file output: flush the stashed constants
 * into the 4-byte and 8-byte literal sections.  The third
 * entry is what emitfin() would use for 16-byte constants;
 * it is an .abort directive rather than a section name.
 * NOTE(review): presumably 16-byte constants are never
 * stashed for Mach-O targets -- confirm. */
void
macho_emitfin(FILE *f)
{
	static char *sec[3] = {
		"__TEXT,__literal4,4byte_literals",
		"__TEXT,__literal8,8byte_literals",
		".abort \"unreachable\"",
	};

	emitfin(f, sec);
}
|
||||
|
||||
/* PE end-of-file output: flush the stashed constants,
 * using .rodata for every constant size. */
void
pe_emitfin(FILE *f)
{
	static char *sec[3] = { ".rodata", ".rodata", ".rodata" };

	emitfin(f, sec);
}
|
||||
|
||||
static uint32_t *file;
|
||||
static uint nfile;
|
||||
static uint curfile;
|
||||
|
||||
void
|
||||
emitdbgfile(char *fn, FILE *f)
|
||||
{
|
||||
uint32_t id;
|
||||
uint n;
|
||||
|
||||
id = intern(fn);
|
||||
for (n=0; n<nfile; n++)
|
||||
if (file[n] == id) {
|
||||
/* gas requires positive
|
||||
* file numbers */
|
||||
curfile = n + 1;
|
||||
return;
|
||||
}
|
||||
if (!file)
|
||||
file = vnew(0, sizeof *file, PHeap);
|
||||
vgrow(&file, ++nfile);
|
||||
file[nfile-1] = id;
|
||||
curfile = nfile;
|
||||
fprintf(f, ".file %u %s\n", curfile, fn);
|
||||
}
|
||||
|
||||
void
|
||||
emitdbgloc(uint line, uint col, FILE *f)
|
||||
{
|
||||
if (col != 0)
|
||||
fprintf(f, "\t.loc %u %u %u\n", curfile, line, col);
|
||||
else
|
||||
fprintf(f, "\t.loc %u %u\n", curfile, line);
|
||||
}
|
||||
246
src/qbe/fold.c
Normal file
246
src/qbe/fold.c
Normal file
@@ -0,0 +1,246 @@
|
||||
#include "all.h"
|
||||
|
||||
/* boring folding code */
|
||||
|
||||
static int
|
||||
iscon(Con *c, int w, uint64_t k)
|
||||
{
|
||||
if (c->type != CBits)
|
||||
return 0;
|
||||
if (w)
|
||||
return (uint64_t)c->bits.i == k;
|
||||
else
|
||||
return (uint32_t)c->bits.i == (uint32_t)k;
|
||||
}
|
||||
|
||||
/* Fold the integer-result operation 'op' applied to the constants
 * cl and cr, storing the result in *res.  w is non-zero for 64-bit
 * operations and zero for 32-bit ones.
 * Returns 0 on success and 1 when the operation is not folded:
 * an unsupported combination of address operands, a division or
 * remainder by zero, or a signed division or remainder of the
 * minimum integer by -1. */
int
foldint(Con *res, int op, int w, Con *cl, Con *cr)
{
	/* views of the operand bits used by the cases below */
	union {
		int64_t s;
		uint64_t u;
		float fs;
		double fd;
	} l, r;
	uint64_t x;
	Sym sym;
	int typ;

	memset(&sym, 0, sizeof sym);
	typ = CBits;
	l.s = cl->bits.i;
	r.s = cr->bits.i;
	/* address operands: only addr+num, num+addr, addr-num and
	 * the difference of two addresses with equal symbols are
	 * accepted; every other use of an address is rejected */
	if (op == Oadd) {
		if (cl->type == CAddr) {
			if (cr->type == CAddr)
				return 1;
			typ = CAddr;
			sym = cl->sym;
		}
		else if (cr->type == CAddr) {
			typ = CAddr;
			sym = cr->sym;
		}
	}
	else if (op == Osub) {
		if (cl->type == CAddr) {
			if (cr->type != CAddr) {
				typ = CAddr;
				sym = cl->sym;
			} else if (!symeq(cl->sym, cr->sym))
				return 1;
		}
		else if (cr->type == CAddr)
			return 1;
	}
	else if (cl->type == CAddr || cr->type == CAddr)
		return 1;
	/* refuse to evaluate divisions that are undefined in C */
	if (op == Odiv || op == Orem || op == Oudiv || op == Ourem) {
		if (iscon(cr, w, 0))
			return 1;
		if (op == Odiv || op == Orem) {
			x = w ? INT64_MIN : INT32_MIN;
			if (iscon(cr, w, -1))
				if (iscon(cl, w, x))
					return 1;
		}
	}
	switch (op) {
	case Oadd:  x = l.u + r.u; break;
	case Osub:  x = l.u - r.u; break;
	case Oneg:  x = -l.u; break;
	case Odiv:  x = w ? l.s / r.s : (int32_t)l.s / (int32_t)r.s; break;
	case Orem:  x = w ? l.s % r.s : (int32_t)l.s % (int32_t)r.s; break;
	case Oudiv: x = w ? l.u / r.u : (uint32_t)l.u / (uint32_t)r.u; break;
	case Ourem: x = w ? l.u % r.u : (uint32_t)l.u % (uint32_t)r.u; break;
	case Omul:  x = l.u * r.u; break;
	case Oand:  x = l.u & r.u; break;
	case Oor:   x = l.u | r.u; break;
	case Oxor:  x = l.u ^ r.u; break;
	/* shift counts are masked to 5 bits, or 6 bits when w is 1 */
	case Osar:  x = (w ? l.s : (int32_t)l.s) >> (r.u & (31|w<<5)); break;
	case Oshr:  x = (w ? l.u : (uint32_t)l.u) >> (r.u & (31|w<<5)); break;
	case Oshl:  x = l.u << (r.u & (31|w<<5)); break;
	case Oextsb: x = (int8_t)l.u;   break;
	case Oextub: x = (uint8_t)l.u;  break;
	case Oextsh: x = (int16_t)l.u;  break;
	case Oextuh: x = (uint16_t)l.u; break;
	case Oextsw: x = (int32_t)l.u;  break;
	case Oextuw: x = (uint32_t)l.u; break;
	/* float to integer conversions read the float
	 * members of the left constant directly */
	case Ostosi: x = w ? (int64_t)cl->bits.s : (int32_t)cl->bits.s; break;
	case Ostoui: x = w ? (uint64_t)cl->bits.s : (uint32_t)cl->bits.s; break;
	case Odtosi: x = w ? (int64_t)cl->bits.d : (int32_t)cl->bits.d; break;
	case Odtoui: x = w ? (uint64_t)cl->bits.d : (uint32_t)cl->bits.d; break;
	case Ocast:
		x = l.u;
		/* NOTE(review): this test looks unreachable; for ops
		 * other than add and sub any address operand already
		 * returned 1 above */
		if (cl->type == CAddr) {
			typ = CAddr;
			sym = cl->sym;
		}
		break;
	default:
		if (Ocmpw <= op && op <= Ocmpl1) {
			if (op <= Ocmpw1) {
				/* word compare: sign-extend the
				 * low 32 bits of both operands */
				l.u = (int32_t)l.u;
				r.u = (int32_t)r.u;
			} else
				/* long compare: rebase op so that
				 * op - Ocmpw is the comparison kind */
				op -= Ocmpl - Ocmpw;
			switch (op - Ocmpw) {
			case Ciule: x = l.u <= r.u; break;
			case Ciult: x = l.u < r.u;  break;
			case Cisle: x = l.s <= r.s; break;
			case Cislt: x = l.s < r.s;  break;
			case Cisgt: x = l.s > r.s;  break;
			case Cisge: x = l.s >= r.s; break;
			case Ciugt: x = l.u > r.u;  break;
			case Ciuge: x = l.u >= r.u; break;
			case Cieq:  x = l.u == r.u; break;
			case Cine:  x = l.u != r.u; break;
			default: die("unreachable");
			}
		}
		else if (Ocmps <= op && op <= Ocmps1) {
			/* single precision compares; the ordered test
			 * is true exactly when a < b or a >= b holds */
			switch (op - Ocmps) {
			case Cfle: x = l.fs <= r.fs; break;
			case Cflt: x = l.fs < r.fs;  break;
			case Cfgt: x = l.fs > r.fs;  break;
			case Cfge: x = l.fs >= r.fs; break;
			case Cfne: x = l.fs != r.fs; break;
			case Cfeq: x = l.fs == r.fs; break;
			case Cfo: x = l.fs < r.fs || l.fs >= r.fs; break;
			case Cfuo: x = !(l.fs < r.fs || l.fs >= r.fs); break;
			default: die("unreachable");
			}
		}
		else if (Ocmpd <= op && op <= Ocmpd1) {
			/* double precision compares, same scheme */
			switch (op - Ocmpd) {
			case Cfle: x = l.fd <= r.fd; break;
			case Cflt: x = l.fd < r.fd;  break;
			case Cfgt: x = l.fd > r.fd;  break;
			case Cfge: x = l.fd >= r.fd; break;
			case Cfne: x = l.fd != r.fd; break;
			case Cfeq: x = l.fd == r.fd; break;
			case Cfo: x = l.fd < r.fd || l.fd >= r.fd; break;
			case Cfuo: x = !(l.fd < r.fd || l.fd >= r.fd); break;
			default: die("unreachable");
			}
		}
		else
			die("unreachable");
	}
	*res = (Con){.type=typ, .sym=sym, .bits={.i=x}};
	return 0;
}
|
||||
|
||||
/* Fold the floating-point-result operation 'op' applied to the
 * constants cl and cr, storing the result in *res.  w is non-zero
 * for a double result (res->flt is set to 2) and zero for a single
 * result (res->flt is set to 1).  Address operands are reported
 * through err(); an unsupported op ends in die(). */
static void
foldflt(Con *res, int op, int w, Con *cl, Con *cr)
{
	float xs, ls, rs;
	double xd, ld, rd;

	if (cl->type != CBits || cr->type != CBits)
		err("invalid address operand for '%s'", optab[op].name);
	*res = (Con){.type = CBits};
	/* clear all the bits so that the ones not
	 * covered by the result stored below are zero */
	memset(&res->bits, 0, sizeof(res->bits));
	if (w) {
		ld = cl->bits.d;
		rd = cr->bits.d;
		switch (op) {
		case Oadd: xd = ld + rd; break;
		case Osub: xd = ld - rd; break;
		case Oneg: xd = -ld; break;
		case Odiv: xd = ld / rd; break;
		case Omul: xd = ld * rd; break;
		/* integer to double conversions */
		case Oswtof: xd = (int32_t)cl->bits.i; break;
		case Ouwtof: xd = (uint32_t)cl->bits.i; break;
		case Osltof: xd = (int64_t)cl->bits.i; break;
		case Oultof: xd = (uint64_t)cl->bits.i; break;
		/* single to double extension */
		case Oexts: xd = cl->bits.s; break;
		case Ocast: xd = ld; break;
		default: die("unreachable");
		}
		res->bits.d = xd;
		res->flt = 2;
	} else {
		ls = cl->bits.s;
		rs = cr->bits.s;
		switch (op) {
		case Oadd: xs = ls + rs; break;
		case Osub: xs = ls - rs; break;
		case Oneg: xs = -ls; break;
		case Odiv: xs = ls / rs; break;
		case Omul: xs = ls * rs; break;
		/* integer to single conversions */
		case Oswtof: xs = (int32_t)cl->bits.i; break;
		case Ouwtof: xs = (uint32_t)cl->bits.i; break;
		case Osltof: xs = (int64_t)cl->bits.i; break;
		case Oultof: xs = (uint64_t)cl->bits.i; break;
		/* double to single truncation */
		case Otruncd: xs = cl->bits.d; break;
		case Ocast: xs = ls; break;
		default: die("unreachable");
		}
		res->bits.s = xs;
		res->flt = 1;
	}
}
|
||||
|
||||
static Ref
|
||||
opfold(int op, int cls, Con *cl, Con *cr, Fn *fn)
|
||||
{
|
||||
Ref r;
|
||||
Con c;
|
||||
|
||||
if (cls == Kw || cls == Kl) {
|
||||
if (foldint(&c, op, cls == Kl, cl, cr))
|
||||
return R;
|
||||
} else
|
||||
foldflt(&c, op, cls == Kd, cl, cr);
|
||||
if (!KWIDE(cls))
|
||||
c.bits.i &= 0xffffffff;
|
||||
r = newcon(&c, fn);
|
||||
assert(!(cls == Ks || cls == Kd) || c.flt);
|
||||
return r;
|
||||
}
|
||||
|
||||
/* used by GVN */
|
||||
Ref
|
||||
foldref(Fn *fn, Ins *i)
|
||||
{
|
||||
Ref rr;
|
||||
Con *cl, *cr;
|
||||
|
||||
if (rtype(i->to) != RTmp)
|
||||
return R;
|
||||
if (optab[i->op].canfold) {
|
||||
if (rtype(i->arg[0]) != RCon)
|
||||
return R;
|
||||
cl = &fn->con[i->arg[0].val];
|
||||
rr = i->arg[1];
|
||||
if (req(rr, R))
|
||||
rr = CON_Z;
|
||||
if (rtype(rr) != RCon)
|
||||
return R;
|
||||
cr = &fn->con[rr.val];
|
||||
|
||||
return opfold(i->op, i->cls, cl, cr, fn);
|
||||
}
|
||||
return R;
|
||||
}
|
||||
460
src/qbe/gcm.c
Normal file
460
src/qbe/gcm.c
Normal file
@@ -0,0 +1,460 @@
|
||||
#include "all.h"
|
||||
|
||||
#define NOBID (-1u)
|
||||
|
||||
static int
|
||||
isdivwl(Ins *i)
|
||||
{
|
||||
switch (i->op) {
|
||||
case Odiv:
|
||||
case Orem:
|
||||
case Oudiv:
|
||||
case Ourem:
|
||||
return KBASE(i->cls) == 0;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
pinned(Ins *i)
|
||||
{
|
||||
return optab[i->op].pinned || isdivwl(i);
|
||||
}
|
||||
|
||||
/* pinned ins that can be eliminated if unused */
|
||||
static int
|
||||
canelim(Ins *i)
|
||||
{
|
||||
return isload(i->op) || isalloc(i->op) || isdivwl(i);
|
||||
}
|
||||
|
||||
static uint earlyins(Fn *, Blk *, Ins *);
|
||||
|
||||
static uint
|
||||
schedearly(Fn *fn, Ref r)
|
||||
{
|
||||
Tmp *t;
|
||||
Blk *b;
|
||||
|
||||
if (rtype(r) != RTmp)
|
||||
return 0;
|
||||
|
||||
t = &fn->tmp[r.val];
|
||||
if (t->gcmbid != NOBID)
|
||||
return t->gcmbid;
|
||||
|
||||
b = fn->rpo[t->bid];
|
||||
if (t->def) {
|
||||
assert(b->ins <= t->def && t->def < &b->ins[b->nins]);
|
||||
t->gcmbid = 0; /* mark as visiting */
|
||||
t->gcmbid = earlyins(fn, b, t->def);
|
||||
} else {
|
||||
/* phis do not move */
|
||||
t->gcmbid = t->bid;
|
||||
}
|
||||
|
||||
return t->gcmbid;
|
||||
}
|
||||
|
||||
static uint
|
||||
earlyins(Fn *fn, Blk *b, Ins *i)
|
||||
{
|
||||
uint b0, b1;
|
||||
|
||||
b0 = schedearly(fn, i->arg[0]);
|
||||
assert(b0 != NOBID);
|
||||
b1 = schedearly(fn, i->arg[1]);
|
||||
assert(b1 != NOBID);
|
||||
if (fn->rpo[b0]->depth < fn->rpo[b1]->depth) {
|
||||
assert(dom(fn->rpo[b0], fn->rpo[b1]));
|
||||
b0 = b1;
|
||||
}
|
||||
return pinned(i) ? b->id : b0;
|
||||
}
|
||||
|
||||
static void
|
||||
earlyblk(Fn *fn, uint bid)
|
||||
{
|
||||
Blk *b;
|
||||
Phi *p;
|
||||
Ins *i;
|
||||
uint n;
|
||||
|
||||
b = fn->rpo[bid];
|
||||
for (p=b->phi; p; p=p->link)
|
||||
for (n=0; n<p->narg; n++)
|
||||
schedearly(fn, p->arg[n]);
|
||||
for (i=b->ins; i<&b->ins[b->nins]; i++)
|
||||
if (pinned(i)) {
|
||||
schedearly(fn, i->arg[0]);
|
||||
schedearly(fn, i->arg[1]);
|
||||
}
|
||||
schedearly(fn, b->jmp.arg);
|
||||
}
|
||||
|
||||
/* least common ancestor in dom tree */
|
||||
static uint
|
||||
lcabid(Fn *fn, uint bid1, uint bid2)
|
||||
{
|
||||
Blk *b;
|
||||
|
||||
if (bid1 == NOBID)
|
||||
return bid2;
|
||||
if (bid2 == NOBID)
|
||||
return bid1;
|
||||
|
||||
b = lca(fn->rpo[bid1], fn->rpo[bid2]);
|
||||
assert(b);
|
||||
return b->id;
|
||||
}
|
||||
|
||||
static uint
|
||||
bestbid(Fn *fn, uint earlybid, uint latebid)
|
||||
{
|
||||
Blk *curb, *earlyb, *bestb;
|
||||
|
||||
if (latebid == NOBID)
|
||||
return NOBID; /* unused */
|
||||
|
||||
assert(earlybid != NOBID);
|
||||
|
||||
earlyb = fn->rpo[earlybid];
|
||||
bestb = curb = fn->rpo[latebid];
|
||||
assert(dom(earlyb, curb));
|
||||
|
||||
while (curb != earlyb) {
|
||||
curb = curb->idom;
|
||||
if (curb->loop < bestb->loop)
|
||||
bestb = curb;
|
||||
}
|
||||
return bestb->id;
|
||||
}
|
||||
|
||||
static uint lateins(Fn *, Blk *, Ins *, Ref r);
|
||||
static uint latephi(Fn *, Phi *, Ref r);
|
||||
static uint latejmp(Blk *, Ref r);
|
||||
|
||||
/* return lca bid of ref uses
 *
 * The result is memoized: the first visit of a temporary sets
 * t->visit and leaves the final block id in t->gcmbid, later
 * visits return that field directly.  NOBID is returned for
 * refs that are not temporaries and for temporaries whose
 * early bid is NOBID. */
static uint
schedlate(Fn *fn, Ref r)
{
	Tmp *t;
	Blk *b;
	Use *u;
	uint earlybid;
	uint latebid;
	uint uselatebid;

	if (rtype(r) != RTmp)
		return NOBID;

	t = &fn->tmp[r.val];
	if (t->visit)
		return t->gcmbid;

	t->visit = 1;
	/* on entry gcmbid still holds the early bid */
	earlybid = t->gcmbid;
	if (earlybid == NOBID)
		return NOBID; /* not used */

	/* reuse gcmbid for late bid */
	t->gcmbid = t->bid;
	latebid = NOBID;
	/* fold the late bid of every use into one lca */
	for (u=t->use; u<&t->use[t->nuse]; u++) {
		assert(u->bid < fn->nblk);
		b = fn->rpo[u->bid];
		switch (u->type) {
		case UXXX:
			die("unreachable");
			break;
		case UPhi:
			uselatebid = latephi(fn, u->u.phi, r);
			break;
		case UIns:
			uselatebid = lateins(fn, b, u->u.ins, r);
			break;
		case UJmp:
			uselatebid = latejmp(b, r);
			break;
		}
		latebid = lcabid(fn, latebid, uselatebid);
	}
	/* latebid may be NOBID if the temp is used
	 * in fixed instructions that may be eliminated
	 * and are themselves unused transitively */

	if (t->def && !pinned(t->def))
		t->gcmbid = bestbid(fn, earlybid, latebid);
	/* else (phi or pinned definition), gcmbid
	 * keeps the t->bid assigned above */

	/* now, gcmbid is the best bid */
	return t->gcmbid;
}
|
||||
|
||||
/* returns lca bid of uses or NOBID if
|
||||
* the definition can be eliminated */
|
||||
static uint
|
||||
lateins(Fn *fn, Blk *b, Ins *i, Ref r)
|
||||
{
|
||||
uint latebid;
|
||||
|
||||
assert(b->ins <= i && i < &b->ins[b->nins]);
|
||||
assert(req(i->arg[0], r) || req(i->arg[1], r));
|
||||
|
||||
latebid = schedlate(fn, i->to);
|
||||
if (pinned(i)) {
|
||||
if (latebid == NOBID)
|
||||
if (canelim(i))
|
||||
return NOBID;
|
||||
return b->id;
|
||||
}
|
||||
|
||||
return latebid;
|
||||
}
|
||||
|
||||
static uint
|
||||
latephi(Fn *fn, Phi *p, Ref r)
|
||||
{
|
||||
uint n;
|
||||
uint latebid;
|
||||
|
||||
if (!p->narg)
|
||||
return NOBID; /* marked as unused */
|
||||
|
||||
latebid = NOBID;
|
||||
for (n = 0; n < p->narg; n++)
|
||||
if (req(p->arg[n], r))
|
||||
latebid = lcabid(fn, latebid, p->blk[n]->id);
|
||||
|
||||
assert(latebid != NOBID);
|
||||
return latebid;
|
||||
}
|
||||
|
||||
static uint
|
||||
latejmp(Blk *b, Ref r)
|
||||
{
|
||||
if (req(b->jmp.arg, R))
|
||||
return NOBID;
|
||||
else {
|
||||
assert(req(b->jmp.arg, r));
|
||||
return b->id;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
lateblk(Fn *fn, uint bid)
|
||||
{
|
||||
Blk *b;
|
||||
Phi **pp;
|
||||
Ins *i;
|
||||
|
||||
b = fn->rpo[bid];
|
||||
for (pp=&b->phi; *(pp);)
|
||||
if (schedlate(fn, (*pp)->to) == NOBID) {
|
||||
(*pp)->narg = 0; /* mark unused */
|
||||
*pp = (*pp)->link; /* remove phi */
|
||||
} else
|
||||
pp = &(*pp)->link;
|
||||
|
||||
for (i=b->ins; i<&b->ins[b->nins]; i++)
|
||||
if (pinned(i))
|
||||
schedlate(fn, i->to);
|
||||
}
|
||||
|
||||
static void
|
||||
addgcmins(Fn *fn, Ins *vins, uint nins)
|
||||
{
|
||||
Ins *i;
|
||||
Tmp *t;
|
||||
Blk *b;
|
||||
|
||||
for (i=vins; i<&vins[nins]; i++) {
|
||||
assert(rtype(i->to) == RTmp);
|
||||
t = &fn->tmp[i->to.val];
|
||||
b = fn->rpo[t->gcmbid];
|
||||
addins(&b->ins, &b->nins, i);
|
||||
}
|
||||
}
|
||||
|
||||
/* move live instructions to the
|
||||
* end of their target block; use-
|
||||
* before-def errors are fixed by
|
||||
* schedblk */
|
||||
static void
|
||||
gcmmove(Fn *fn)
|
||||
{
|
||||
Tmp *t;
|
||||
Ins *vins, *i;
|
||||
uint nins;
|
||||
|
||||
nins = 0;
|
||||
vins = vnew(nins, sizeof vins[0], PFn);
|
||||
|
||||
for (t=fn->tmp; t<&fn->tmp[fn->ntmp]; t++) {
|
||||
if (t->def == 0)
|
||||
continue;
|
||||
if (t->bid == t->gcmbid)
|
||||
continue;
|
||||
i = t->def;
|
||||
if (pinned(i) && !canelim(i))
|
||||
continue;
|
||||
assert(rtype(i->to) == RTmp);
|
||||
assert(t == &fn->tmp[i->to.val]);
|
||||
if (t->gcmbid != NOBID)
|
||||
addins(&vins, &nins, i);
|
||||
*i = (Ins){.op = Onop};
|
||||
}
|
||||
addgcmins(fn, vins, nins);
|
||||
}
|
||||
|
||||
/* dfs ordering */
|
||||
static Ins *
|
||||
schedins(Fn *fn, Blk *b, Ins *i, Ins **pvins, uint *pnins)
|
||||
{
|
||||
Ins *i0, *i1;
|
||||
Tmp *t;
|
||||
uint n;
|
||||
|
||||
igroup(b, i, &i0, &i1);
|
||||
for (i=i0; i<i1; i++)
|
||||
for (n=0; n<2; n++) {
|
||||
if (rtype(i->arg[n]) != RTmp)
|
||||
continue;
|
||||
t = &fn->tmp[i->arg[n].val];
|
||||
if (t->bid != b->id || !t->def)
|
||||
continue;
|
||||
schedins(fn, b, t->def, pvins, pnins);
|
||||
}
|
||||
for (i=i0; i<i1; i++) {
|
||||
addins(pvins, pnins, i);
|
||||
*i = (Ins){.op = Onop};
|
||||
}
|
||||
return i1;
|
||||
}
|
||||
|
||||
/* order ins within a block */
|
||||
static void
|
||||
schedblk(Fn *fn)
|
||||
{
|
||||
Blk *b;
|
||||
Ins *i, *vins;
|
||||
uint nins;
|
||||
|
||||
vins = vnew(0, sizeof vins[0], PHeap);
|
||||
for (b=fn->start; b; b=b->link) {
|
||||
nins = 0;
|
||||
for (i=b->ins; i<&b->ins[b->nins];)
|
||||
i = schedins(fn, b, i, &vins, &nins);
|
||||
idup(b, vins, nins);
|
||||
}
|
||||
vfree(vins);
|
||||
}
|
||||
|
||||
static int
|
||||
cheap(Ins *i)
|
||||
{
|
||||
int x;
|
||||
|
||||
if (KBASE(i->cls) != 0)
|
||||
return 0;
|
||||
switch (i->op) {
|
||||
case Oneg:
|
||||
case Oadd:
|
||||
case Osub:
|
||||
case Omul:
|
||||
case Oand:
|
||||
case Oor:
|
||||
case Oxor:
|
||||
case Osar:
|
||||
case Oshr:
|
||||
case Oshl:
|
||||
return 1;
|
||||
default:
|
||||
return iscmp(i->op, &x, &x);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
sinkref(Fn *fn, Blk *b, Ref *pr)
|
||||
{
|
||||
Ins i;
|
||||
Tmp *t;
|
||||
Ref r;
|
||||
|
||||
if (rtype(*pr) != RTmp)
|
||||
return;
|
||||
t = &fn->tmp[pr->val];
|
||||
if (!t->def
|
||||
|| t->bid == b->id
|
||||
|| pinned(t->def)
|
||||
|| !cheap(t->def))
|
||||
return;
|
||||
|
||||
/* sink t->def to b */
|
||||
i = *t->def;
|
||||
r = newtmp("snk", t->cls, fn);
|
||||
t = 0; /* invalidated */
|
||||
*pr = r;
|
||||
i.to = r;
|
||||
fn->tmp[r.val].gcmbid = b->id;
|
||||
emiti(i);
|
||||
sinkref(fn, b, &i.arg[0]);
|
||||
sinkref(fn, b, &i.arg[1]);
|
||||
}
|
||||
|
||||
/* redistribute trivial ops to point of
|
||||
* use to reduce register pressure
|
||||
* requires rpo, use; breaks use
|
||||
*/
|
||||
static void
|
||||
sink(Fn *fn)
|
||||
{
|
||||
Blk *b;
|
||||
Ins *i;
|
||||
|
||||
for (b=fn->start; b; b=b->link) {
|
||||
for (i=b->ins; i<&b->ins[b->nins]; i++)
|
||||
if (isload(i->op))
|
||||
sinkref(fn, b, &i->arg[0]);
|
||||
else if (isstore(i->op))
|
||||
sinkref(fn, b, &i->arg[1]);
|
||||
sinkref(fn, b, &b->jmp.arg);
|
||||
}
|
||||
addgcmins(fn, curi, &insb[NIns] - curi);
|
||||
}
|
||||
|
||||
/* requires use dom
|
||||
* maintains rpo pred dom
|
||||
* breaks use
|
||||
*/
|
||||
void
|
||||
gcm(Fn *fn)
|
||||
{
|
||||
Tmp *t;
|
||||
uint bid;
|
||||
|
||||
filldepth(fn);
|
||||
fillloop(fn);
|
||||
|
||||
for (t=fn->tmp; t<&fn->tmp[fn->ntmp]; t++) {
|
||||
t->visit = 0;
|
||||
t->gcmbid = NOBID;
|
||||
}
|
||||
for (bid=0; bid<fn->nblk; bid++)
|
||||
earlyblk(fn, bid);
|
||||
for (bid=0; bid<fn->nblk; bid++)
|
||||
lateblk(fn, bid);
|
||||
|
||||
gcmmove(fn);
|
||||
filluse(fn);
|
||||
curi = &insb[NIns];
|
||||
sink(fn);
|
||||
filluse(fn);
|
||||
schedblk(fn);
|
||||
|
||||
if (debug['G']) {
|
||||
fprintf(stderr, "\n> After GCM:\n");
|
||||
printfn(fn, stderr);
|
||||
}
|
||||
}
|
||||
508
src/qbe/gvn.c
Normal file
508
src/qbe/gvn.c
Normal file
@@ -0,0 +1,508 @@
|
||||
#include "all.h"
|
||||
|
||||
Ref con01[2];
|
||||
|
||||
static inline uint
|
||||
mix(uint x0, uint x1)
|
||||
{
|
||||
return x0 + 17*x1;
|
||||
}
|
||||
|
||||
static inline uint
|
||||
rhash(Ref r)
|
||||
{
|
||||
return mix(r.type, r.val);
|
||||
}
|
||||
|
||||
static uint
|
||||
ihash(Ins *i)
|
||||
{
|
||||
uint h;
|
||||
|
||||
h = mix(i->op, i->cls);
|
||||
h = mix(h, rhash(i->arg[0]));
|
||||
h = mix(h, rhash(i->arg[1]));
|
||||
|
||||
return h;
|
||||
}
|
||||
|
||||
static int
|
||||
ieq(Ins *ia, Ins *ib)
|
||||
{
|
||||
if (ia->op == ib->op)
|
||||
if (ia->cls == ib->cls)
|
||||
if (req(ia->arg[0], ib->arg[0]))
|
||||
if (req(ia->arg[1], ib->arg[1]))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static Ins **gvntbl;
|
||||
static uint gvntbln;
|
||||
|
||||
/* Look i up in the open-addressing table gvntbl (linear
 * probing with wrap-around).  Returns the stored instruction
 * that is ieq() to i, or 0 when none exists; in the latter
 * case, and only if insert is non-zero, i is stored in the
 * empty slot the probe stopped at.  The probe ends only on an
 * empty slot or a match, so the table must never be full. */
static Ins *
gvndup(Ins *i, int insert)
{
	Ins *cand;
	uint slot;

	slot = ihash(i) % gvntbln;
	while ((cand = gvntbl[slot]) != 0) {
		if (ieq(i, cand))
			return cand;
		if (++slot >= gvntbln)
			slot = 0;
	}
	if (insert)
		gvntbl[slot] = i;
	return 0;
}
|
||||
|
||||
static void
|
||||
replaceuse(Fn *fn, Use *u, Ref r1, Ref r2)
|
||||
{
|
||||
Blk *b;
|
||||
Ins *i;
|
||||
Phi *p;
|
||||
Ref *pr;
|
||||
Tmp *t2;
|
||||
int n;
|
||||
|
||||
t2 = 0;
|
||||
if (rtype(r2) == RTmp)
|
||||
t2 = &fn->tmp[r2.val];
|
||||
b = fn->rpo[u->bid];
|
||||
switch (u->type) {
|
||||
case UPhi:
|
||||
p = u->u.phi;
|
||||
for (pr=p->arg; pr<&p->arg[p->narg]; pr++)
|
||||
if (req(*pr, r1))
|
||||
*pr = r2;
|
||||
if (t2)
|
||||
adduse(t2, UPhi, b, p);
|
||||
break;
|
||||
case UIns:
|
||||
i = u->u.ins;
|
||||
for (n=0; n<2; n++)
|
||||
if (req(i->arg[n], r1))
|
||||
i->arg[n] = r2;
|
||||
if (t2)
|
||||
adduse(t2, UIns, b, i);
|
||||
break;
|
||||
case UJmp:
|
||||
if (req(b->jmp.arg, r1))
|
||||
b->jmp.arg = r2;
|
||||
if (t2)
|
||||
adduse(t2, UJmp, b);
|
||||
break;
|
||||
case UXXX:
|
||||
die("unreachable");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
replaceuses(Fn *fn, Ref r1, Ref r2)
|
||||
{
|
||||
Tmp *t1;
|
||||
Use *u;
|
||||
|
||||
assert(rtype(r1) == RTmp);
|
||||
t1 = &fn->tmp[r1.val];
|
||||
for (u=t1->use; u<&t1->use[t1->nuse]; u++)
|
||||
replaceuse(fn, u, r1, r2);
|
||||
t1->nuse = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
dedupphi(Fn *fn, Blk *b)
|
||||
{
|
||||
Phi *p, **pp;
|
||||
Ref r;
|
||||
|
||||
for (pp=&b->phi; (p=*pp);) {
|
||||
r = phicopyref(fn, b, p);
|
||||
if (!req(r, R)) {
|
||||
replaceuses(fn, p->to, r);
|
||||
p->to = R;
|
||||
*pp = p->link;
|
||||
} else
|
||||
pp = &p->link;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
rcmp(Ref a, Ref b)
|
||||
{
|
||||
if (rtype(a) != rtype(b))
|
||||
return rtype(a) - rtype(b);
|
||||
return a.val - b.val;
|
||||
}
|
||||
|
||||
static void
|
||||
normins(Fn *fn, Ins *i)
|
||||
{
|
||||
uint n;
|
||||
int64_t v;
|
||||
Ref r;
|
||||
|
||||
/* truncate constant bits to
|
||||
* 32 bits for s/w uses */
|
||||
for (n=0; n<2; n++) {
|
||||
if (!KWIDE(argcls(i, n)))
|
||||
if (isconbits(fn, i->arg[n], &v))
|
||||
if ((v & 0xffffffff) != v)
|
||||
i->arg[n] = getcon(v & 0xffffffff, fn);
|
||||
}
|
||||
/* order arg[0] <= arg[1] for
|
||||
* commutative ops, preferring
|
||||
* RTmp in arg[0] */
|
||||
if (optab[i->op].commutes)
|
||||
if (rcmp(i->arg[0], i->arg[1]) > 0) {
|
||||
r = i->arg[1];
|
||||
i->arg[1] = i->arg[0];
|
||||
i->arg[0] = r;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
negcon(int cls, Con *c)
|
||||
{
|
||||
static Con z = {.type = CBits, .bits.i = 0};
|
||||
|
||||
return foldint(c, Osub, cls, &z, c);
|
||||
}
|
||||
|
||||
static void
|
||||
assoccon(Fn *fn, Blk *b, Ins *i1)
|
||||
{
|
||||
Tmp *t2;
|
||||
Ins *i2;
|
||||
int op, fail;
|
||||
Con c, c1, c2;
|
||||
|
||||
op = i1->op;
|
||||
if (op == Osub)
|
||||
op = Oadd;
|
||||
|
||||
if (!optab[op].assoc
|
||||
|| KBASE(i1->cls) != 0
|
||||
|| rtype(i1->arg[0]) != RTmp
|
||||
|| rtype(i1->arg[1]) != RCon)
|
||||
return;
|
||||
c1 = fn->con[i1->arg[1].val];
|
||||
|
||||
t2 = &fn->tmp[i1->arg[0].val];
|
||||
if (t2->def == 0)
|
||||
return;
|
||||
i2 = t2->def;
|
||||
|
||||
if (op != (i2->op == Osub ? Oadd : i2->op)
|
||||
|| rtype(i2->arg[1]) != RCon)
|
||||
return;
|
||||
c2 = fn->con[i2->arg[1].val];
|
||||
|
||||
assert(KBASE(i2->cls) == 0);
|
||||
assert(KWIDE(i2->cls) >= KWIDE(i1->cls));
|
||||
|
||||
if (i1->op == Osub && negcon(i1->cls, &c1))
|
||||
return;
|
||||
if (i2->op == Osub && negcon(i2->cls, &c2))
|
||||
return;
|
||||
if (foldint(&c, op, i1->cls, &c1, &c2))
|
||||
return;
|
||||
|
||||
if (op == Oadd && c.type == CBits)
|
||||
if ((i1->cls == Kl && c.bits.i < 0)
|
||||
|| (i1->cls == Kw && (int32_t)c.bits.i < 0)) {
|
||||
fail = negcon(i1->cls, &c);
|
||||
assert(fail == 0);
|
||||
op = Osub;
|
||||
}
|
||||
|
||||
i1->op = op;
|
||||
i1->arg[0] = i2->arg[0];
|
||||
i1->arg[1] = newcon(&c, fn);
|
||||
adduse(&fn->tmp[i1->arg[0].val], UIns, b, i1);
|
||||
}
|
||||
|
||||
static void
|
||||
killins(Fn *fn, Ins *i, Ref r)
|
||||
{
|
||||
replaceuses(fn, i->to, r);
|
||||
*i = (Ins){.op = Onop};
|
||||
}
|
||||
|
||||
static void
|
||||
dedupins(Fn *fn, Blk *b, Ins *i)
|
||||
{
|
||||
Ref r;
|
||||
Ins *i1;
|
||||
|
||||
normins(fn, i);
|
||||
if (i->op == Onop || pinned(i))
|
||||
return;
|
||||
|
||||
/* when sel instructions are inserted
|
||||
* before gvn, we may want to optimize
|
||||
* them here */
|
||||
assert(i->op != Osel0);
|
||||
assert(!req(i->to, R));
|
||||
assoccon(fn, b, i);
|
||||
|
||||
r = copyref(fn, b, i);
|
||||
if (!req(r, R)) {
|
||||
killins(fn, i, r);
|
||||
return;
|
||||
}
|
||||
r = foldref(fn, i);
|
||||
if (!req(r, R)) {
|
||||
killins(fn, i, r);
|
||||
return;
|
||||
}
|
||||
i1 = gvndup(i, 1);
|
||||
if (i1) {
|
||||
killins(fn, i, i1->to);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
cmpeqz(Fn *fn, Ref r, Ref *arg, int *cls, int *eqval)
|
||||
{
|
||||
Ins *i;
|
||||
|
||||
if (rtype(r) != RTmp)
|
||||
return 0;
|
||||
i = fn->tmp[r.val].def;
|
||||
if (i)
|
||||
if (optab[i->op].cmpeqwl)
|
||||
if (req(i->arg[1], CON_Z)) {
|
||||
*arg = i->arg[0];
|
||||
*cls = argcls(i, 0);
|
||||
*eqval = optab[i->op].eqval;
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
branchdom(Fn *fn, Blk *bif, Blk *bbr1, Blk *bbr2, Blk *b)
|
||||
{
|
||||
assert(bif->jmp.type == Jjnz);
|
||||
|
||||
if (b != bif
|
||||
&& dom(bbr1, b)
|
||||
&& !reachesnotvia(fn, bbr2, b, bif))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
domzero(Fn *fn, Blk *d, Blk *b, int *z)
|
||||
{
|
||||
if (branchdom(fn, d, d->s1, d->s2, b)) {
|
||||
*z = 0;
|
||||
return 1;
|
||||
}
|
||||
if (branchdom(fn, d, d->s2, d->s1, b)) {
|
||||
*z = 1;
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* infer 0/non-0 value from dominating jnz */
|
||||
int
|
||||
zeroval(Fn *fn, Blk *b, Ref r, int cls, int *z)
|
||||
{
|
||||
Blk *d;
|
||||
Ref arg;
|
||||
int cls1, eqval;
|
||||
|
||||
for (d=b->idom; d; d=d->idom) {
|
||||
if (d->jmp.type != Jjnz)
|
||||
continue;
|
||||
if (req(r, d->jmp.arg)
|
||||
&& cls == Kw
|
||||
&& domzero(fn, d, b, z)) {
|
||||
return 1;
|
||||
}
|
||||
if (cmpeqz(fn, d->jmp.arg, &arg, &cls1, &eqval)
|
||||
&& req(r, arg)
|
||||
&& cls == cls1
|
||||
&& domzero(fn, d, b, z)) {
|
||||
*z ^= eqval;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
usecls(Use *u, Ref r, int cls)
|
||||
{
|
||||
int k;
|
||||
|
||||
switch (u->type) {
|
||||
case UIns:
|
||||
k = Kx; /* widest use */
|
||||
if (req(u->u.ins->arg[0], r))
|
||||
k = argcls(u->u.ins, 0);
|
||||
if (req(u->u.ins->arg[1], r))
|
||||
if (k == Kx || !KWIDE(k))
|
||||
k = argcls(u->u.ins, 1);
|
||||
return k == Kx ? cls : k;
|
||||
case UPhi:
|
||||
if (req(u->u.phi->to, R))
|
||||
return cls; /* eliminated */
|
||||
return u->u.phi->cls;
|
||||
case UJmp:
|
||||
return Kw;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
die("unreachable");
|
||||
}
|
||||
|
||||
static void
|
||||
propjnz0(Fn *fn, Blk *bif, Blk *s0, Blk *snon0, Ref r, int cls)
|
||||
{
|
||||
Blk *b;
|
||||
Tmp *t;
|
||||
Use *u;
|
||||
|
||||
if (s0->npred != 1 || rtype(r) != RTmp)
|
||||
return;
|
||||
t = &fn->tmp[r.val];
|
||||
for (u=t->use; u<&t->use[t->nuse]; u++) {
|
||||
b = fn->rpo[u->bid];
|
||||
/* we may compare an l temp with a w
|
||||
* comparison; so check that the use
|
||||
* does not involve high bits */
|
||||
if (usecls(u, r, cls) == cls)
|
||||
if (branchdom(fn, bif, s0, snon0, b))
|
||||
replaceuse(fn, u, r, CON_Z);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
dedupjmp(Fn *fn, Blk *b)
|
||||
{
|
||||
Blk **ps;
|
||||
int64_t v;
|
||||
Ref arg;
|
||||
int cls, eqval, z;
|
||||
|
||||
if (b->jmp.type != Jjnz)
|
||||
return;
|
||||
|
||||
/* propagate jmp arg as 0 through s2 */
|
||||
propjnz0(fn, b, b->s2, b->s1, b->jmp.arg, Kw);
|
||||
/* propagate cmp eq/ne 0 def of jmp arg as 0 */
|
||||
if (cmpeqz(fn, b->jmp.arg, &arg, &cls, &eqval)) {
|
||||
ps = (Blk*[]){b->s1, b->s2};
|
||||
propjnz0(fn, b, ps[eqval^1], ps[eqval], arg, cls);
|
||||
}
|
||||
|
||||
/* collapse trivial/constant jnz to jmp */
|
||||
v = 1;
|
||||
z = 0;
|
||||
if (b->s1 == b->s2
|
||||
|| isconbits(fn, b->jmp.arg, &v)
|
||||
|| zeroval(fn, b, b->jmp.arg, Kw, &z)) {
|
||||
if (v == 0 || z)
|
||||
b->s1 = b->s2;
|
||||
/* we later move active ins out of dead blks */
|
||||
b->s2 = 0;
|
||||
b->jmp.type = Jjmp;
|
||||
b->jmp.arg = R;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
rebuildcfg(Fn *fn)
|
||||
{
|
||||
uint n, nblk;
|
||||
Blk *b, *s, **rpo;
|
||||
Ins *i;
|
||||
|
||||
nblk = fn->nblk;
|
||||
rpo = emalloc(nblk * sizeof rpo[0]);
|
||||
memcpy(rpo, fn->rpo, nblk * sizeof rpo[0]);
|
||||
|
||||
fillcfg(fn);
|
||||
|
||||
/* move instructions that were in
|
||||
* killed blocks and may be active
|
||||
* in the computation in the start
|
||||
* block */
|
||||
s = fn->start;
|
||||
for (n=0; n<nblk; n++) {
|
||||
b = rpo[n];
|
||||
if (b->id != -1u)
|
||||
continue;
|
||||
/* blk unreachable after GVN */
|
||||
assert(b != s);
|
||||
for (i=b->ins; i<&b->ins[b->nins]; i++)
|
||||
if (!optab[i->op].pinned)
|
||||
if (gvndup(i, 0) == i)
|
||||
addins(&s->ins, &s->nins, i);
|
||||
}
|
||||
free(rpo);
|
||||
}
|
||||
|
||||
/* requires rpo pred ssa use
|
||||
* recreates rpo preds
|
||||
* breaks pred use dom ssa (GCM fixes ssa)
|
||||
*/
|
||||
void
|
||||
gvn(Fn *fn)
|
||||
{
|
||||
Blk *b;
|
||||
Phi *p;
|
||||
Ins *i;
|
||||
uint n, nins;
|
||||
|
||||
con01[0] = getcon(0, fn);
|
||||
con01[1] = getcon(1, fn);
|
||||
|
||||
/* copy.c uses the visit bit */
|
||||
for (b=fn->start; b; b=b->link)
|
||||
for (p=b->phi; p; p=p->link)
|
||||
p->visit = 0;
|
||||
|
||||
fillloop(fn);
|
||||
narrowpars(fn);
|
||||
filluse(fn);
|
||||
ssacheck(fn);
|
||||
|
||||
nins = 0;
|
||||
for (b=fn->start; b; b=b->link) {
|
||||
b->visit = 0;
|
||||
nins += b->nins;
|
||||
}
|
||||
|
||||
gvntbln = nins + nins/2;
|
||||
gvntbl = emalloc(gvntbln * sizeof gvntbl[0]);
|
||||
for (n=0; n<fn->nblk; n++) {
|
||||
b = fn->rpo[n];
|
||||
dedupphi(fn, b);
|
||||
for (i=b->ins; i<&b->ins[b->nins]; i++)
|
||||
dedupins(fn, b, i);
|
||||
dedupjmp(fn, b);
|
||||
}
|
||||
rebuildcfg(fn);
|
||||
free(gvntbl);
|
||||
gvntbl = 0;
|
||||
|
||||
if (debug['G']) {
|
||||
fprintf(stderr, "\n> After GVN:\n");
|
||||
printfn(fn, stderr);
|
||||
}
|
||||
}
|
||||
121
src/qbe/ifopt.c
Normal file
121
src/qbe/ifopt.c
Normal file
@@ -0,0 +1,121 @@
|
||||
#include "all.h"
|
||||
|
||||
enum {
|
||||
MaxIns = 2,
|
||||
MaxPhis = 2,
|
||||
};
|
||||
|
||||
static int
|
||||
okbranch(Blk *b)
|
||||
{
|
||||
Ins *i;
|
||||
int n;
|
||||
|
||||
n = 0;
|
||||
for (i=b->ins; i<&b->ins[b->nins]; i++)
|
||||
if (i->op != Odbgloc) {
|
||||
if (pinned(i))
|
||||
return 0;
|
||||
if (i->op != Onop)
|
||||
n++;
|
||||
}
|
||||
return n <= MaxIns;
|
||||
}
|
||||
|
||||
static int
|
||||
okjoin(Blk *b)
|
||||
{
|
||||
Phi *p;
|
||||
int n;
|
||||
|
||||
n = 0;
|
||||
for (p=b->phi; p; p=p->link) {
|
||||
if (KBASE(p->cls) != 0)
|
||||
return 0;
|
||||
n++;
|
||||
}
|
||||
return n <= MaxPhis;
|
||||
}
|
||||
|
||||
static int
|
||||
okgraph(Blk *ifb, Blk *thenb, Blk *elseb, Blk *joinb)
|
||||
{
|
||||
if (joinb->npred != 2 || !okjoin(joinb))
|
||||
return 0;
|
||||
assert(thenb != elseb);
|
||||
if (thenb != ifb && !okbranch(thenb))
|
||||
return 0;
|
||||
if (elseb != ifb && !okbranch(elseb))
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
convert(Blk *ifb, Blk *thenb, Blk *elseb, Blk *joinb)
|
||||
{
|
||||
Ins *ins, sel;
|
||||
Phi *p;
|
||||
uint nins;
|
||||
|
||||
ins = vnew(0, sizeof ins[0], PHeap);
|
||||
nins = 0;
|
||||
addbins(&ins, &nins, ifb);
|
||||
if (thenb != ifb)
|
||||
addbins(&ins, &nins, thenb);
|
||||
if (elseb != ifb)
|
||||
addbins(&ins, &nins, elseb);
|
||||
assert(joinb->npred == 2);
|
||||
if (joinb->phi) {
|
||||
sel = (Ins){
|
||||
.op = Osel0, .cls = Kw,
|
||||
.arg = {ifb->jmp.arg},
|
||||
};
|
||||
addins(&ins, &nins, &sel);
|
||||
}
|
||||
sel = (Ins){.op = Osel1};
|
||||
for (p=joinb->phi; p; p=p->link) {
|
||||
sel.to = p->to;
|
||||
sel.cls = p->cls;
|
||||
sel.arg[0] = phiarg(p, thenb);
|
||||
sel.arg[1] = phiarg(p, elseb);
|
||||
addins(&ins, &nins, &sel);
|
||||
}
|
||||
idup(ifb, ins, nins);
|
||||
ifb->jmp.type = Jjmp;
|
||||
ifb->jmp.arg = R;
|
||||
ifb->s1 = joinb;
|
||||
ifb->s2 = 0;
|
||||
joinb->npred = 1;
|
||||
joinb->pred[0] = ifb;
|
||||
joinb->phi = 0;
|
||||
vfree(ins);
|
||||
}
|
||||
|
||||
/* eliminate if-then[-else] graphlets
|
||||
* using sel instructions
|
||||
* needs rpo pred use; breaks cfg use
|
||||
*/
|
||||
void
|
||||
ifconvert(Fn *fn)
|
||||
{
|
||||
Blk *ifb, *thenb, *elseb, *joinb;
|
||||
|
||||
if (debug['K'])
|
||||
fputs("\n> If-conversion:\n", stderr);
|
||||
|
||||
for (ifb=fn->start; ifb; ifb=ifb->link)
|
||||
if (ifgraph(ifb, &thenb, &elseb, &joinb))
|
||||
if (okgraph(ifb, thenb, elseb, joinb)) {
|
||||
if (debug['K'])
|
||||
fprintf(stderr,
|
||||
" @%s -> @%s, @%s -> @%s\n",
|
||||
ifb->name, thenb->name, elseb->name,
|
||||
joinb->name);
|
||||
convert(ifb, thenb, elseb, joinb);
|
||||
}
|
||||
|
||||
if (debug['K']) {
|
||||
fprintf(stderr, "\n> After if-conversion:\n");
|
||||
printfn(fn, stderr);
|
||||
}
|
||||
}
|
||||
144
src/qbe/live.c
Normal file
144
src/qbe/live.c
Normal file
@@ -0,0 +1,144 @@
|
||||
#include "all.h"
|
||||
|
||||
void
|
||||
liveon(BSet *v, Blk *b, Blk *s)
|
||||
{
|
||||
Phi *p;
|
||||
uint a;
|
||||
|
||||
bscopy(v, s->in);
|
||||
for (p=s->phi; p; p=p->link)
|
||||
if (rtype(p->to) == RTmp)
|
||||
bsclr(v, p->to.val);
|
||||
for (p=s->phi; p; p=p->link)
|
||||
for (a=0; a<p->narg; a++)
|
||||
if (p->blk[a] == b)
|
||||
if (rtype(p->arg[a]) == RTmp) {
|
||||
bsset(v, p->arg[a].val);
|
||||
bsset(b->gen, p->arg[a].val);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
bset(Ref r, Blk *b, int *nlv, Tmp *tmp)
|
||||
{
|
||||
|
||||
if (rtype(r) != RTmp)
|
||||
return;
|
||||
bsset(b->gen, r.val);
|
||||
if (!bshas(b->in, r.val)) {
|
||||
nlv[KBASE(tmp[r.val].cls)]++;
|
||||
bsset(b->in, r.val);
|
||||
}
|
||||
}
|
||||
|
||||
/* liveness analysis
|
||||
* requires rpo computation
|
||||
*/
|
||||
void
|
||||
filllive(Fn *f)
|
||||
{
|
||||
Blk *b;
|
||||
Ins *i;
|
||||
int k, t, m[2], n, chg, nlv[2];
|
||||
BSet u[1], v[1];
|
||||
Mem *ma;
|
||||
|
||||
bsinit(u, f->ntmp);
|
||||
bsinit(v, f->ntmp);
|
||||
for (b=f->start; b; b=b->link) {
|
||||
bsinit(b->in, f->ntmp);
|
||||
bsinit(b->out, f->ntmp);
|
||||
bsinit(b->gen, f->ntmp);
|
||||
}
|
||||
chg = 1;
|
||||
Again:
|
||||
for (n=f->nblk-1; n>=0; n--) {
|
||||
b = f->rpo[n];
|
||||
|
||||
bscopy(u, b->out);
|
||||
if (b->s1) {
|
||||
liveon(v, b, b->s1);
|
||||
bsunion(b->out, v);
|
||||
}
|
||||
if (b->s2) {
|
||||
liveon(v, b, b->s2);
|
||||
bsunion(b->out, v);
|
||||
}
|
||||
chg |= !bsequal(b->out, u);
|
||||
|
||||
memset(nlv, 0, sizeof nlv);
|
||||
b->out->t[0] |= T.rglob;
|
||||
bscopy(b->in, b->out);
|
||||
for (t=0; bsiter(b->in, &t); t++)
|
||||
nlv[KBASE(f->tmp[t].cls)]++;
|
||||
if (rtype(b->jmp.arg) == RCall) {
|
||||
assert((int)bscount(b->in) == T.nrglob &&
|
||||
b->in->t[0] == T.rglob);
|
||||
b->in->t[0] |= T.retregs(b->jmp.arg, nlv);
|
||||
} else
|
||||
bset(b->jmp.arg, b, nlv, f->tmp);
|
||||
for (k=0; k<2; k++)
|
||||
b->nlive[k] = nlv[k];
|
||||
for (i=&b->ins[b->nins]; i!=b->ins;) {
|
||||
if ((--i)->op == Ocall && rtype(i->arg[1]) == RCall) {
|
||||
b->in->t[0] &= ~T.retregs(i->arg[1], m);
|
||||
for (k=0; k<2; k++) {
|
||||
nlv[k] -= m[k];
|
||||
/* caller-save registers are used
|
||||
* by the callee, in that sense,
|
||||
* right in the middle of the call,
|
||||
* they are live: */
|
||||
nlv[k] += T.nrsave[k];
|
||||
if (nlv[k] > b->nlive[k])
|
||||
b->nlive[k] = nlv[k];
|
||||
}
|
||||
b->in->t[0] |= T.argregs(i->arg[1], m);
|
||||
for (k=0; k<2; k++) {
|
||||
nlv[k] -= T.nrsave[k];
|
||||
nlv[k] += m[k];
|
||||
}
|
||||
}
|
||||
if (!req(i->to, R)) {
|
||||
assert(rtype(i->to) == RTmp);
|
||||
t = i->to.val;
|
||||
if (bshas(b->in, t))
|
||||
nlv[KBASE(f->tmp[t].cls)]--;
|
||||
bsset(b->gen, t);
|
||||
bsclr(b->in, t);
|
||||
}
|
||||
for (k=0; k<2; k++)
|
||||
switch (rtype(i->arg[k])) {
|
||||
case RMem:
|
||||
ma = &f->mem[i->arg[k].val];
|
||||
bset(ma->base, b, nlv, f->tmp);
|
||||
bset(ma->index, b, nlv, f->tmp);
|
||||
break;
|
||||
default:
|
||||
bset(i->arg[k], b, nlv, f->tmp);
|
||||
break;
|
||||
}
|
||||
for (k=0; k<2; k++)
|
||||
if (nlv[k] > b->nlive[k])
|
||||
b->nlive[k] = nlv[k];
|
||||
}
|
||||
}
|
||||
if (chg) {
|
||||
chg = 0;
|
||||
goto Again;
|
||||
}
|
||||
|
||||
if (debug['L']) {
|
||||
fprintf(stderr, "\n> Liveness analysis:\n");
|
||||
for (b=f->start; b; b=b->link) {
|
||||
fprintf(stderr, "\t%-10sin: ", b->name);
|
||||
dumpts(b->in, f->tmp, stderr);
|
||||
fprintf(stderr, "\t out: ");
|
||||
dumpts(b->out, f->tmp, stderr);
|
||||
fprintf(stderr, "\t gen: ");
|
||||
dumpts(b->gen, f->tmp, stderr);
|
||||
fprintf(stderr, "\t live: ");
|
||||
fprintf(stderr, "%d %d\n", b->nlive[0], b->nlive[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
493
src/qbe/load.c
Normal file
493
src/qbe/load.c
Normal file
@@ -0,0 +1,493 @@
|
||||
#include "all.h"
|
||||
|
||||
#define MASK(w) (BIT(8*(w)-1)*2-1) /* must work when w==8 */
|
||||
|
||||
typedef struct Loc Loc;
|
||||
typedef struct Slice Slice;
|
||||
typedef struct Insert Insert;
|
||||
|
||||
struct Loc {
|
||||
enum {
|
||||
LRoot, /* right above the original load */
|
||||
LLoad, /* inserting a load is allowed */
|
||||
LNoLoad, /* only scalar operations allowed */
|
||||
} type;
|
||||
uint off;
|
||||
Blk *blk;
|
||||
};
|
||||
|
||||
struct Slice {
|
||||
Ref ref;
|
||||
int off;
|
||||
short sz;
|
||||
short cls; /* load class */
|
||||
};
|
||||
|
||||
struct Insert {
|
||||
uint isphi:1;
|
||||
uint num:31;
|
||||
uint bid;
|
||||
uint off;
|
||||
union {
|
||||
Ins ins;
|
||||
struct {
|
||||
Slice m;
|
||||
Phi *p;
|
||||
} phi;
|
||||
} new;
|
||||
};
|
||||
|
||||
static Fn *curf;
|
||||
static uint inum; /* current insertion number */
|
||||
static Insert *ilog; /* global insertion log */
|
||||
static uint nlog; /* number of entries in the log */
|
||||
|
||||
int
|
||||
loadsz(Ins *l)
|
||||
{
|
||||
switch (l->op) {
|
||||
case Oloadsb: case Oloadub: return 1;
|
||||
case Oloadsh: case Oloaduh: return 2;
|
||||
case Oloadsw: case Oloaduw: return 4;
|
||||
case Oload: return KWIDE(l->cls) ? 8 : 4;
|
||||
}
|
||||
die("unreachable");
|
||||
}
|
||||
|
||||
int
|
||||
storesz(Ins *s)
|
||||
{
|
||||
switch (s->op) {
|
||||
case Ostoreb: return 1;
|
||||
case Ostoreh: return 2;
|
||||
case Ostorew: case Ostores: return 4;
|
||||
case Ostorel: case Ostored: return 8;
|
||||
}
|
||||
die("unreachable");
|
||||
}
|
||||
|
||||
static Ref
|
||||
iins(int cls, int op, Ref a0, Ref a1, Loc *l)
|
||||
{
|
||||
Insert *ist;
|
||||
|
||||
vgrow(&ilog, ++nlog);
|
||||
ist = &ilog[nlog-1];
|
||||
ist->isphi = 0;
|
||||
ist->num = inum++;
|
||||
ist->bid = l->blk->id;
|
||||
ist->off = l->off;
|
||||
ist->new.ins = (Ins){op, cls, R, {a0, a1}};
|
||||
return ist->new.ins.to = newtmp("ld", cls, curf);
|
||||
}
|
||||
|
||||
static void
|
||||
cast(Ref *r, int cls, Loc *l)
|
||||
{
|
||||
int cls0;
|
||||
|
||||
if (rtype(*r) == RCon)
|
||||
return;
|
||||
assert(rtype(*r) == RTmp);
|
||||
cls0 = curf->tmp[r->val].cls;
|
||||
if (cls0 == cls || (cls == Kw && cls0 == Kl))
|
||||
return;
|
||||
if (KWIDE(cls0) < KWIDE(cls)) {
|
||||
if (cls0 == Ks)
|
||||
*r = iins(Kw, Ocast, *r, R, l);
|
||||
*r = iins(Kl, Oextuw, *r, R, l);
|
||||
if (cls == Kd)
|
||||
*r = iins(Kd, Ocast, *r, R, l);
|
||||
} else {
|
||||
if (cls0 == Kd && cls != Kl)
|
||||
*r = iins(Kl, Ocast, *r, R, l);
|
||||
if (cls0 != Kd || cls != Kw)
|
||||
*r = iins(cls, Ocast, *r, R, l);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
mask(int cls, Ref *r, bits msk, Loc *l)
|
||||
{
|
||||
cast(r, cls, l);
|
||||
*r = iins(cls, Oand, *r, getcon(msk, curf), l);
|
||||
}
|
||||
|
||||
static Ref
|
||||
load(Slice sl, bits msk, Loc *l)
|
||||
{
|
||||
Alias *a;
|
||||
Ref r, r1;
|
||||
int ld, cls, all;
|
||||
Con c;
|
||||
|
||||
ld = (int[]){
|
||||
[1] = Oloadub,
|
||||
[2] = Oloaduh,
|
||||
[4] = Oloaduw,
|
||||
[8] = Oload
|
||||
}[sl.sz];
|
||||
all = msk == MASK(sl.sz);
|
||||
if (all)
|
||||
cls = sl.cls;
|
||||
else
|
||||
cls = sl.sz > 4 ? Kl : Kw;
|
||||
r = sl.ref;
|
||||
/* sl.ref might not be live here,
|
||||
* but its alias base ref will be
|
||||
* (see killsl() below) */
|
||||
if (rtype(r) == RTmp) {
|
||||
a = &curf->tmp[r.val].alias;
|
||||
switch (a->type) {
|
||||
default:
|
||||
die("unreachable");
|
||||
case ALoc:
|
||||
case AEsc:
|
||||
case AUnk:
|
||||
r = TMP(a->base);
|
||||
if (!a->offset)
|
||||
break;
|
||||
r1 = getcon(a->offset, curf);
|
||||
r = iins(Kl, Oadd, r, r1, l);
|
||||
break;
|
||||
case ACon:
|
||||
case ASym:
|
||||
memset(&c, 0, sizeof c);
|
||||
c.type = CAddr;
|
||||
c.sym = a->u.sym;
|
||||
c.bits.i = a->offset;
|
||||
r = newcon(&c, curf);
|
||||
break;
|
||||
}
|
||||
}
|
||||
r = iins(cls, ld, r, R, l);
|
||||
if (!all)
|
||||
mask(cls, &r, msk, l);
|
||||
return r;
|
||||
}
|
||||
|
||||
static int
|
||||
killsl(Ref r, Slice sl)
|
||||
{
|
||||
Alias *a;
|
||||
|
||||
if (rtype(sl.ref) != RTmp)
|
||||
return 0;
|
||||
a = &curf->tmp[sl.ref.val].alias;
|
||||
switch (a->type) {
|
||||
default: die("unreachable");
|
||||
case ALoc:
|
||||
case AEsc:
|
||||
case AUnk: return req(TMP(a->base), r);
|
||||
case ACon:
|
||||
case ASym: return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* returns a ref containing the contents of the slice
|
||||
* passed as argument, all the bits set to 0 in the
|
||||
* mask argument are zeroed in the result;
|
||||
* the returned ref has an integer class when the
|
||||
* mask does not cover all the bits of the slice,
|
||||
* otherwise, it has class sl.cls
|
||||
* the procedure returns R when it fails */
|
||||
static Ref
|
||||
def(Slice sl, bits msk, Blk *b, Ins *i, Loc *il)
|
||||
{
|
||||
Slice sl1;
|
||||
Blk *bp;
|
||||
bits msk1, msks;
|
||||
int off, cls, cls1, op, sz, ld;
|
||||
uint np, oldl, oldt;
|
||||
Ref r, r1;
|
||||
Phi *p;
|
||||
Insert *ist;
|
||||
Loc l;
|
||||
|
||||
/* invariants:
|
||||
* -1- b dominates il->blk; so we can use
|
||||
* temporaries of b in il->blk
|
||||
* -2- if il->type != LNoLoad, then il->blk
|
||||
* postdominates the original load; so it
|
||||
* is safe to load in il->blk
|
||||
* -3- if il->type != LNoLoad, then b
|
||||
* postdominates il->blk (and by 2, the
|
||||
* original load)
|
||||
*/
|
||||
assert(dom(b, il->blk));
|
||||
oldl = nlog;
|
||||
oldt = curf->ntmp;
|
||||
if (0) {
|
||||
Load:
|
||||
curf->ntmp = oldt;
|
||||
nlog = oldl;
|
||||
if (il->type != LLoad)
|
||||
return R;
|
||||
return load(sl, msk, il);
|
||||
}
|
||||
|
||||
if (!i)
|
||||
i = &b->ins[b->nins];
|
||||
cls = sl.sz > 4 ? Kl : Kw;
|
||||
msks = MASK(sl.sz);
|
||||
|
||||
while (i > b->ins) {
|
||||
--i;
|
||||
if (killsl(i->to, sl)
|
||||
|| (i->op == Ocall && escapes(sl.ref, curf)))
|
||||
goto Load;
|
||||
ld = isload(i->op);
|
||||
if (ld) {
|
||||
sz = loadsz(i);
|
||||
r1 = i->arg[0];
|
||||
r = i->to;
|
||||
} else if (isstore(i->op)) {
|
||||
sz = storesz(i);
|
||||
r1 = i->arg[1];
|
||||
r = i->arg[0];
|
||||
} else if (i->op == Oblit1) {
|
||||
assert(rtype(i->arg[0]) == RInt);
|
||||
sz = abs(rsval(i->arg[0]));
|
||||
assert(i > b->ins);
|
||||
--i;
|
||||
assert(i->op == Oblit0);
|
||||
r1 = i->arg[1];
|
||||
} else
|
||||
continue;
|
||||
switch (alias(sl.ref, sl.off, sl.sz, r1, sz, &off, curf)) {
|
||||
case MustAlias:
|
||||
if (i->op == Oblit0) {
|
||||
sl1 = sl;
|
||||
sl1.ref = i->arg[0];
|
||||
if (off >= 0) {
|
||||
assert(off < sz);
|
||||
sl1.off = off;
|
||||
sz -= off;
|
||||
off = 0;
|
||||
} else {
|
||||
sl1.off = 0;
|
||||
sl1.sz += off;
|
||||
}
|
||||
if (sz > sl1.sz)
|
||||
sz = sl1.sz;
|
||||
assert(sz <= 8);
|
||||
sl1.sz = sz;
|
||||
}
|
||||
if (off < 0) {
|
||||
off = -off;
|
||||
msk1 = (MASK(sz) << 8*off) & msks;
|
||||
op = Oshl;
|
||||
} else {
|
||||
msk1 = (MASK(sz) >> 8*off) & msks;
|
||||
op = Oshr;
|
||||
}
|
||||
if ((msk1 & msk) == 0)
|
||||
continue;
|
||||
if (i->op == Oblit0) {
|
||||
r = def(sl1, MASK(sz), b, i, il);
|
||||
if (req(r, R))
|
||||
goto Load;
|
||||
}
|
||||
if (off) {
|
||||
cls1 = cls;
|
||||
if (op == Oshr && off + sl.sz > 4)
|
||||
cls1 = Kl;
|
||||
cast(&r, cls1, il);
|
||||
r1 = getcon(8*off, curf);
|
||||
r = iins(cls1, op, r, r1, il);
|
||||
}
|
||||
if ((msk1 & msk) != msk1 || off + sz < sl.sz)
|
||||
mask(cls, &r, msk1 & msk, il);
|
||||
if ((msk & ~msk1) != 0) {
|
||||
r1 = def(sl, msk & ~msk1, b, i, il);
|
||||
if (req(r1, R))
|
||||
goto Load;
|
||||
r = iins(cls, Oor, r, r1, il);
|
||||
}
|
||||
if (msk == msks)
|
||||
cast(&r, sl.cls, il);
|
||||
return r;
|
||||
case MayAlias:
|
||||
if (ld)
|
||||
continue;
|
||||
else
|
||||
goto Load;
|
||||
case NoAlias:
|
||||
continue;
|
||||
default:
|
||||
die("unreachable");
|
||||
}
|
||||
}
|
||||
|
||||
for (ist=ilog; ist<&ilog[nlog]; ++ist)
|
||||
if (ist->isphi && ist->bid == b->id)
|
||||
if (req(ist->new.phi.m.ref, sl.ref))
|
||||
if (ist->new.phi.m.off == sl.off)
|
||||
if (ist->new.phi.m.sz == sl.sz) {
|
||||
r = ist->new.phi.p->to;
|
||||
if (msk != msks)
|
||||
mask(cls, &r, msk, il);
|
||||
else
|
||||
cast(&r, sl.cls, il);
|
||||
return r;
|
||||
}
|
||||
|
||||
for (p=b->phi; p; p=p->link)
|
||||
if (killsl(p->to, sl))
|
||||
/* scanning predecessors in that
|
||||
* case would be unsafe */
|
||||
goto Load;
|
||||
|
||||
if (b->npred == 0)
|
||||
goto Load;
|
||||
if (b->npred == 1) {
|
||||
bp = b->pred[0];
|
||||
assert(bp->loop >= il->blk->loop);
|
||||
l = *il;
|
||||
if (bp->s2)
|
||||
l.type = LNoLoad;
|
||||
r1 = def(sl, msk, bp, 0, &l);
|
||||
if (req(r1, R))
|
||||
goto Load;
|
||||
return r1;
|
||||
}
|
||||
|
||||
r = newtmp("ld", sl.cls, curf);
|
||||
p = alloc(sizeof *p);
|
||||
vgrow(&ilog, ++nlog);
|
||||
ist = &ilog[nlog-1];
|
||||
ist->isphi = 1;
|
||||
ist->bid = b->id;
|
||||
ist->new.phi.m = sl;
|
||||
ist->new.phi.p = p;
|
||||
p->to = r;
|
||||
p->cls = sl.cls;
|
||||
p->narg = b->npred;
|
||||
p->arg = vnew(p->narg, sizeof p->arg[0], PFn);
|
||||
p->blk = vnew(p->narg, sizeof p->blk[0], PFn);
|
||||
for (np=0; np<b->npred; ++np) {
|
||||
bp = b->pred[np];
|
||||
if (!bp->s2
|
||||
&& il->type != LNoLoad
|
||||
&& bp->loop < il->blk->loop)
|
||||
l.type = LLoad;
|
||||
else
|
||||
l.type = LNoLoad;
|
||||
l.blk = bp;
|
||||
l.off = bp->nins;
|
||||
r1 = def(sl, msks, bp, 0, &l);
|
||||
if (req(r1, R))
|
||||
goto Load;
|
||||
p->arg[np] = r1;
|
||||
p->blk[np] = bp;
|
||||
/* XXX - multiplicity in predecessors!!! */
|
||||
}
|
||||
if (msk != msks)
|
||||
mask(cls, &r, msk, il);
|
||||
return r;
|
||||
}
|
||||
|
||||
static int
|
||||
icmp(const void *pa, const void *pb)
|
||||
{
|
||||
Insert *a, *b;
|
||||
int c;
|
||||
|
||||
a = (Insert *)pa;
|
||||
b = (Insert *)pb;
|
||||
if ((c = a->bid - b->bid))
|
||||
return c;
|
||||
if (a->isphi && b->isphi)
|
||||
return 0;
|
||||
if (a->isphi)
|
||||
return -1;
|
||||
if (b->isphi)
|
||||
return +1;
|
||||
if ((c = a->off - b->off))
|
||||
return c;
|
||||
return a->num - b->num;
|
||||
}
|
||||
|
||||
/* require rpo ssa alias */
|
||||
void
|
||||
loadopt(Fn *fn)
|
||||
{
|
||||
Ins *i, *ib;
|
||||
Blk *b;
|
||||
int sz;
|
||||
uint n, ni, ext, nt;
|
||||
Insert *ist;
|
||||
Slice sl;
|
||||
Loc l;
|
||||
|
||||
curf = fn;
|
||||
ilog = vnew(0, sizeof ilog[0], PHeap);
|
||||
nlog = 0;
|
||||
inum = 0;
|
||||
for (b=fn->start; b; b=b->link)
|
||||
for (i=b->ins; i<&b->ins[b->nins]; ++i) {
|
||||
if (!isload(i->op))
|
||||
continue;
|
||||
sz = loadsz(i);
|
||||
sl = (Slice){i->arg[0], 0, sz, i->cls};
|
||||
l = (Loc){LRoot, i-b->ins, b};
|
||||
i->arg[1] = def(sl, MASK(sz), b, i, &l);
|
||||
}
|
||||
qsort(ilog, nlog, sizeof ilog[0], icmp);
|
||||
vgrow(&ilog, nlog+1);
|
||||
ilog[nlog].bid = fn->nblk; /* add a sentinel */
|
||||
ib = vnew(0, sizeof(Ins), PHeap);
|
||||
for (ist=ilog, n=0; n<fn->nblk; ++n) {
|
||||
b = fn->rpo[n];
|
||||
for (; ist->bid == n && ist->isphi; ++ist) {
|
||||
ist->new.phi.p->link = b->phi;
|
||||
b->phi = ist->new.phi.p;
|
||||
}
|
||||
ni = 0;
|
||||
nt = 0;
|
||||
for (;;) {
|
||||
if (ist->bid == n && ist->off == ni)
|
||||
i = &ist++->new.ins;
|
||||
else {
|
||||
if (ni == b->nins)
|
||||
break;
|
||||
i = &b->ins[ni++];
|
||||
if (isload(i->op)
|
||||
&& !req(i->arg[1], R)) {
|
||||
ext = Oextsb + i->op - Oloadsb;
|
||||
switch (i->op) {
|
||||
default:
|
||||
die("unreachable");
|
||||
case Oloadsb:
|
||||
case Oloadub:
|
||||
case Oloadsh:
|
||||
case Oloaduh:
|
||||
i->op = ext;
|
||||
break;
|
||||
case Oloadsw:
|
||||
case Oloaduw:
|
||||
if (i->cls == Kl) {
|
||||
i->op = ext;
|
||||
break;
|
||||
}
|
||||
/* fall through */
|
||||
case Oload:
|
||||
i->op = Ocopy;
|
||||
break;
|
||||
}
|
||||
i->arg[0] = i->arg[1];
|
||||
i->arg[1] = R;
|
||||
}
|
||||
}
|
||||
vgrow(&ib, ++nt);
|
||||
ib[nt-1] = *i;
|
||||
}
|
||||
idup(b, ib, nt);
|
||||
}
|
||||
vfree(ib);
|
||||
vfree(ilog);
|
||||
if (debug['M']) {
|
||||
fprintf(stderr, "\n> After load elimination:\n");
|
||||
printfn(fn, stderr);
|
||||
}
|
||||
}
|
||||
212
src/qbe/main.c
Normal file
212
src/qbe/main.c
Normal file
@@ -0,0 +1,212 @@
|
||||
#include "all.h"
|
||||
#include "config.h"
|
||||
#include <ctype.h>
|
||||
#include <getopt.h>
|
||||
|
||||
Target T;
|
||||
|
||||
char debug['Z'+1] = {
|
||||
['P'] = 0, /* parsing */
|
||||
['M'] = 0, /* memory optimization */
|
||||
['N'] = 0, /* ssa construction */
|
||||
['C'] = 0, /* copy elimination */
|
||||
['F'] = 0, /* constant folding */
|
||||
['K'] = 0, /* if-conversion */
|
||||
['A'] = 0, /* abi lowering */
|
||||
['I'] = 0, /* instruction selection */
|
||||
['L'] = 0, /* liveness */
|
||||
['S'] = 0, /* spilling */
|
||||
['R'] = 0, /* reg. allocation */
|
||||
};
|
||||
|
||||
extern Target T_amd64_sysv;
|
||||
extern Target T_amd64_apple;
|
||||
extern Target T_amd64_win;
|
||||
extern Target T_arm64;
|
||||
extern Target T_arm64_apple;
|
||||
extern Target T_rv64;
|
||||
|
||||
static Target *tlist[] = {
|
||||
&T_amd64_sysv,
|
||||
&T_amd64_apple,
|
||||
&T_amd64_win,
|
||||
&T_arm64,
|
||||
&T_arm64_apple,
|
||||
&T_rv64,
|
||||
0
|
||||
};
|
||||
static FILE *outf;
|
||||
static int dbg;
|
||||
|
||||
static void
|
||||
data(Dat *d)
|
||||
{
|
||||
if (dbg)
|
||||
return;
|
||||
emitdat(d, outf);
|
||||
if (d->type == DEnd) {
|
||||
fputs("/* end data */\n\n", outf);
|
||||
freeall();
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
func(Fn *fn)
|
||||
{
|
||||
uint n;
|
||||
|
||||
if (dbg)
|
||||
fprintf(stderr, "**** Function %s ****", fn->name);
|
||||
if (debug['P']) {
|
||||
fprintf(stderr, "\n> After parsing:\n");
|
||||
printfn(fn, stderr);
|
||||
}
|
||||
T.abi0(fn);
|
||||
fillcfg(fn);
|
||||
filluse(fn);
|
||||
promote(fn);
|
||||
filluse(fn);
|
||||
ssa(fn);
|
||||
filluse(fn);
|
||||
ssacheck(fn);
|
||||
fillalias(fn);
|
||||
loadopt(fn);
|
||||
filluse(fn);
|
||||
fillalias(fn);
|
||||
coalesce(fn);
|
||||
filluse(fn);
|
||||
filldom(fn);
|
||||
ssacheck(fn);
|
||||
gvn(fn);
|
||||
fillcfg(fn);
|
||||
simplcfg(fn);
|
||||
filluse(fn);
|
||||
filldom(fn);
|
||||
gcm(fn);
|
||||
filluse(fn);
|
||||
ssacheck(fn);
|
||||
if (T.cansel) {
|
||||
ifconvert(fn);
|
||||
fillcfg(fn);
|
||||
filluse(fn);
|
||||
filldom(fn);
|
||||
ssacheck(fn);
|
||||
}
|
||||
T.abi1(fn);
|
||||
simpl(fn);
|
||||
fillcfg(fn);
|
||||
filluse(fn);
|
||||
T.isel(fn);
|
||||
fillcfg(fn);
|
||||
filllive(fn);
|
||||
fillloop(fn);
|
||||
fillcost(fn);
|
||||
spill(fn);
|
||||
rega(fn);
|
||||
fillcfg(fn);
|
||||
simpljmp(fn);
|
||||
fillcfg(fn);
|
||||
assert(fn->rpo[0] == fn->start);
|
||||
for (n=0;; n++)
|
||||
if (n == fn->nblk-1) {
|
||||
fn->rpo[n]->link = 0;
|
||||
break;
|
||||
} else
|
||||
fn->rpo[n]->link = fn->rpo[n+1];
|
||||
if (!dbg) {
|
||||
T.emitfn(fn, outf);
|
||||
fprintf(outf, "/* end function %s */\n\n", fn->name);
|
||||
} else
|
||||
fprintf(stderr, "\n");
|
||||
freeall();
|
||||
}
|
||||
|
||||
static void
|
||||
dbgfile(char *fn)
|
||||
{
|
||||
emitdbgfile(fn, outf);
|
||||
}
|
||||
|
||||
int
|
||||
main(int ac, char *av[])
|
||||
{
|
||||
Target **t;
|
||||
FILE *inf, *hf;
|
||||
char *f, *sep;
|
||||
int c;
|
||||
|
||||
T = Deftgt;
|
||||
outf = stdout;
|
||||
while ((c = getopt(ac, av, "hd:o:t:")) != -1)
|
||||
switch (c) {
|
||||
case 'd':
|
||||
for (; *optarg; optarg++)
|
||||
if (isalpha(*optarg)) {
|
||||
debug[toupper(*optarg)] = 1;
|
||||
dbg = 1;
|
||||
}
|
||||
break;
|
||||
case 'o':
|
||||
if (strcmp(optarg, "-") != 0) {
|
||||
outf = fopen(optarg, "w");
|
||||
if (!outf) {
|
||||
fprintf(stderr, "cannot open '%s'\n", optarg);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 't':
|
||||
if (strcmp(optarg, "?") == 0) {
|
||||
puts(T.name);
|
||||
exit(0);
|
||||
}
|
||||
for (t=tlist;; t++) {
|
||||
if (!*t) {
|
||||
fprintf(stderr, "unknown target '%s'\n", optarg);
|
||||
exit(1);
|
||||
}
|
||||
if (strcmp(optarg, (*t)->name) == 0) {
|
||||
T = **t;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 'h':
|
||||
default:
|
||||
hf = c != 'h' ? stderr : stdout;
|
||||
fprintf(hf, "%s [OPTIONS] {file.ssa, -}\n", av[0]);
|
||||
fprintf(hf, "\t%-11s prints this help\n", "-h");
|
||||
fprintf(hf, "\t%-11s output to file\n", "-o file");
|
||||
fprintf(hf, "\t%-11s generate for a target among:\n", "-t <target>");
|
||||
fprintf(hf, "\t%-11s ", "");
|
||||
for (t=tlist, sep=""; *t; t++, sep=", ") {
|
||||
fprintf(hf, "%s%s", sep, (*t)->name);
|
||||
if (*t == &Deftgt)
|
||||
fputs(" (default)", hf);
|
||||
}
|
||||
fprintf(hf, "\n");
|
||||
fprintf(hf, "\t%-11s dump debug information\n", "-d <flags>");
|
||||
exit(c != 'h');
|
||||
}
|
||||
|
||||
do {
|
||||
f = av[optind];
|
||||
if (!f || strcmp(f, "-") == 0) {
|
||||
inf = stdin;
|
||||
f = "-";
|
||||
} else {
|
||||
inf = fopen(f, "r");
|
||||
if (!inf) {
|
||||
fprintf(stderr, "cannot open '%s'\n", f);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
parse(inf, f, dbgfile, data, func);
|
||||
fclose(inf);
|
||||
} while (++optind < ac);
|
||||
|
||||
if (!dbg)
|
||||
T.emitfin(outf);
|
||||
|
||||
exit(0);
|
||||
}
|
||||
488
src/qbe/mem.c
Normal file
488
src/qbe/mem.c
Normal file
@@ -0,0 +1,488 @@
|
||||
#include "all.h"
|
||||
|
||||
typedef struct Range Range;
|
||||
typedef struct Store Store;
|
||||
typedef struct Slot Slot;
|
||||
|
||||
/* require use, maintains use counts */
|
||||
void
|
||||
promote(Fn *fn)
|
||||
{
|
||||
Blk *b;
|
||||
Ins *i, *l;
|
||||
Tmp *t;
|
||||
Use *u, *ue;
|
||||
int s, k;
|
||||
|
||||
/* promote uniform stack slots to temporaries */
|
||||
b = fn->start;
|
||||
for (i=b->ins; i<&b->ins[b->nins]; i++) {
|
||||
if (Oalloc > i->op || i->op > Oalloc1)
|
||||
continue;
|
||||
/* specific to NAlign == 3 */
|
||||
assert(rtype(i->to) == RTmp);
|
||||
t = &fn->tmp[i->to.val];
|
||||
if (t->ndef != 1)
|
||||
goto Skip;
|
||||
k = -1;
|
||||
s = -1;
|
||||
for (u=t->use; u<&t->use[t->nuse]; u++) {
|
||||
if (u->type != UIns)
|
||||
goto Skip;
|
||||
l = u->u.ins;
|
||||
if (isload(l->op))
|
||||
if (s == -1 || s == loadsz(l)) {
|
||||
s = loadsz(l);
|
||||
continue;
|
||||
}
|
||||
if (isstore(l->op))
|
||||
if (req(i->to, l->arg[1]) && !req(i->to, l->arg[0]))
|
||||
if (s == -1 || s == storesz(l))
|
||||
if (k == -1 || k == optab[l->op].argcls[0][0]) {
|
||||
s = storesz(l);
|
||||
k = optab[l->op].argcls[0][0];
|
||||
continue;
|
||||
}
|
||||
goto Skip;
|
||||
}
|
||||
/* get rid of the alloc and replace uses */
|
||||
*i = (Ins){.op = Onop};
|
||||
t->ndef--;
|
||||
ue = &t->use[t->nuse];
|
||||
for (u=t->use; u!=ue; u++) {
|
||||
l = u->u.ins;
|
||||
if (isstore(l->op)) {
|
||||
l->cls = k;
|
||||
l->op = Ocopy;
|
||||
l->to = l->arg[1];
|
||||
l->arg[1] = R;
|
||||
t->nuse--;
|
||||
t->ndef++;
|
||||
} else {
|
||||
if (k == -1)
|
||||
err("slot %%%s is read but never stored to",
|
||||
fn->tmp[l->arg[0].val].name);
|
||||
/* try to turn loads into copies so we
|
||||
* can eliminate them later */
|
||||
switch(l->op) {
|
||||
case Oloadsw:
|
||||
case Oloaduw:
|
||||
if (k == Kl)
|
||||
goto Extend;
|
||||
/* fall through */
|
||||
case Oload:
|
||||
if (KBASE(k) != KBASE(l->cls))
|
||||
l->op = Ocast;
|
||||
else
|
||||
l->op = Ocopy;
|
||||
break;
|
||||
default:
|
||||
Extend:
|
||||
l->op = Oextsb + (l->op - Oloadsb);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Skip:;
|
||||
}
|
||||
if (debug['M']) {
|
||||
fprintf(stderr, "\n> After slot promotion:\n");
|
||||
printfn(fn, stderr);
|
||||
}
|
||||
}
|
||||
|
||||
/* [a, b) with 0 <= a */
|
||||
struct Range {
|
||||
int a, b;
|
||||
};
|
||||
|
||||
struct Store {
|
||||
int ip;
|
||||
Ins *i;
|
||||
};
|
||||
|
||||
struct Slot {
|
||||
int t;
|
||||
int sz;
|
||||
bits m;
|
||||
bits l;
|
||||
Range r;
|
||||
Slot *s;
|
||||
Store *st;
|
||||
int nst;
|
||||
};
|
||||
|
||||
static inline int
|
||||
rin(Range r, int n)
|
||||
{
|
||||
return r.a <= n && n < r.b;
|
||||
}
|
||||
|
||||
static inline int
|
||||
rovlap(Range r0, Range r1)
|
||||
{
|
||||
return r0.b && r1.b && r0.a < r1.b && r1.a < r0.b;
|
||||
}
|
||||
|
||||
static void
|
||||
radd(Range *r, int n)
|
||||
{
|
||||
if (!r->b)
|
||||
*r = (Range){n, n+1};
|
||||
else if (n < r->a)
|
||||
r->a = n;
|
||||
else if (n >= r->b)
|
||||
r->b = n+1;
|
||||
}
|
||||
|
||||
static int
|
||||
slot(Slot **ps, int64_t *off, Ref r, Fn *fn, Slot *sl)
|
||||
{
|
||||
Alias a;
|
||||
Tmp *t;
|
||||
|
||||
getalias(&a, r, fn);
|
||||
if (a.type != ALoc)
|
||||
return 0;
|
||||
t = &fn->tmp[a.base];
|
||||
if (t->visit < 0)
|
||||
return 0;
|
||||
*off = a.offset;
|
||||
*ps = &sl[t->visit];
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
load(Ref r, bits x, int ip, Fn *fn, Slot *sl)
|
||||
{
|
||||
int64_t off;
|
||||
Slot *s;
|
||||
|
||||
if (slot(&s, &off, r, fn, sl)) {
|
||||
s->l |= x << off;
|
||||
s->l &= s->m;
|
||||
if (s->l)
|
||||
radd(&s->r, ip);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
store(Ref r, bits x, int ip, Ins *i, Fn *fn, Slot *sl)
|
||||
{
|
||||
int64_t off;
|
||||
Slot *s;
|
||||
|
||||
if (slot(&s, &off, r, fn, sl)) {
|
||||
if (s->l) {
|
||||
radd(&s->r, ip);
|
||||
s->l &= ~(x << off);
|
||||
} else {
|
||||
vgrow(&s->st, ++s->nst);
|
||||
s->st[s->nst-1].ip = ip;
|
||||
s->st[s->nst-1].i = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
scmp(const void *pa, const void *pb)
|
||||
{
|
||||
Slot *a, *b;
|
||||
|
||||
a = (Slot *)pa, b = (Slot *)pb;
|
||||
if (a->sz != b->sz)
|
||||
return b->sz - a->sz;
|
||||
return a->r.a - b->r.a;
|
||||
}
|
||||
|
||||
static void
|
||||
maxrpo(Blk *hd, Blk *b)
|
||||
{
|
||||
if (hd->loop < (int)b->id)
|
||||
hd->loop = b->id;
|
||||
}
|
||||
|
||||
void
|
||||
coalesce(Fn *fn)
|
||||
{
|
||||
Range r, *br;
|
||||
Slot *s, *s0, *sl;
|
||||
Blk *b, **ps, *succ[3];
|
||||
Ins *i, **bl;
|
||||
Use *u;
|
||||
Tmp *t, *ts;
|
||||
Ref *arg;
|
||||
bits x;
|
||||
int64_t off0, off1;
|
||||
int n, m, ip, sz, nsl, nbl, *stk;
|
||||
uint total, freed, fused;
|
||||
|
||||
/* minimize the stack usage
|
||||
* by coalescing slots
|
||||
*/
|
||||
nsl = 0;
|
||||
sl = vnew(0, sizeof sl[0], PHeap);
|
||||
for (n=Tmp0; n<fn->ntmp; n++) {
|
||||
t = &fn->tmp[n];
|
||||
t->visit = -1;
|
||||
if (t->alias.type == ALoc)
|
||||
if (t->alias.slot == &t->alias)
|
||||
if (t->bid == fn->start->id)
|
||||
if (t->alias.u.loc.sz != -1) {
|
||||
t->visit = nsl;
|
||||
vgrow(&sl, ++nsl);
|
||||
s = &sl[nsl-1];
|
||||
s->t = n;
|
||||
s->sz = t->alias.u.loc.sz;
|
||||
s->m = t->alias.u.loc.m;
|
||||
s->s = 0;
|
||||
s->st = vnew(0, sizeof s->st[0], PHeap);
|
||||
s->nst = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* one-pass liveness analysis */
|
||||
for (b=fn->start; b; b=b->link)
|
||||
b->loop = -1;
|
||||
loopiter(fn, maxrpo);
|
||||
nbl = 0;
|
||||
bl = vnew(0, sizeof bl[0], PHeap);
|
||||
br = emalloc(fn->nblk * sizeof br[0]);
|
||||
ip = INT_MAX - 1;
|
||||
for (n=fn->nblk-1; n>=0; n--) {
|
||||
b = fn->rpo[n];
|
||||
succ[0] = b->s1;
|
||||
succ[1] = b->s2;
|
||||
succ[2] = 0;
|
||||
br[n].b = ip--;
|
||||
for (s=sl; s<&sl[nsl]; s++) {
|
||||
s->l = 0;
|
||||
for (ps=succ; *ps; ps++) {
|
||||
m = (*ps)->id;
|
||||
if (m > n && rin(s->r, br[m].a)) {
|
||||
s->l = s->m;
|
||||
radd(&s->r, ip);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (b->jmp.type == Jretc)
|
||||
load(b->jmp.arg, -1, --ip, fn, sl);
|
||||
for (i=&b->ins[b->nins]; i!=b->ins;) {
|
||||
--i;
|
||||
arg = i->arg;
|
||||
if (i->op == Oargc) {
|
||||
load(arg[1], -1, --ip, fn, sl);
|
||||
}
|
||||
if (isload(i->op)) {
|
||||
x = BIT(loadsz(i)) - 1;
|
||||
load(arg[0], x, --ip, fn, sl);
|
||||
}
|
||||
if (isstore(i->op)) {
|
||||
x = BIT(storesz(i)) - 1;
|
||||
store(arg[1], x, ip--, i, fn, sl);
|
||||
}
|
||||
if (i->op == Oblit0) {
|
||||
assert((i+1)->op == Oblit1);
|
||||
assert(rtype((i+1)->arg[0]) == RInt);
|
||||
sz = abs(rsval((i+1)->arg[0]));
|
||||
x = sz >= NBit ? (bits)-1 : BIT(sz) - 1;
|
||||
store(arg[1], x, ip--, i, fn, sl);
|
||||
load(arg[0], x, ip, fn, sl);
|
||||
vgrow(&bl, ++nbl);
|
||||
bl[nbl-1] = i;
|
||||
}
|
||||
}
|
||||
for (s=sl; s<&sl[nsl]; s++)
|
||||
if (s->l) {
|
||||
radd(&s->r, ip);
|
||||
if (b->loop != -1) {
|
||||
assert(b->loop >= n);
|
||||
radd(&s->r, br[b->loop].b - 1);
|
||||
}
|
||||
}
|
||||
br[n].a = ip;
|
||||
}
|
||||
free(br);
|
||||
|
||||
/* kill dead stores */
|
||||
for (s=sl; s<&sl[nsl]; s++)
|
||||
for (n=0; n<s->nst; n++)
|
||||
if (!rin(s->r, s->st[n].ip)) {
|
||||
i = s->st[n].i;
|
||||
if (i->op == Oblit0)
|
||||
*(i+1) = (Ins){.op = Onop};
|
||||
*i = (Ins){.op = Onop};
|
||||
}
|
||||
|
||||
/* kill slots with an empty live range */
|
||||
total = 0;
|
||||
freed = 0;
|
||||
stk = vnew(0, sizeof stk[0], PHeap);
|
||||
n = 0;
|
||||
for (s=s0=sl; s<&sl[nsl]; s++) {
|
||||
total += s->sz;
|
||||
if (!s->r.b) {
|
||||
vfree(s->st);
|
||||
vgrow(&stk, ++n);
|
||||
stk[n-1] = s->t;
|
||||
freed += s->sz;
|
||||
} else
|
||||
*s0++ = *s;
|
||||
}
|
||||
nsl = s0-sl;
|
||||
if (debug['M']) {
|
||||
fputs("\n> Slot coalescing:\n", stderr);
|
||||
if (n) {
|
||||
fputs("\tkill [", stderr);
|
||||
for (m=0; m<n; m++)
|
||||
fprintf(stderr, " %%%s",
|
||||
fn->tmp[stk[m]].name);
|
||||
fputs(" ]\n", stderr);
|
||||
}
|
||||
}
|
||||
while (n--) {
|
||||
t = &fn->tmp[stk[n]];
|
||||
assert(t->ndef == 1 && t->def);
|
||||
i = t->def;
|
||||
if (isload(i->op)) {
|
||||
i->op = Ocopy;
|
||||
i->arg[0] = UNDEF;
|
||||
continue;
|
||||
}
|
||||
*i = (Ins){.op = Onop};
|
||||
for (u=t->use; u<&t->use[t->nuse]; u++) {
|
||||
if (u->type == UJmp) {
|
||||
b = fn->rpo[u->bid];
|
||||
assert(isret(b->jmp.type));
|
||||
b->jmp.type = Jret0;
|
||||
b->jmp.arg = R;
|
||||
continue;
|
||||
}
|
||||
assert(u->type == UIns);
|
||||
i = u->u.ins;
|
||||
if (!req(i->to, R)) {
|
||||
assert(rtype(i->to) == RTmp);
|
||||
vgrow(&stk, ++n);
|
||||
stk[n-1] = i->to.val;
|
||||
} else if (isarg(i->op)) {
|
||||
assert(i->op == Oargc);
|
||||
i->arg[1] = CON_Z; /* crash */
|
||||
} else {
|
||||
if (i->op == Oblit0)
|
||||
*(i+1) = (Ins){.op = Onop};
|
||||
*i = (Ins){.op = Onop};
|
||||
}
|
||||
}
|
||||
}
|
||||
vfree(stk);
|
||||
|
||||
/* fuse slots by decreasing size */
|
||||
qsort(sl, nsl, sizeof *sl, scmp);
|
||||
fused = 0;
|
||||
for (n=0; n<nsl; n++) {
|
||||
s0 = &sl[n];
|
||||
if (s0->s)
|
||||
continue;
|
||||
s0->s = s0;
|
||||
r = s0->r;
|
||||
for (s=s0+1; s<&sl[nsl]; s++) {
|
||||
if (s->s || !s->r.b)
|
||||
goto Skip;
|
||||
if (rovlap(r, s->r))
|
||||
/* O(n); can be approximated
|
||||
* by 'goto Skip;' if need be
|
||||
*/
|
||||
for (m=n; &sl[m]<s; m++)
|
||||
if (sl[m].s == s0)
|
||||
if (rovlap(sl[m].r, s->r))
|
||||
goto Skip;
|
||||
radd(&r, s->r.a);
|
||||
radd(&r, s->r.b - 1);
|
||||
s->s = s0;
|
||||
fused += s->sz;
|
||||
Skip:;
|
||||
}
|
||||
}
|
||||
|
||||
/* substitute fused slots */
|
||||
for (s=sl; s<&sl[nsl]; s++) {
|
||||
t = &fn->tmp[s->t];
|
||||
/* the visit link is stale,
|
||||
* reset it before the slot()
|
||||
* calls below
|
||||
*/
|
||||
t->visit = s-sl;
|
||||
assert(t->ndef == 1 && t->def);
|
||||
if (s->s == s)
|
||||
continue;
|
||||
*t->def = (Ins){.op = Onop};
|
||||
ts = &fn->tmp[s->s->t];
|
||||
assert(t->bid == ts->bid);
|
||||
if (t->def < ts->def) {
|
||||
/* make sure the slot we
|
||||
* selected has a def that
|
||||
* dominates its new uses
|
||||
*/
|
||||
*t->def = *ts->def;
|
||||
*ts->def = (Ins){.op = Onop};
|
||||
ts->def = t->def;
|
||||
}
|
||||
for (u=t->use; u<&t->use[t->nuse]; u++) {
|
||||
if (u->type == UJmp) {
|
||||
b = fn->rpo[u->bid];
|
||||
b->jmp.arg = TMP(s->s->t);
|
||||
continue;
|
||||
}
|
||||
assert(u->type == UIns);
|
||||
arg = u->u.ins->arg;
|
||||
for (n=0; n<2; n++)
|
||||
if (req(arg[n], TMP(s->t)))
|
||||
arg[n] = TMP(s->s->t);
|
||||
}
|
||||
}
|
||||
|
||||
/* fix newly overlapping blits */
|
||||
for (n=0; n<nbl; n++) {
|
||||
i = bl[n];
|
||||
if (i->op == Oblit0)
|
||||
if (slot(&s, &off0, i->arg[0], fn, sl))
|
||||
if (slot(&s0, &off1, i->arg[1], fn, sl))
|
||||
if (s->s == s0->s) {
|
||||
if (off0 < off1) {
|
||||
sz = rsval((i+1)->arg[0]);
|
||||
assert(sz >= 0);
|
||||
(i+1)->arg[0] = INT(-sz);
|
||||
} else if (off0 == off1) {
|
||||
*i = (Ins){.op = Onop};
|
||||
*(i+1) = (Ins){.op = Onop};
|
||||
}
|
||||
}
|
||||
}
|
||||
vfree(bl);
|
||||
|
||||
if (debug['M']) {
|
||||
for (s0=sl; s0<&sl[nsl]; s0++) {
|
||||
if (s0->s != s0)
|
||||
continue;
|
||||
fprintf(stderr, "\tfuse (% 3db) [", s0->sz);
|
||||
for (s=s0; s<&sl[nsl]; s++) {
|
||||
if (s->s != s0)
|
||||
continue;
|
||||
fprintf(stderr, " %%%s", fn->tmp[s->t].name);
|
||||
if (s->r.b)
|
||||
fprintf(stderr, "[%d,%d)",
|
||||
s->r.a-ip, s->r.b-ip);
|
||||
else
|
||||
fputs("{}", stderr);
|
||||
}
|
||||
fputs(" ]\n", stderr);
|
||||
}
|
||||
fprintf(stderr, "\tsums %u/%u/%u (killed/fused/total)\n\n",
|
||||
freed, fused, total);
|
||||
printfn(fn, stderr);
|
||||
}
|
||||
|
||||
for (s=sl; s<&sl[nsl]; s++)
|
||||
vfree(s->st);
|
||||
vfree(sl);
|
||||
}
|
||||
4
src/qbe/minic/.gitignore
vendored
Normal file
4
src/qbe/minic/.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
minic
|
||||
yacc
|
||||
y.*
|
||||
*.out
|
||||
12
src/qbe/minic/Makefile
Normal file
12
src/qbe/minic/Makefile
Normal file
@@ -0,0 +1,12 @@
|
||||
BIN = minic
|
||||
|
||||
CFLAGS += -g -Wall
|
||||
|
||||
$(BIN): yacc minic.y
|
||||
./yacc minic.y
|
||||
$(CC) $(CFLAGS) -o $@ y.tab.c
|
||||
|
||||
clean:
|
||||
rm -f yacc minic y.*
|
||||
|
||||
.PHONY: clean
|
||||
44
src/qbe/minic/mcc
Executable file
44
src/qbe/minic/mcc
Executable file
@@ -0,0 +1,44 @@
|
||||
#!/bin/sh
# mcc - compile one MiniC source file.
# usage: mcc [LDFLAGS] file.c
#
# Pipeline: minic turns the source into QBE IL, qbe turns the IL into
# assembly, and cc assembles and links it.  Arguments starting with
# '-' are handed to cc; exactly one other argument (the source file)
# is required.

DIR=$(cd "$(dirname "$0")" && pwd)
QBE=$DIR/../qbe

usage()
{
	echo "usage: mcc [LDFLAGS] file.c" >&2
	exit 1
}

# start from a clean state rather than whatever the environment holds
flags=
file=

for i
do
	case $i in
	-*)
		flags="$flags $i"
		;;
	*)
		if ! test -z "$file"
		then
			usage
		fi
		file=$i
		;;
	esac
done

if test -z "$file"
then
	usage
fi

# Private scratch directory: fixed names in /tmp are predictable and
# collide when two compilations run at the same time.
tmp=$(mktemp -d "${TMPDIR:-/tmp}/minic.XXXXXX") || exit 1
trap 'rm -rf "$tmp"' EXIT
trap 'exit 1' HUP INT TERM

# $flags is left unquoted on purpose: it is a space-separated list.
"$DIR/minic" < "$file" > "$tmp/minic.ssa" &&
"$QBE" < "$tmp/minic.ssa" > "$tmp/minic.s" &&
cc "$tmp/minic.s" $flags

if test $? -ne 0
then
	echo "error processing file $file" >&2
	exit 1
fi
|
||||
|
||||
|
||||
951
src/qbe/minic/minic.y
Normal file
951
src/qbe/minic/minic.y
Normal file
@@ -0,0 +1,951 @@
|
||||
%{
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
enum {
|
||||
NString = 32,
|
||||
NGlo = 256,
|
||||
NVar = 512,
|
||||
NStr = 256,
|
||||
};
|
||||
|
||||
enum { /* minic types */
|
||||
NIL,
|
||||
INT,
|
||||
LNG,
|
||||
PTR,
|
||||
FUN,
|
||||
};
|
||||
|
||||
#define IDIR(x) (((x) << 3) + PTR)
|
||||
#define FUNC(x) (((x) << 3) + FUN)
|
||||
#define DREF(x) ((x) >> 3)
|
||||
#define KIND(x) ((x) & 7)
|
||||
#define SIZE(x) \
|
||||
(x == NIL ? (die("void has no size"), 0) : \
|
||||
x == INT ? 4 : 8)
|
||||
|
||||
typedef struct Node Node;
|
||||
typedef struct Symb Symb;
|
||||
typedef struct Stmt Stmt;
|
||||
|
||||
struct Symb {
|
||||
enum {
|
||||
Con,
|
||||
Tmp,
|
||||
Var,
|
||||
Glo,
|
||||
} t;
|
||||
union {
|
||||
int n;
|
||||
char v[NString];
|
||||
} u;
|
||||
unsigned long ctyp;
|
||||
};
|
||||
|
||||
struct Node {
|
||||
char op;
|
||||
union {
|
||||
int n;
|
||||
char v[NString];
|
||||
Symb s;
|
||||
} u;
|
||||
Node *l, *r;
|
||||
};
|
||||
|
||||
struct Stmt {
|
||||
enum {
|
||||
If,
|
||||
While,
|
||||
Seq,
|
||||
Expr,
|
||||
Break,
|
||||
Ret,
|
||||
} t;
|
||||
void *p1, *p2, *p3;
|
||||
};
|
||||
|
||||
int yylex(void), yyerror(char *);
|
||||
Symb expr(Node *), lval(Node *);
|
||||
void branch(Node *, int, int);
|
||||
|
||||
FILE *of;
|
||||
int line;
|
||||
int lbl, tmp, nglo;
|
||||
char *ini[NGlo];
|
||||
struct {
|
||||
char v[NString];
|
||||
unsigned ctyp;
|
||||
int glo;
|
||||
} varh[NVar];
|
||||
|
||||
void
|
||||
die(char *s)
|
||||
{
|
||||
fprintf(stderr, "error:%d: %s\n", line, s);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* malloc() wrapper that reports failure through die(). */
void *
alloc(size_t s)
{
	void *mem = malloc(s);

	if (mem == NULL)
		die("out of memory");
	return mem;
}
|
||||
|
||||
unsigned
|
||||
hash(char *s)
|
||||
{
|
||||
unsigned h;
|
||||
|
||||
h = 42;
|
||||
while (*s)
|
||||
h += 11 * h + *s++;
|
||||
return h % NVar;
|
||||
}
|
||||
|
||||
void
|
||||
varclr()
|
||||
{
|
||||
unsigned h;
|
||||
|
||||
for (h=0; h<NVar; h++)
|
||||
if (!varh[h].glo)
|
||||
varh[h].v[0] = 0;
|
||||
}
|
||||
|
||||
void
|
||||
varadd(char *v, int glo, unsigned ctyp)
|
||||
{
|
||||
unsigned h0, h;
|
||||
|
||||
h0 = hash(v);
|
||||
h = h0;
|
||||
do {
|
||||
if (varh[h].v[0] == 0) {
|
||||
strcpy(varh[h].v, v);
|
||||
varh[h].glo = glo;
|
||||
varh[h].ctyp = ctyp;
|
||||
return;
|
||||
}
|
||||
if (strcmp(varh[h].v, v) == 0)
|
||||
die("double definition");
|
||||
h = (h+1) % NVar;
|
||||
} while(h != h0);
|
||||
die("too many variables");
|
||||
}
|
||||
|
||||
Symb *
|
||||
varget(char *v)
|
||||
{
|
||||
static Symb s;
|
||||
unsigned h0, h;
|
||||
|
||||
h0 = hash(v);
|
||||
h = h0;
|
||||
do {
|
||||
if (strcmp(varh[h].v, v) == 0) {
|
||||
if (!varh[h].glo) {
|
||||
s.t = Var;
|
||||
strcpy(s.u.v, v);
|
||||
} else {
|
||||
s.t = Glo;
|
||||
s.u.n = varh[h].glo;
|
||||
}
|
||||
s.ctyp = varh[h].ctyp;
|
||||
return &s;
|
||||
}
|
||||
h = (h+1) % NVar;
|
||||
} while (h != h0 && varh[h].v[0] != 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* IL type letter for a minic type: 'l' for 8-byte types, 'w'
 * otherwise.
 */
char
irtyp(unsigned ctyp)
{
	if (SIZE(ctyp) == 8)
		return 'l';
	return 'w';
}
|
||||
|
||||
void
|
||||
psymb(Symb s)
|
||||
{
|
||||
switch (s.t) {
|
||||
case Tmp:
|
||||
fprintf(of, "%%t%d", s.u.n);
|
||||
break;
|
||||
case Var:
|
||||
fprintf(of, "%%%s", s.u.v);
|
||||
break;
|
||||
case Glo:
|
||||
fprintf(of, "$glo%d", s.u.n);
|
||||
break;
|
||||
case Con:
|
||||
fprintf(of, "%d", s.u.n);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
sext(Symb *s)
|
||||
{
|
||||
fprintf(of, "\t%%t%d =l extsw ", tmp);
|
||||
psymb(*s);
|
||||
fprintf(of, "\n");
|
||||
s->t = Tmp;
|
||||
s->ctyp = LNG;
|
||||
s->u.n = tmp++;
|
||||
}
|
||||
|
||||
unsigned
|
||||
prom(int op, Symb *l, Symb *r)
|
||||
{
|
||||
Symb *t;
|
||||
int sz;
|
||||
|
||||
if (l->ctyp == r->ctyp && KIND(l->ctyp) != PTR)
|
||||
return l->ctyp;
|
||||
|
||||
if (l->ctyp == LNG && r->ctyp == INT) {
|
||||
sext(r);
|
||||
return LNG;
|
||||
}
|
||||
if (l->ctyp == INT && r->ctyp == LNG) {
|
||||
sext(l);
|
||||
return LNG;
|
||||
}
|
||||
|
||||
if (op == '+') {
|
||||
if (KIND(r->ctyp) == PTR) {
|
||||
t = l;
|
||||
l = r;
|
||||
r = t;
|
||||
}
|
||||
if (KIND(r->ctyp) == PTR)
|
||||
die("pointers added");
|
||||
goto Scale;
|
||||
}
|
||||
|
||||
if (op == '-') {
|
||||
if (KIND(l->ctyp) != PTR)
|
||||
die("pointer substracted from integer");
|
||||
if (KIND(r->ctyp) != PTR)
|
||||
goto Scale;
|
||||
if (l->ctyp != r->ctyp)
|
||||
die("non-homogeneous pointers in substraction");
|
||||
return LNG;
|
||||
}
|
||||
|
||||
Scale:
|
||||
sz = SIZE(DREF(l->ctyp));
|
||||
if (r->t == Con)
|
||||
r->u.n *= sz;
|
||||
else {
|
||||
if (irtyp(r->ctyp) != 'l')
|
||||
sext(r);
|
||||
fprintf(of, "\t%%t%d =l mul %d, ", tmp, sz);
|
||||
psymb(*r);
|
||||
fprintf(of, "\n");
|
||||
r->u.n = tmp++;
|
||||
}
|
||||
return l->ctyp;
|
||||
}
|
||||
|
||||
void
|
||||
load(Symb d, Symb s)
|
||||
{
|
||||
char t;
|
||||
|
||||
fprintf(of, "\t");
|
||||
psymb(d);
|
||||
t = irtyp(d.ctyp);
|
||||
fprintf(of, " =%c load%c ", t, t);
|
||||
psymb(s);
|
||||
fprintf(of, "\n");
|
||||
}
|
||||
|
||||
void
|
||||
call(Node *n, Symb *sr)
|
||||
{
|
||||
Node *a;
|
||||
char *f;
|
||||
unsigned ft;
|
||||
|
||||
f = n->l->u.v;
|
||||
if (varget(f)) {
|
||||
ft = varget(f)->ctyp;
|
||||
if (KIND(ft) != FUN)
|
||||
die("invalid call");
|
||||
} else
|
||||
ft = FUNC(INT);
|
||||
sr->ctyp = DREF(ft);
|
||||
for (a=n->r; a; a=a->r)
|
||||
a->u.s = expr(a->l);
|
||||
fprintf(of, "\t");
|
||||
psymb(*sr);
|
||||
fprintf(of, " =%c call $%s(", irtyp(sr->ctyp), f);
|
||||
for (a=n->r; a; a=a->r) {
|
||||
fprintf(of, "%c ", irtyp(a->u.s.ctyp));
|
||||
psymb(a->u.s);
|
||||
fprintf(of, ", ");
|
||||
}
|
||||
fprintf(of, "...)\n");
|
||||
}
|
||||
|
||||
Symb
|
||||
expr(Node *n)
|
||||
{
|
||||
static char *otoa[] = {
|
||||
['+'] = "add",
|
||||
['-'] = "sub",
|
||||
['*'] = "mul",
|
||||
['/'] = "div",
|
||||
['%'] = "rem",
|
||||
['&'] = "and",
|
||||
['<'] = "cslt", /* meeeeh, wrong for pointers! */
|
||||
['l'] = "csle",
|
||||
['e'] = "ceq",
|
||||
['n'] = "cne",
|
||||
};
|
||||
Symb sr, s0, s1, sl;
|
||||
int o, l;
|
||||
char ty[2];
|
||||
|
||||
sr.t = Tmp;
|
||||
sr.u.n = tmp++;
|
||||
|
||||
switch (n->op) {
|
||||
|
||||
case 0:
|
||||
abort();
|
||||
|
||||
case 'o':
|
||||
case 'a':
|
||||
l = lbl;
|
||||
lbl += 3;
|
||||
branch(n, l, l+1);
|
||||
fprintf(of, "@l%d\n", l);
|
||||
fprintf(of, "\tjmp @l%d\n", l+2);
|
||||
fprintf(of, "@l%d\n", l+1);
|
||||
fprintf(of, "\tjmp @l%d\n", l+2);
|
||||
fprintf(of, "@l%d\n", l+2);
|
||||
fprintf(of, "\t");
|
||||
sr.ctyp = INT;
|
||||
psymb(sr);
|
||||
fprintf(of, " =w phi @l%d 1, @l%d 0\n", l, l+1);
|
||||
break;
|
||||
|
||||
case 'V':
|
||||
s0 = lval(n);
|
||||
sr.ctyp = s0.ctyp;
|
||||
load(sr, s0);
|
||||
break;
|
||||
|
||||
case 'N':
|
||||
sr.t = Con;
|
||||
sr.u.n = n->u.n;
|
||||
sr.ctyp = INT;
|
||||
break;
|
||||
|
||||
case 'S':
|
||||
sr.t = Glo;
|
||||
sr.u.n = n->u.n;
|
||||
sr.ctyp = IDIR(INT);
|
||||
break;
|
||||
|
||||
case 'C':
|
||||
call(n, &sr);
|
||||
break;
|
||||
|
||||
case '@':
|
||||
s0 = expr(n->l);
|
||||
if (KIND(s0.ctyp) != PTR)
|
||||
die("dereference of a non-pointer");
|
||||
sr.ctyp = DREF(s0.ctyp);
|
||||
load(sr, s0);
|
||||
break;
|
||||
|
||||
case 'A':
|
||||
sr = lval(n->l);
|
||||
sr.ctyp = IDIR(sr.ctyp);
|
||||
break;
|
||||
|
||||
case '=':
|
||||
s0 = expr(n->r);
|
||||
s1 = lval(n->l);
|
||||
sr = s0;
|
||||
if (s1.ctyp == LNG && s0.ctyp == INT)
|
||||
sext(&s0);
|
||||
if (s0.ctyp != IDIR(NIL) || KIND(s1.ctyp) != PTR)
|
||||
if (s1.ctyp != IDIR(NIL) || KIND(s0.ctyp) != PTR)
|
||||
if (s1.ctyp != s0.ctyp)
|
||||
die("invalid assignment");
|
||||
fprintf(of, "\tstore%c ", irtyp(s1.ctyp));
|
||||
goto Args;
|
||||
|
||||
case 'P':
|
||||
case 'M':
|
||||
o = n->op == 'P' ? '+' : '-';
|
||||
sl = lval(n->l);
|
||||
s0.t = Tmp;
|
||||
s0.u.n = tmp++;
|
||||
s0.ctyp = sl.ctyp;
|
||||
load(s0, sl);
|
||||
s1.t = Con;
|
||||
s1.u.n = 1;
|
||||
s1.ctyp = INT;
|
||||
goto Binop;
|
||||
|
||||
default:
|
||||
s0 = expr(n->l);
|
||||
s1 = expr(n->r);
|
||||
o = n->op;
|
||||
Binop:
|
||||
sr.ctyp = prom(o, &s0, &s1);
|
||||
if (strchr("ne<l", n->op)) {
|
||||
sprintf(ty, "%c", irtyp(sr.ctyp));
|
||||
sr.ctyp = INT;
|
||||
} else
|
||||
strcpy(ty, "");
|
||||
fprintf(of, "\t");
|
||||
psymb(sr);
|
||||
fprintf(of, " =%c", irtyp(sr.ctyp));
|
||||
fprintf(of, " %s%s ", otoa[o], ty);
|
||||
Args:
|
||||
psymb(s0);
|
||||
fprintf(of, ", ");
|
||||
psymb(s1);
|
||||
fprintf(of, "\n");
|
||||
break;
|
||||
|
||||
}
|
||||
if (n->op == '-'
|
||||
&& KIND(s0.ctyp) == PTR
|
||||
&& KIND(s1.ctyp) == PTR) {
|
||||
fprintf(of, "\t%%t%d =l div ", tmp);
|
||||
psymb(sr);
|
||||
fprintf(of, ", %d\n", SIZE(DREF(s0.ctyp)));
|
||||
sr.u.n = tmp++;
|
||||
}
|
||||
if (n->op == 'P' || n->op == 'M') {
|
||||
fprintf(of, "\tstore%c ", irtyp(sl.ctyp));
|
||||
psymb(sr);
|
||||
fprintf(of, ", ");
|
||||
psymb(sl);
|
||||
fprintf(of, "\n");
|
||||
sr = s0;
|
||||
}
|
||||
return sr;
|
||||
}
|
||||
|
||||
Symb
|
||||
lval(Node *n)
|
||||
{
|
||||
Symb sr;
|
||||
|
||||
switch (n->op) {
|
||||
default:
|
||||
die("invalid lvalue");
|
||||
case 'V':
|
||||
if (!varget(n->u.v))
|
||||
die("undefined variable");
|
||||
sr = *varget(n->u.v);
|
||||
break;
|
||||
case '@':
|
||||
sr = expr(n->l);
|
||||
if (KIND(sr.ctyp) != PTR)
|
||||
die("dereference of a non-pointer");
|
||||
sr.ctyp = DREF(sr.ctyp);
|
||||
break;
|
||||
}
|
||||
return sr;
|
||||
}
|
||||
|
||||
void
|
||||
branch(Node *n, int lt, int lf)
|
||||
{
|
||||
Symb s;
|
||||
int l;
|
||||
|
||||
switch (n->op) {
|
||||
default:
|
||||
s = expr(n); /* TODO: insert comparison to 0 with proper type */
|
||||
fprintf(of, "\tjnz ");
|
||||
psymb(s);
|
||||
fprintf(of, ", @l%d, @l%d\n", lt, lf);
|
||||
break;
|
||||
case 'o':
|
||||
l = lbl;
|
||||
lbl += 1;
|
||||
branch(n->l, lt, l);
|
||||
fprintf(of, "@l%d\n", l);
|
||||
branch(n->r, lt, lf);
|
||||
break;
|
||||
case 'a':
|
||||
l = lbl;
|
||||
lbl += 1;
|
||||
branch(n->l, l, lf);
|
||||
fprintf(of, "@l%d\n", l);
|
||||
branch(n->r, lt, lf);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
stmt(Stmt *s, int b)
|
||||
{
|
||||
int l, r;
|
||||
Symb x;
|
||||
|
||||
if (!s)
|
||||
return 0;
|
||||
|
||||
switch (s->t) {
|
||||
case Ret:
|
||||
x = expr(s->p1);
|
||||
fprintf(of, "\tret ");
|
||||
psymb(x);
|
||||
fprintf(of, "\n");
|
||||
return 1;
|
||||
case Break:
|
||||
if (b < 0)
|
||||
die("break not in loop");
|
||||
fprintf(of, "\tjmp @l%d\n", b);
|
||||
return 1;
|
||||
case Expr:
|
||||
expr(s->p1);
|
||||
return 0;
|
||||
case Seq:
|
||||
return stmt(s->p1, b) || stmt(s->p2, b);
|
||||
case If:
|
||||
l = lbl;
|
||||
lbl += 3;
|
||||
branch(s->p1, l, l+1);
|
||||
fprintf(of, "@l%d\n", l);
|
||||
if (!(r=stmt(s->p2, b)))
|
||||
if (s->p3)
|
||||
fprintf(of, "\tjmp @l%d\n", l+2);
|
||||
fprintf(of, "@l%d\n", l+1);
|
||||
if (s->p3)
|
||||
if (!(r &= stmt(s->p3, b)))
|
||||
fprintf(of, "@l%d\n", l+2);
|
||||
return s->p3 && r;
|
||||
case While:
|
||||
l = lbl;
|
||||
lbl += 3;
|
||||
fprintf(of, "@l%d\n", l);
|
||||
branch(s->p1, l+1, l+2);
|
||||
fprintf(of, "@l%d\n", l+1);
|
||||
if (!stmt(s->p2, l+2))
|
||||
fprintf(of, "\tjmp @l%d\n", l);
|
||||
fprintf(of, "@l%d\n", l+2);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocate an expression node with operator op and children l and
 * r.  The payload union is left for the caller to fill in.
 */
Node *
mknode(char op, Node *l, Node *r)
{
	Node *node = alloc(sizeof *node);

	node->r = r;
	node->l = l;
	node->op = op;
	return node;
}
|
||||
|
||||
/* Build the tree for a[i], that is the dereference of a + i. */
Node *
mkidx(Node *a, Node *i)
{
	return mknode('@', mknode('+', a, i), 0);
}
|
||||
|
||||
/* Build the tree for -n as 0 - n.  The constant zero node is
 * created on first use and shared by every negation.
 */
Node *
mkneg(Node *n)
{
	static Node *zero;

	if (zero == 0) {
		zero = mknode('N', 0, 0);
		zero->u.n = 0;
	}
	return mknode('-', zero, n);
}
|
||||
|
||||
Stmt *
|
||||
mkstmt(int t, void *p1, void *p2, void *p3)
|
||||
{
|
||||
Stmt *s;
|
||||
|
||||
s = alloc(sizeof *s);
|
||||
s->t = t;
|
||||
s->p1 = p1;
|
||||
s->p2 = p2;
|
||||
s->p3 = p3;
|
||||
return s;
|
||||
}
|
||||
|
||||
/*
 * Declare the function parameter v of type ctyp and prepend it to the
 * parameter list pl.
 *
 * Returns a list node that carries the parameter name in u.v and
 * links to the rest of the list through its r child.  Dies when the
 * parameter is declared void.
 */
Node *
param(char *v, unsigned ctyp, Node *pl)
{
	Node *item;

	if (ctyp == NIL)
		die("invalid void declaration");

	item = mknode(0, 0, pl);
	varadd(v, 0, ctyp);
	strcpy(item->u.v, v);
	return item;
}
|
||||
|
||||
Stmt *
|
||||
mkfor(Node *ini, Node *tst, Node *inc, Stmt *s)
|
||||
{
|
||||
Stmt *s1, *s2;
|
||||
|
||||
if (ini)
|
||||
s1 = mkstmt(Expr, ini, 0, 0);
|
||||
else
|
||||
s1 = 0;
|
||||
if (inc) {
|
||||
s2 = mkstmt(Expr, inc, 0, 0);
|
||||
s2 = mkstmt(Seq, s, s2, 0);
|
||||
} else
|
||||
s2 = s;
|
||||
if (!tst) {
|
||||
tst = mknode('N', 0, 0);
|
||||
tst->u.n = 1;
|
||||
}
|
||||
s2 = mkstmt(While, tst, s2, 0);
|
||||
if (s1)
|
||||
return mkstmt(Seq, s1, s2, 0);
|
||||
else
|
||||
return s2;
|
||||
}
|
||||
|
||||
%}
|
||||
|
||||
%union {
|
||||
Node *n;
|
||||
Stmt *s;
|
||||
unsigned u;
|
||||
}
|
||||
|
||||
%token <n> NUM
|
||||
%token <n> STR
|
||||
%token <n> IDENT
|
||||
%token PP MM LE GE SIZEOF
|
||||
|
||||
%token TVOID TINT TLNG
|
||||
%token IF ELSE WHILE FOR BREAK RETURN
|
||||
|
||||
%right '='
|
||||
%left OR
|
||||
%left AND
|
||||
%left '&'
|
||||
%left EQ NE
|
||||
%left '<' '>' LE GE
|
||||
%left '+' '-'
|
||||
%left '*' '/' '%'
|
||||
|
||||
%type <u> type
|
||||
%type <s> stmt stmts
|
||||
%type <n> expr exp0 pref post arg0 arg1 par0 par1
|
||||
|
||||
%%
|
||||
|
||||
prog: func prog | fdcl prog | idcl prog | ;
|
||||
|
||||
fdcl: type IDENT '(' ')' ';'
|
||||
{
|
||||
varadd($2->u.v, 1, FUNC($1));
|
||||
};
|
||||
|
||||
/*
 * Global variable declaration.  Reserves the next slot of ini[] with a
 * zero initializer of the variable's IL type; main() later prints each
 * slot as `data $glo<N> = <ini[N]>`.
 */
idcl: type IDENT ';'
{
	if ($1 == NIL)
		die("invalid void declaration");
	/* this slot is taken by a variable, not a string literal */
	if (nglo == NGlo)
		die("too many globals");
	ini[nglo] = alloc(sizeof "{ x 0 }");
	sprintf(ini[nglo], "{ %c 0 }", irtyp($1));
	varadd($2->u.v, nglo++, $1);
};
|
||||
|
||||
init:
|
||||
{
|
||||
varclr();
|
||||
tmp = 0;
|
||||
};
|
||||
|
||||
func: init prot '{' dcls stmts '}'
|
||||
{
|
||||
if (!stmt($5, -1))
|
||||
fprintf(of, "\tret 0\n");
|
||||
fprintf(of, "}\n\n");
|
||||
};
|
||||
|
||||
/*
 * Function prototype.  Registers the function name, prints the IL
 * function header with one %tN argument per parameter, opens the
 * entry block, and then copies every argument into a stack slot named
 * after the parameter.
 */
prot: IDENT '(' par0 ')'
{
	Symb *s;
	Node *n;
	int t, m;

	varadd($1->u.v, 1, FUNC(INT));
	fprintf(of, "export function w $%s(", $1->u.v);

	/* argument list: "<type> %tN", comma separated */
	for (n=$3; n; n=n->r) {
		s = varget(n->u.v);
		fprintf(of, "%c ", irtyp(s->ctyp));
		fprintf(of, "%%t%d", tmp++);
		if (n->r)
			fprintf(of, ", ");
	}
	fprintf(of, ") {\n");
	fprintf(of, "@l%d\n", lbl++);

	/* one alloc + store per parameter, in declaration order */
	t = 0;
	n = $3;
	while (n) {
		s = varget(n->u.v);
		m = SIZE(s->ctyp);
		fprintf(of, "\t%%%s =l alloc%d %d\n", n->u.v, m, m);
		fprintf(of, "\tstore%c %%t%d", irtyp(s->ctyp), t);
		fprintf(of, ", %%%s\n", n->u.v);
		t++;
		n = n->r;
	}
};
|
||||
|
||||
par0: par1
|
||||
| { $$ = 0; }
|
||||
;
|
||||
par1: type IDENT ',' par1 { $$ = param($2->u.v, $1, $4); }
|
||||
| type IDENT { $$ = param($2->u.v, $1, 0); }
|
||||
;
|
||||
|
||||
|
||||
dcls: | dcls type IDENT ';'
|
||||
{
|
||||
int s;
|
||||
char *v;
|
||||
|
||||
if ($2 == NIL)
|
||||
die("invalid void declaration");
|
||||
v = $3->u.v;
|
||||
s = SIZE($2);
|
||||
varadd(v, 0, $2);
|
||||
fprintf(of, "\t%%%s =l alloc%d %d\n", v, s, s);
|
||||
};
|
||||
|
||||
type: type '*' { $$ = IDIR($1); }
|
||||
| TINT { $$ = INT; }
|
||||
| TLNG { $$ = LNG; }
|
||||
| TVOID { $$ = NIL; }
|
||||
;
|
||||
|
||||
stmt: ';' { $$ = 0; }
|
||||
| '{' stmts '}' { $$ = $2; }
|
||||
| BREAK ';' { $$ = mkstmt(Break, 0, 0, 0); }
|
||||
| RETURN expr ';' { $$ = mkstmt(Ret, $2, 0, 0); }
|
||||
| expr ';' { $$ = mkstmt(Expr, $1, 0, 0); }
|
||||
| WHILE '(' expr ')' stmt { $$ = mkstmt(While, $3, $5, 0); }
|
||||
| IF '(' expr ')' stmt ELSE stmt { $$ = mkstmt(If, $3, $5, $7); }
|
||||
| IF '(' expr ')' stmt { $$ = mkstmt(If, $3, $5, 0); }
|
||||
| FOR '(' exp0 ';' exp0 ';' exp0 ')' stmt
|
||||
{ $$ = mkfor($3, $5, $7, $9); }
|
||||
;
|
||||
|
||||
stmts: stmts stmt { $$ = mkstmt(Seq, $1, $2, 0); }
|
||||
| { $$ = 0; }
|
||||
;
|
||||
|
||||
expr: pref
|
||||
| expr '=' expr { $$ = mknode('=', $1, $3); }
|
||||
| expr '+' expr { $$ = mknode('+', $1, $3); }
|
||||
| expr '-' expr { $$ = mknode('-', $1, $3); }
|
||||
| expr '*' expr { $$ = mknode('*', $1, $3); }
|
||||
| expr '/' expr { $$ = mknode('/', $1, $3); }
|
||||
| expr '%' expr { $$ = mknode('%', $1, $3); }
|
||||
| expr '<' expr { $$ = mknode('<', $1, $3); }
|
||||
| expr '>' expr { $$ = mknode('<', $3, $1); }
|
||||
| expr LE expr { $$ = mknode('l', $1, $3); }
|
||||
| expr GE expr { $$ = mknode('l', $3, $1); }
|
||||
| expr EQ expr { $$ = mknode('e', $1, $3); }
|
||||
| expr NE expr { $$ = mknode('n', $1, $3); }
|
||||
| expr '&' expr { $$ = mknode('&', $1, $3); }
|
||||
| expr AND expr { $$ = mknode('a', $1, $3); }
|
||||
| expr OR expr { $$ = mknode('o', $1, $3); }
|
||||
;
|
||||
|
||||
exp0: expr
|
||||
| { $$ = 0; }
|
||||
;
|
||||
|
||||
pref: post
|
||||
| '-' pref { $$ = mkneg($2); }
|
||||
| '*' pref { $$ = mknode('@', $2, 0); }
|
||||
| '&' pref { $$ = mknode('A', $2, 0); }
|
||||
;
|
||||
|
||||
post: NUM
|
||||
| STR
|
||||
| IDENT
|
||||
| SIZEOF '(' type ')' { $$ = mknode('N', 0, 0); $$->u.n = SIZE($3); }
|
||||
| '(' expr ')' { $$ = $2; }
|
||||
| IDENT '(' arg0 ')' { $$ = mknode('C', $1, $3); }
|
||||
| post '[' expr ']' { $$ = mkidx($1, $3); }
|
||||
| post PP { $$ = mknode('P', $1, 0); }
|
||||
| post MM { $$ = mknode('M', $1, 0); }
|
||||
;
|
||||
|
||||
arg0: arg1
|
||||
| { $$ = 0; }
|
||||
;
|
||||
arg1: expr { $$ = mknode(0, $1, 0); }
|
||||
| expr ',' arg1 { $$ = mknode(0, $1, $3); }
|
||||
;
|
||||
|
||||
%%
|
||||
|
||||
int
|
||||
yylex()
|
||||
{
|
||||
struct {
|
||||
char *s;
|
||||
int t;
|
||||
} kwds[] = {
|
||||
{ "void", TVOID },
|
||||
{ "int", TINT },
|
||||
{ "long", TLNG },
|
||||
{ "if", IF },
|
||||
{ "else", ELSE },
|
||||
{ "for", FOR },
|
||||
{ "while", WHILE },
|
||||
{ "return", RETURN },
|
||||
{ "break", BREAK },
|
||||
{ "sizeof", SIZEOF },
|
||||
{ 0, 0 }
|
||||
};
|
||||
int i, c, c1, n;
|
||||
char v[NString], *p;
|
||||
|
||||
do {
|
||||
c = getchar();
|
||||
if (c == '#')
|
||||
while ((c = getchar()) != '\n')
|
||||
;
|
||||
if (c == '\n')
|
||||
line++;
|
||||
} while (isspace(c));
|
||||
|
||||
|
||||
if (c == EOF)
|
||||
return 0;
|
||||
|
||||
|
||||
if (isdigit(c)) {
|
||||
n = 0;
|
||||
do {
|
||||
n *= 10;
|
||||
n += c-'0';
|
||||
c = getchar();
|
||||
} while (isdigit(c));
|
||||
ungetc(c, stdin);
|
||||
yylval.n = mknode('N', 0, 0);
|
||||
yylval.n->u.n = n;
|
||||
return NUM;
|
||||
}
|
||||
|
||||
if (isalpha(c)) {
|
||||
p = v;
|
||||
do {
|
||||
if (p == &v[NString-1])
|
||||
die("ident too long");
|
||||
*p++ = c;
|
||||
c = getchar();
|
||||
} while (isalpha(c) || c == '_');
|
||||
*p = 0;
|
||||
ungetc(c, stdin);
|
||||
for (i=0; kwds[i].s; i++)
|
||||
if (strcmp(v, kwds[i].s) == 0)
|
||||
return kwds[i].t;
|
||||
yylval.n = mknode('V', 0, 0);
|
||||
strcpy(yylval.n->u.v, v);
|
||||
return IDENT;
|
||||
}
|
||||
|
||||
if (c == '"') {
|
||||
i = 0;
|
||||
n = 32;
|
||||
p = alloc(n);
|
||||
strcpy(p, "{ b \"");
|
||||
for (i=5;; i++) {
|
||||
c = getchar();
|
||||
if (c == EOF)
|
||||
die("unclosed string literal");
|
||||
if (i+8 >= n) {
|
||||
p = memcpy(alloc(n*2), p, n);
|
||||
n *= 2;
|
||||
}
|
||||
p[i] = c;
|
||||
if (c == '"' && p[i-1]!='\\')
|
||||
break;
|
||||
}
|
||||
strcpy(&p[i], "\", b 0 }");
|
||||
if (nglo == NGlo)
|
||||
die("too many globals");
|
||||
ini[nglo] = p;
|
||||
yylval.n = mknode('S', 0, 0);
|
||||
yylval.n->u.n = nglo++;
|
||||
return STR;
|
||||
}
|
||||
|
||||
c1 = getchar();
|
||||
#define DI(a, b) a + b*256
|
||||
switch (DI(c,c1)) {
|
||||
case DI('!','='): return NE;
|
||||
case DI('=','='): return EQ;
|
||||
case DI('<','='): return LE;
|
||||
case DI('>','='): return GE;
|
||||
case DI('+','+'): return PP;
|
||||
case DI('-','-'): return MM;
|
||||
case DI('&','&'): return AND;
|
||||
case DI('|','|'): return OR;
|
||||
}
|
||||
#undef DI
|
||||
ungetc(c1, stdin);
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
 * Parser error hook called by the generated parser.  The message
 * passed in is ignored; every syntax error is reported the same way.
 */
int
yyerror(char *err)
{
	(void)err;
	die("parse error");
	return 0;
}
|
||||
|
||||
int
|
||||
main()
|
||||
{
|
||||
int i;
|
||||
|
||||
of = stdout;
|
||||
nglo = 1;
|
||||
if (yyparse() != 0)
|
||||
die("parse error");
|
||||
for (i=1; i<nglo; i++)
|
||||
fprintf(of, "data $glo%d = %s\n", i, ini[i]);
|
||||
return 0;
|
||||
}
|
||||
33
src/qbe/minic/test/collatz.c
Normal file
33
src/qbe/minic/test/collatz.c
Normal file
@@ -0,0 +1,33 @@
|
||||
void *malloc();
|
||||
|
||||
main()
|
||||
{
|
||||
int n;
|
||||
int nv;
|
||||
int c;
|
||||
int cmax;
|
||||
int *mem;
|
||||
|
||||
mem = malloc(sizeof(int) * 4000);
|
||||
|
||||
cmax = 0;
|
||||
for (nv = 1; nv < 1000; nv++) {
|
||||
n = nv;
|
||||
c = 0;
|
||||
while (n != 1) {
|
||||
if (n < nv) {
|
||||
c = c + mem[n];
|
||||
break;
|
||||
}
|
||||
if (n & 1)
|
||||
n = 3*n + 1;
|
||||
else
|
||||
n = n / 2;
|
||||
c++;
|
||||
}
|
||||
mem[nv] = c;
|
||||
if (c > cmax)
|
||||
cmax = c;
|
||||
}
|
||||
printf("should print 178: %d\n", cmax);
|
||||
}
|
||||
27
src/qbe/minic/test/euler9.c
Normal file
27
src/qbe/minic/test/euler9.c
Normal file
@@ -0,0 +1,27 @@
|
||||
#include <stdio.h>
|
||||
|
||||
main()
|
||||
{
|
||||
int i;
|
||||
int a;
|
||||
int b;
|
||||
int c;
|
||||
int d;
|
||||
|
||||
for (a = 1; a < 1000; a++) {
|
||||
for (b = a + 1; b < 1000; b++) {
|
||||
d = a*a + b*b;
|
||||
for (i = 0; i < 1000; i++) {
|
||||
if (i * i == d) {
|
||||
c = i;
|
||||
if (b < c && a+b+c == 1000) {
|
||||
printf("%d\n", a*b*c);
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
60
src/qbe/minic/test/knight.c
Normal file
60
src/qbe/minic/test/knight.c
Normal file
@@ -0,0 +1,60 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
void *calloc();
|
||||
|
||||
int N;
|
||||
int **b;
|
||||
|
||||
board()
|
||||
{
|
||||
int x;
|
||||
int y;
|
||||
|
||||
for (y=0; y<8; y++) {
|
||||
for (x=0; x<8; x++)
|
||||
printf(" %02d", b[x][y]);
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
chk(int x, int y)
|
||||
{
|
||||
if (x < 0 || x > 7 || y < 0 || y > 7)
|
||||
return 0;
|
||||
return b[x][y] == 0;
|
||||
}
|
||||
|
||||
go(int k, int x, int y)
|
||||
{
|
||||
int i;
|
||||
int j;
|
||||
|
||||
b[x][y] = k;
|
||||
if (k == 64) {
|
||||
if (x != 2 && y != 0 && abs(x-2) + abs(y) == 3) {
|
||||
board();
|
||||
N++;
|
||||
if (N == 10)
|
||||
exit(0);
|
||||
}
|
||||
} else
|
||||
for (i=-2; i<=2; i++)
|
||||
for (j=-2; j<=2; j++)
|
||||
if (abs(i) + abs(j) == 3 && chk(x+i, y+j))
|
||||
go(k+1, x+i, y+j);
|
||||
b[x][y] = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
main()
|
||||
{
|
||||
int i;
|
||||
|
||||
b = calloc(8, sizeof (int *));
|
||||
for (i=0; i<8; i++)
|
||||
b[i] = calloc(8, sizeof (int));
|
||||
go(1, 2, 0);
|
||||
}
|
||||
88
src/qbe/minic/test/mandel.c
Normal file
88
src/qbe/minic/test/mandel.c
Normal file
@@ -0,0 +1,88 @@
|
||||
void *malloc();
|
||||
void *SDL_CreateWindow();
|
||||
void *SDL_CreateRenderer();
|
||||
int SDL_SetRenderDrawColor();
|
||||
int SDL_RenderDrawPoint();
|
||||
int SDL_RenderClear();
|
||||
int SDL_RenderPresent();
|
||||
int SDL_PollEvent();
|
||||
int SDL_DestroyRenderer();
|
||||
int SDL_DestroyWindow();
|
||||
int SDL_Quit();
|
||||
int SDL_Init();
|
||||
|
||||
void *win;
|
||||
void *rnd;
|
||||
int W;
|
||||
int H;
|
||||
int *col;
|
||||
|
||||
plot(int x, int y)
|
||||
{
|
||||
int n;
|
||||
int fx;
|
||||
int fy;
|
||||
int zx;
|
||||
int zy;
|
||||
int nx;
|
||||
int ny;
|
||||
|
||||
fx = (x - W/2)*4000 / W;
|
||||
fy = (y - H/2)*4000 / H;
|
||||
zx = fx;
|
||||
zy = fy;
|
||||
|
||||
for (n=0; n<200; n++) {
|
||||
if (zx*zx + zy*zy > 4000000)
|
||||
break;
|
||||
nx = (zx*zx)/1000 - (zy*zy)/1000 + fx;
|
||||
ny = zx*zy/500 + fy;
|
||||
zx = nx;
|
||||
zy = ny;
|
||||
}
|
||||
n = col[n];
|
||||
SDL_SetRenderDrawColor(rnd, 100, n, n, 255);
|
||||
SDL_RenderDrawPoint(rnd, x, y);
|
||||
return 0;
|
||||
}
|
||||
|
||||
main() {
|
||||
int c;
|
||||
int n;
|
||||
int x;
|
||||
int y;
|
||||
void *e;
|
||||
int *ie;
|
||||
|
||||
W = 800;
|
||||
H = 800;
|
||||
SDL_Init(32);
|
||||
win = SDL_CreateWindow("Mandelbrot MiniC", 0, 0, W, H, 0);
|
||||
rnd = SDL_CreateRenderer(win, -1, 0);
|
||||
e = malloc(56);
|
||||
ie = e;
|
||||
col = malloc(201 * sizeof (int));
|
||||
c = 20;
|
||||
for (n=0; n<200; n++) {
|
||||
col[n] = c;
|
||||
c = c + (255-c)/8;
|
||||
}
|
||||
col[n] = 30;
|
||||
|
||||
SDL_RenderClear(rnd);
|
||||
for (x=0; x<W; x++)
|
||||
for (y=0; y<H; y++)
|
||||
plot(x, y);
|
||||
SDL_RenderPresent(rnd);
|
||||
|
||||
for (;;) {
|
||||
if (SDL_PollEvent(e)) {
|
||||
if (ie[0] == 769)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
SDL_DestroyRenderer(rnd);
|
||||
SDL_DestroyWindow(win);
|
||||
SDL_Quit();
|
||||
}
|
||||
28
src/qbe/minic/test/prime.c
Normal file
28
src/qbe/minic/test/prime.c
Normal file
@@ -0,0 +1,28 @@
|
||||
#include <stdio.h>
|
||||
|
||||
main() {
|
||||
int n;
|
||||
int t;
|
||||
int c;
|
||||
int p;
|
||||
|
||||
c = 0;
|
||||
n = 2;
|
||||
while (n < 5000) {
|
||||
t = 2;
|
||||
p = 1;
|
||||
while (t*t <= n) {
|
||||
if (n % t == 0)
|
||||
p = 0;
|
||||
t++;
|
||||
}
|
||||
if (p) {
|
||||
if (c && c % 10 == 0)
|
||||
printf("\n");
|
||||
printf("%4d ", n);
|
||||
c++;
|
||||
}
|
||||
n++;
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
70
src/qbe/minic/test/queen.c
Normal file
70
src/qbe/minic/test/queen.c
Normal file
@@ -0,0 +1,70 @@
|
||||
int printf();
|
||||
void *calloc();
|
||||
int atoi();
|
||||
|
||||
int Q;
|
||||
int N;
|
||||
int **t;
|
||||
|
||||
print() {
|
||||
int x;
|
||||
int y;
|
||||
|
||||
for (y=0; y<Q; y++) {
|
||||
for (x=0; x<Q; x++)
|
||||
if (t[x][y])
|
||||
printf(" Q");
|
||||
else
|
||||
printf(" .");
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
chk(int x, int y) {
|
||||
int i;
|
||||
int r;
|
||||
|
||||
for (r=i=0; i<Q; i++) {
|
||||
r = r + t[x][i];
|
||||
r = r + t[i][y];
|
||||
if (x+i < Q & y+i < Q)
|
||||
r = r + t[x+i][y+i];
|
||||
if (x+i < Q & y-i >= 0)
|
||||
r = r + t[x+i][y-i];
|
||||
if (x-i >= 0 & y+i < Q)
|
||||
r = r + t[x-i][y+i];
|
||||
if (x-i >= 0 & y-i >= 0)
|
||||
r = r + t[x-i][y-i];
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
go(int y) {
|
||||
int x;
|
||||
|
||||
if (y == Q) {
|
||||
print();
|
||||
N++;
|
||||
return 0;
|
||||
}
|
||||
for (x=0; x<Q; x++)
|
||||
if (chk(x, y) == 0) {
|
||||
t[x][y]++;
|
||||
go(y+1);
|
||||
t[x][y]--;
|
||||
}
|
||||
}
|
||||
|
||||
main(int ac, void **av) {
|
||||
int i;
|
||||
|
||||
Q = 8;
|
||||
if (ac >= 2)
|
||||
Q = atoi(av[1]);
|
||||
t = calloc(Q, sizeof(int *));
|
||||
for (i=0; i<Q; i++)
|
||||
t[i] = calloc(Q, sizeof(int));
|
||||
go(0);
|
||||
printf("found %d solutions\n", N);
|
||||
}
|
||||
1378
src/qbe/minic/yacc.c
Normal file
1378
src/qbe/minic/yacc.c
Normal file
File diff suppressed because it is too large
Load Diff
228
src/qbe/ops.h
Normal file
228
src/qbe/ops.h
Normal file
@@ -0,0 +1,228 @@
|
||||
#ifndef X /* amd64 */
|
||||
#define X(NMemArgs, SetsZeroFlag, LeavesFlags)
|
||||
#endif
|
||||
|
||||
#ifndef V /* riscv64 */
|
||||
#define V(Imm)
|
||||
#endif
|
||||
|
||||
#ifndef F
|
||||
#define F(a,b,c,d,e,f,g,h,i,j)
|
||||
#endif
|
||||
|
||||
#define T(a,b,c,d,e,f,g,h) { \
|
||||
{[Kw]=K##a, [Kl]=K##b, [Ks]=K##c, [Kd]=K##d}, \
|
||||
{[Kw]=K##e, [Kl]=K##f, [Ks]=K##g, [Kd]=K##h} \
|
||||
}
|
||||
|
||||
/*********************/
|
||||
/* PUBLIC OPERATIONS */
|
||||
/*********************/
|
||||
|
||||
/* can fold */
|
||||
/* | has identity */
|
||||
/* | | identity value for arg[1] */
|
||||
/* | | | commutative */
|
||||
/* | | | | associative */
|
||||
/* | | | | | idempotent */
|
||||
/* | | | | | | c{eq,ne}[wl] */
|
||||
/* | | | | | | | c[us][gl][et][wl] */
|
||||
/* | | | | | | | | value if = args */
|
||||
/* | | | | | | | | | pinned */
|
||||
/* Arithmetic and Bits v v v v v v v v v v */
|
||||
O(add, T(w,l,s,d, w,l,s,d), F(1,1,0,1,1,0,0,0,0,0)) X(2,1,0) V(1)
|
||||
O(sub, T(w,l,s,d, w,l,s,d), F(1,1,0,0,0,0,0,0,0,0)) X(2,1,0) V(0)
|
||||
O(neg, T(w,l,s,d, x,x,x,x), F(1,0,0,0,0,0,0,0,0,0)) X(1,1,0) V(0)
|
||||
O(div, T(w,l,s,d, w,l,s,d), F(1,1,1,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(rem, T(w,l,e,e, w,l,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(udiv, T(w,l,e,e, w,l,e,e), F(1,1,1,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(urem, T(w,l,e,e, w,l,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(mul, T(w,l,s,d, w,l,s,d), F(1,1,1,1,0,0,0,0,0,0)) X(2,0,0) V(0)
|
||||
O(and, T(w,l,e,e, w,l,e,e), F(1,0,0,1,1,1,0,0,0,0)) X(2,1,0) V(1)
|
||||
O(or, T(w,l,e,e, w,l,e,e), F(1,1,0,1,1,1,0,0,0,0)) X(2,1,0) V(1)
|
||||
O(xor, T(w,l,e,e, w,l,e,e), F(1,1,0,1,1,0,0,0,0,0)) X(2,1,0) V(1)
|
||||
O(sar, T(w,l,e,e, w,w,e,e), F(1,1,0,0,0,0,0,0,0,0)) X(1,1,0) V(1)
|
||||
O(shr, T(w,l,e,e, w,w,e,e), F(1,1,0,0,0,0,0,0,0,0)) X(1,1,0) V(1)
|
||||
O(shl, T(w,l,e,e, w,w,e,e), F(1,1,0,0,0,0,0,0,0,0)) X(1,1,0) V(1)
|
||||
|
||||
/* Comparisons */
|
||||
O(ceqw, T(w,w,e,e, w,w,e,e), F(1,1,1,1,0,0,1,0,1,0)) X(0,1,0) V(0)
|
||||
O(cnew, T(w,w,e,e, w,w,e,e), F(1,1,0,1,0,0,1,0,0,0)) X(0,1,0) V(0)
|
||||
O(csgew, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
|
||||
O(csgtw, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(0)
|
||||
O(cslew, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
|
||||
O(csltw, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(1)
|
||||
O(cugew, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
|
||||
O(cugtw, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(0)
|
||||
O(culew, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
|
||||
O(cultw, T(w,w,e,e, w,w,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(1)
|
||||
|
||||
O(ceql, T(l,l,e,e, l,l,e,e), F(1,0,0,1,0,0,1,0,1,0)) X(0,1,0) V(0)
|
||||
O(cnel, T(l,l,e,e, l,l,e,e), F(1,0,0,1,0,0,1,0,0,0)) X(0,1,0) V(0)
|
||||
O(csgel, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
|
||||
O(csgtl, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(0)
|
||||
O(cslel, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
|
||||
O(csltl, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(1)
|
||||
O(cugel, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
|
||||
O(cugtl, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(0)
|
||||
O(culel, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,1,0)) X(0,1,0) V(0)
|
||||
O(cultl, T(l,l,e,e, l,l,e,e), F(1,0,0,0,0,0,0,1,0,0)) X(0,1,0) V(1)
|
||||
|
||||
O(ceqs, T(s,s,e,e, s,s,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
|
||||
O(cges, T(s,s,e,e, s,s,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
|
||||
O(cgts, T(s,s,e,e, s,s,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
|
||||
O(cles, T(s,s,e,e, s,s,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
|
||||
O(clts, T(s,s,e,e, s,s,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
|
||||
O(cnes, T(s,s,e,e, s,s,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
|
||||
O(cos, T(s,s,e,e, s,s,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
|
||||
O(cuos, T(s,s,e,e, s,s,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
|
||||
|
||||
O(ceqd, T(d,d,e,e, d,d,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
|
||||
O(cged, T(d,d,e,e, d,d,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
|
||||
O(cgtd, T(d,d,e,e, d,d,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
|
||||
O(cled, T(d,d,e,e, d,d,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
|
||||
O(cltd, T(d,d,e,e, d,d,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,1,0) V(0)
|
||||
O(cned, T(d,d,e,e, d,d,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
|
||||
O(cod, T(d,d,e,e, d,d,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
|
||||
O(cuod, T(d,d,e,e, d,d,e,e), F(1,0,0,1,0,0,0,0,0,0)) X(0,1,0) V(0)
|
||||
|
||||
/* Memory */
|
||||
O(storeb, T(w,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
|
||||
O(storeh, T(w,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
|
||||
O(storew, T(w,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
|
||||
O(storel, T(l,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
|
||||
O(stores, T(s,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
|
||||
O(stored, T(d,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
|
||||
|
||||
O(loadsb, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
|
||||
O(loadub, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
|
||||
O(loadsh, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
|
||||
O(loaduh, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
|
||||
O(loadsw, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
|
||||
O(loaduw, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
|
||||
O(load, T(m,m,m,m, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
|
||||
|
||||
/* Extensions and Truncations */
|
||||
O(extsb, T(w,w,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(extub, T(w,w,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(extsh, T(w,w,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(extuh, T(w,w,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(extsw, T(e,w,e,e, e,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(extuw, T(e,w,e,e, e,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
|
||||
O(exts, T(e,e,e,s, e,e,e,x), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(truncd, T(e,e,d,e, e,e,x,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(stosi, T(s,s,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(stoui, T(s,s,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(dtosi, T(d,d,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(dtoui, T(d,d,e,e, x,x,e,e), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(swtof, T(e,e,w,w, e,e,x,x), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(uwtof, T(e,e,w,w, e,e,x,x), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(sltof, T(e,e,l,l, e,e,x,x), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(ultof, T(e,e,l,l, e,e,x,x), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(cast, T(s,d,w,l, x,x,x,x), F(1,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
|
||||
/* Stack Allocation */
|
||||
O(alloc4, T(e,l,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
|
||||
O(alloc8, T(e,l,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
|
||||
O(alloc16, T(e,l,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
|
||||
|
||||
/* Variadic Function Helpers */
|
||||
O(vaarg, T(m,m,m,m, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
|
||||
O(vastart, T(m,e,e,e, x,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
|
||||
|
||||
O(copy, T(w,l,s,d, x,x,x,x), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
|
||||
/* Debug */
|
||||
O(dbgloc, T(w,e,e,e, w,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,1) V(0)
|
||||
|
||||
/****************************************/
|
||||
/* INTERNAL OPERATIONS (keep nop first) */
|
||||
/****************************************/
|
||||
|
||||
/* Miscellaneous and Architecture-Specific Operations */
|
||||
O(nop, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(addr, T(m,m,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(blit0, T(m,e,e,e, m,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,1,0) V(0)
|
||||
O(blit1, T(w,e,e,e, x,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,1,0) V(0)
|
||||
O(sel0, T(w,e,e,e, x,e,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
|
||||
O(sel1, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
|
||||
O(swap, T(w,l,s,d, w,l,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(1,0,0) V(0)
|
||||
O(sign, T(w,l,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(salloc, T(e,l,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(xidiv, T(w,l,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(1,0,0) V(0)
|
||||
O(xdiv, T(w,l,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(1,0,0) V(0)
|
||||
O(xcmp, T(w,l,s,d, w,l,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(1,1,0) V(0)
|
||||
O(xtest, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(1,1,0) V(0)
|
||||
O(acmp, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(acmn, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(afcmp, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(reqz, T(w,l,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(rnez, T(w,l,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
|
||||
/* Arguments, Parameters, and Calls */
|
||||
O(par, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
|
||||
O(parsb, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
|
||||
O(parub, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
|
||||
O(parsh, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
|
||||
O(paruh, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
|
||||
O(parc, T(e,x,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
|
||||
O(pare, T(e,x,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
|
||||
O(arg, T(w,l,s,d, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
|
||||
O(argsb, T(w,e,e,e, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
|
||||
O(argub, T(w,e,e,e, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
|
||||
O(argsh, T(w,e,e,e, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
|
||||
O(arguh, T(w,e,e,e, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
|
||||
O(argc, T(e,x,e,e, e,l,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
|
||||
O(arge, T(e,l,e,e, e,x,e,e), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
|
||||
O(argv, T(x,x,x,x, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
|
||||
O(call, T(m,m,m,m, x,x,x,x), F(0,0,0,0,0,0,0,0,0,1)) X(0,0,0) V(0)
|
||||
|
||||
/* Flags Setting */
|
||||
O(flagieq, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(flagine, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(flagisge, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(flagisgt, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(flagisle, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(flagislt, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(flagiuge, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(flagiugt, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(flagiule, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(flagiult, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(flagfeq, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(flagfge, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(flagfgt, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(flagfle, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(flagflt, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(flagfne, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(flagfo, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
O(flagfuo, T(x,x,e,e, x,x,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,1) V(0)
|
||||
|
||||
/* Backend Flag Select (Condition Move) */
|
||||
O(xselieq, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(xseline, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(xselisge, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(xselisgt, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(xselisle, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(xselislt, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(xseliuge, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(xseliugt, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(xseliule, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(xseliult, T(w,l,e,e, w,l,e,e), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(xselfeq, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(xselfge, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(xselfgt, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(xselfle, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(xselflt, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(xselfne, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(xselfo, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
O(xselfuo, T(e,e,s,d, e,e,s,d), F(0,0,0,0,0,0,0,0,0,0)) X(0,0,0) V(0)
|
||||
|
||||
#undef T
|
||||
#undef X
|
||||
#undef V
|
||||
#undef O
|
||||
|
||||
/*
|
||||
| column -t -o ' '
|
||||
*/
|
||||
43
src/qbe/out.s
Normal file
43
src/qbe/out.s
Normal file
@@ -0,0 +1,43 @@
|
||||
.text
|
||||
.balign 4
|
||||
_add:
|
||||
hint #34
|
||||
stp x29, x30, [sp, -16]!
|
||||
mov x29, sp
|
||||
add w0, w0, w1
|
||||
ldp x29, x30, [sp], 16
|
||||
ret
|
||||
/* end function add */
|
||||
|
||||
.text
|
||||
.balign 4
|
||||
.globl _main
|
||||
_main:
|
||||
hint #34
|
||||
stp x29, x30, [sp, -16]!
|
||||
mov x29, sp
|
||||
mov w1, #1
|
||||
mov w0, #1
|
||||
bl _add
|
||||
mov x1, #16
|
||||
sub sp, sp, x1
|
||||
mov x1, #0
|
||||
add x1, sp, x1
|
||||
str w0, [x1]
|
||||
adrp x0, _fmt@page
|
||||
add x0, x0, _fmt@pageoff
|
||||
bl _printf
|
||||
mov x0, #16
|
||||
add sp, sp, x0
|
||||
mov w0, #0
|
||||
ldp x29, x30, [sp], 16
|
||||
ret
|
||||
/* end function main */
|
||||
|
||||
.data
|
||||
.balign 8
|
||||
_fmt:
|
||||
.ascii "One and one make %d!\n"
|
||||
.byte 0
|
||||
/* end data */
|
||||
|
||||
1433
src/qbe/parse.c
Normal file
1433
src/qbe/parse.c
Normal file
File diff suppressed because it is too large
Load Diff
696
src/qbe/rega.c
Normal file
696
src/qbe/rega.c
Normal file
@@ -0,0 +1,696 @@
|
||||
#include "all.h"
|
||||
|
||||
#ifdef TEST_PMOV
|
||||
#undef assert
|
||||
#define assert(x) assert_test(#x, x)
|
||||
#endif
|
||||
|
||||
typedef struct RMap RMap;
|
||||
|
||||
struct RMap {
|
||||
int t[Tmp0];
|
||||
int r[Tmp0];
|
||||
int w[Tmp0]; /* wait list, for unmatched hints */
|
||||
BSet b[1];
|
||||
int n;
|
||||
};
|
||||
|
||||
static bits regu; /* registers used */
|
||||
static Tmp *tmp; /* function temporaries */
|
||||
static Mem *mem; /* function mem references */
|
||||
static struct {
|
||||
Ref src, dst;
|
||||
int cls;
|
||||
} pm[Tmp0]; /* parallel move constructed */
|
||||
static int npm; /* size of pm */
|
||||
static int loop; /* current loop level */
|
||||
|
||||
static uint stmov; /* stats: added moves */
|
||||
static uint stblk; /* stats: added blocks */
|
||||
|
||||
static int *
|
||||
hint(int t)
|
||||
{
|
||||
return &tmp[phicls(t, tmp)].hint.r;
|
||||
}
|
||||
|
||||
static void
|
||||
sethint(int t, int r)
|
||||
{
|
||||
Tmp *p;
|
||||
|
||||
p = &tmp[phicls(t, tmp)];
|
||||
if (p->hint.r == -1 || p->hint.w > loop) {
|
||||
p->hint.r = r;
|
||||
p->hint.w = loop;
|
||||
tmp[t].visit = -1;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
rcopy(RMap *ma, RMap *mb)
|
||||
{
|
||||
memcpy(ma->t, mb->t, sizeof ma->t);
|
||||
memcpy(ma->r, mb->r, sizeof ma->r);
|
||||
memcpy(ma->w, mb->w, sizeof ma->w);
|
||||
bscopy(ma->b, mb->b);
|
||||
ma->n = mb->n;
|
||||
}
|
||||
|
||||
static int
|
||||
rfind(RMap *m, int t)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i=0; i<m->n; i++)
|
||||
if (m->t[i] == t)
|
||||
return m->r[i];
|
||||
return -1;
|
||||
}
|
||||
|
||||
static Ref
|
||||
rref(RMap *m, int t)
|
||||
{
|
||||
int r, s;
|
||||
|
||||
r = rfind(m, t);
|
||||
if (r == -1) {
|
||||
s = tmp[t].slot;
|
||||
assert(s != -1 && "should have spilled");
|
||||
return SLOT(s);
|
||||
} else
|
||||
return TMP(r);
|
||||
}
|
||||
|
||||
static void
|
||||
radd(RMap *m, int t, int r)
|
||||
{
|
||||
assert((t >= Tmp0 || t == r) && "invalid temporary");
|
||||
assert(((T.gpr0 <= r && r < T.gpr0 + T.ngpr)
|
||||
|| (T.fpr0 <= r && r < T.fpr0 + T.nfpr))
|
||||
&& "invalid register");
|
||||
assert(!bshas(m->b, t) && "temporary has mapping");
|
||||
assert(!bshas(m->b, r) && "register already allocated");
|
||||
assert(m->n <= T.ngpr+T.nfpr && "too many mappings");
|
||||
bsset(m->b, t);
|
||||
bsset(m->b, r);
|
||||
m->t[m->n] = t;
|
||||
m->r[m->n] = r;
|
||||
m->n++;
|
||||
regu |= BIT(r);
|
||||
}
|
||||
|
||||
static Ref
|
||||
ralloctry(RMap *m, int t, int try)
|
||||
{
|
||||
bits regs;
|
||||
int h, r, r0, r1;
|
||||
|
||||
if (t < Tmp0) {
|
||||
assert(bshas(m->b, t));
|
||||
return TMP(t);
|
||||
}
|
||||
if (bshas(m->b, t)) {
|
||||
r = rfind(m, t);
|
||||
assert(r != -1);
|
||||
return TMP(r);
|
||||
}
|
||||
r = tmp[t].visit;
|
||||
if (r == -1 || bshas(m->b, r))
|
||||
r = *hint(t);
|
||||
if (r == -1 || bshas(m->b, r)) {
|
||||
if (try)
|
||||
return R;
|
||||
regs = tmp[phicls(t, tmp)].hint.m;
|
||||
regs |= m->b->t[0];
|
||||
if (KBASE(tmp[t].cls) == 0) {
|
||||
r0 = T.gpr0;
|
||||
r1 = r0 + T.ngpr;
|
||||
} else {
|
||||
r0 = T.fpr0;
|
||||
r1 = r0 + T.nfpr;
|
||||
}
|
||||
for (r=r0; r<r1; r++)
|
||||
if (!(regs & BIT(r)))
|
||||
goto Found;
|
||||
for (r=r0; r<r1; r++)
|
||||
if (!bshas(m->b, r))
|
||||
goto Found;
|
||||
die("no more regs");
|
||||
}
|
||||
Found:
|
||||
radd(m, t, r);
|
||||
sethint(t, r);
|
||||
tmp[t].visit = r;
|
||||
h = *hint(t);
|
||||
if (h != -1 && h != r)
|
||||
m->w[h] = t;
|
||||
return TMP(r);
|
||||
}
|
||||
|
||||
static inline Ref
|
||||
ralloc(RMap *m, int t)
|
||||
{
|
||||
return ralloctry(m, t, 0);
|
||||
}
|
||||
|
||||
static int
|
||||
rfree(RMap *m, int t)
|
||||
{
|
||||
int i, r;
|
||||
|
||||
assert(t >= Tmp0 || !(BIT(t) & T.rglob));
|
||||
if (!bshas(m->b, t))
|
||||
return -1;
|
||||
for (i=0; m->t[i] != t; i++)
|
||||
assert(i+1 < m->n);
|
||||
r = m->r[i];
|
||||
bsclr(m->b, t);
|
||||
bsclr(m->b, r);
|
||||
m->n--;
|
||||
memmove(&m->t[i], &m->t[i+1], (m->n-i) * sizeof m->t[0]);
|
||||
memmove(&m->r[i], &m->r[i+1], (m->n-i) * sizeof m->r[0]);
|
||||
assert(t >= Tmp0 || t == r);
|
||||
return r;
|
||||
}
|
||||
|
||||
static void
|
||||
mdump(RMap *m)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i=0; i<m->n; i++)
|
||||
if (m->t[i] >= Tmp0)
|
||||
fprintf(stderr, " (%s, R%d)",
|
||||
tmp[m->t[i]].name,
|
||||
m->r[i]);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
static void
|
||||
pmadd(Ref src, Ref dst, int k)
|
||||
{
|
||||
if (npm == Tmp0)
|
||||
die("cannot have more moves than registers");
|
||||
pm[npm].src = src;
|
||||
pm[npm].dst = dst;
|
||||
pm[npm].cls = k;
|
||||
npm++;
|
||||
}
|
||||
|
||||
enum PMStat { ToMove, Moving, Moved };
|
||||
|
||||
static int
|
||||
pmrec(enum PMStat *status, int i, int *k)
|
||||
{
|
||||
int j, c;
|
||||
|
||||
/* note, this routine might emit
|
||||
* too many large instructions
|
||||
*/
|
||||
if (req(pm[i].src, pm[i].dst)) {
|
||||
status[i] = Moved;
|
||||
return -1;
|
||||
}
|
||||
assert(KBASE(pm[i].cls) == KBASE(*k));
|
||||
assert((Kw|Kl) == Kl && (Ks|Kd) == Kd);
|
||||
*k |= pm[i].cls;
|
||||
for (j=0; j<npm; j++)
|
||||
if (req(pm[j].dst, pm[i].src))
|
||||
break;
|
||||
switch (j == npm ? Moved : status[j]) {
|
||||
case Moving:
|
||||
c = j; /* start of cycle */
|
||||
emit(Oswap, *k, R, pm[i].src, pm[i].dst);
|
||||
break;
|
||||
case ToMove:
|
||||
status[i] = Moving;
|
||||
c = pmrec(status, j, k);
|
||||
if (c == i) {
|
||||
c = -1; /* end of cycle */
|
||||
break;
|
||||
}
|
||||
if (c != -1) {
|
||||
emit(Oswap, *k, R, pm[i].src, pm[i].dst);
|
||||
break;
|
||||
}
|
||||
/* fall through */
|
||||
case Moved:
|
||||
c = -1;
|
||||
emit(Ocopy, pm[i].cls, pm[i].dst, pm[i].src, R);
|
||||
break;
|
||||
default:
|
||||
die("unreachable");
|
||||
}
|
||||
status[i] = Moved;
|
||||
return c;
|
||||
}
|
||||
|
||||
static void
|
||||
pmgen()
|
||||
{
|
||||
int i;
|
||||
enum PMStat *status;
|
||||
|
||||
status = alloc(npm * sizeof status[0]);
|
||||
assert(!npm || status[npm-1] == ToMove);
|
||||
for (i=0; i<npm; i++)
|
||||
if (status[i] == ToMove)
|
||||
pmrec(status, i, (int[]){pm[i].cls});
|
||||
}
|
||||
|
||||
static void
|
||||
move(int r, Ref to, RMap *m)
|
||||
{
|
||||
int n, t, r1;
|
||||
|
||||
r1 = req(to, R) ? -1 : rfree(m, to.val);
|
||||
if (bshas(m->b, r)) {
|
||||
/* r is used and not by to */
|
||||
assert(r1 != r);
|
||||
for (n=0; m->r[n] != r; n++)
|
||||
assert(n+1 < m->n);
|
||||
t = m->t[n];
|
||||
rfree(m, t);
|
||||
bsset(m->b, r);
|
||||
ralloc(m, t);
|
||||
bsclr(m->b, r);
|
||||
}
|
||||
t = req(to, R) ? r : to.val;
|
||||
radd(m, t, r);
|
||||
}
|
||||
|
||||
static int
|
||||
regcpy(Ins *i)
|
||||
{
|
||||
return i->op == Ocopy && isreg(i->arg[0]);
|
||||
}
|
||||
|
||||
static Ins *
|
||||
dopm(Blk *b, Ins *i, RMap *m)
|
||||
{
|
||||
RMap m0;
|
||||
int n, r, r1, t, s;
|
||||
Ins *i1, *ip;
|
||||
bits def;
|
||||
|
||||
m0 = *m; /* okay since we don't use m0.b */
|
||||
m0.b->t = 0;
|
||||
i1 = ++i;
|
||||
do {
|
||||
i--;
|
||||
move(i->arg[0].val, i->to, m);
|
||||
} while (i != b->ins && regcpy(i-1));
|
||||
assert(m0.n <= m->n);
|
||||
if (i != b->ins && (i-1)->op == Ocall) {
|
||||
def = T.retregs((i-1)->arg[1], 0) | T.rglob;
|
||||
for (r=0; T.rsave[r]>=0; r++)
|
||||
if (!(BIT(T.rsave[r]) & def))
|
||||
move(T.rsave[r], R, m);
|
||||
}
|
||||
for (npm=0, n=0; n<m->n; n++) {
|
||||
t = m->t[n];
|
||||
s = tmp[t].slot;
|
||||
r1 = m->r[n];
|
||||
r = rfind(&m0, t);
|
||||
if (r != -1)
|
||||
pmadd(TMP(r1), TMP(r), tmp[t].cls);
|
||||
else if (s != -1)
|
||||
pmadd(TMP(r1), SLOT(s), tmp[t].cls);
|
||||
}
|
||||
for (ip=i; ip<i1; ip++) {
|
||||
if (!req(ip->to, R))
|
||||
rfree(m, ip->to.val);
|
||||
r = ip->arg[0].val;
|
||||
if (rfind(m, r) == -1)
|
||||
radd(m, r, r);
|
||||
}
|
||||
pmgen();
|
||||
return i;
|
||||
}
|
||||
|
||||
static int
|
||||
prio1(Ref r1, Ref r2)
|
||||
{
|
||||
/* trivial heuristic to begin with,
|
||||
* later we can use the distance to
|
||||
* the definition instruction
|
||||
*/
|
||||
(void) r2;
|
||||
return *hint(r1.val) != -1;
|
||||
}
|
||||
|
||||
static void
|
||||
insert(Ref *r, Ref **rs, int p)
|
||||
{
|
||||
int i;
|
||||
|
||||
rs[i = p] = r;
|
||||
while (i-- > 0 && prio1(*r, *rs[i])) {
|
||||
rs[i+1] = rs[i];
|
||||
rs[i] = r;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
doblk(Blk *b, RMap *cur)
|
||||
{
|
||||
int t, x, r, rf, rt, nr;
|
||||
bits rs;
|
||||
Ins *i, *i1;
|
||||
Mem *m;
|
||||
Ref *ra[4];
|
||||
|
||||
if (rtype(b->jmp.arg) == RTmp)
|
||||
b->jmp.arg = ralloc(cur, b->jmp.arg.val);
|
||||
curi = &insb[NIns];
|
||||
for (i1=&b->ins[b->nins]; i1!=b->ins;) {
|
||||
emiti(*--i1);
|
||||
i = curi;
|
||||
rf = -1;
|
||||
switch (i->op) {
|
||||
case Ocall:
|
||||
rs = T.argregs(i->arg[1], 0) | T.rglob;
|
||||
for (r=0; T.rsave[r]>=0; r++)
|
||||
if (!(BIT(T.rsave[r]) & rs))
|
||||
rfree(cur, T.rsave[r]);
|
||||
break;
|
||||
case Ocopy:
|
||||
if (regcpy(i)) {
|
||||
curi++;
|
||||
i1 = dopm(b, i1, cur);
|
||||
stmov += i+1 - curi;
|
||||
continue;
|
||||
}
|
||||
if (isreg(i->to))
|
||||
if (rtype(i->arg[0]) == RTmp)
|
||||
sethint(i->arg[0].val, i->to.val);
|
||||
/* fall through */
|
||||
default:
|
||||
if (!req(i->to, R)) {
|
||||
assert(rtype(i->to) == RTmp);
|
||||
r = i->to.val;
|
||||
if (r < Tmp0 && (BIT(r) & T.rglob))
|
||||
break;
|
||||
rf = rfree(cur, r);
|
||||
if (rf == -1) {
|
||||
assert(!isreg(i->to));
|
||||
curi++;
|
||||
continue;
|
||||
}
|
||||
i->to = TMP(rf);
|
||||
}
|
||||
break;
|
||||
}
|
||||
for (x=0, nr=0; x<2; x++)
|
||||
switch (rtype(i->arg[x])) {
|
||||
case RMem:
|
||||
m = &mem[i->arg[x].val];
|
||||
if (rtype(m->base) == RTmp)
|
||||
insert(&m->base, ra, nr++);
|
||||
if (rtype(m->index) == RTmp)
|
||||
insert(&m->index, ra, nr++);
|
||||
break;
|
||||
case RTmp:
|
||||
insert(&i->arg[x], ra, nr++);
|
||||
break;
|
||||
}
|
||||
for (r=0; r<nr; r++)
|
||||
*ra[r] = ralloc(cur, ra[r]->val);
|
||||
if (i->op == Ocopy && req(i->to, i->arg[0]))
|
||||
curi++;
|
||||
|
||||
/* try to change the register of a hinted
|
||||
* temporary if rf is available */
|
||||
if (rf != -1 && (t = cur->w[rf]) != 0)
|
||||
if (!bshas(cur->b, rf) && *hint(t) == rf
|
||||
&& (rt = rfree(cur, t)) != -1) {
|
||||
tmp[t].visit = -1;
|
||||
ralloc(cur, t);
|
||||
assert(bshas(cur->b, rf));
|
||||
emit(Ocopy, tmp[t].cls, TMP(rt), TMP(rf), R);
|
||||
stmov += 1;
|
||||
cur->w[rf] = 0;
|
||||
for (r=0; r<nr; r++)
|
||||
if (req(*ra[r], TMP(rt)))
|
||||
*ra[r] = TMP(rf);
|
||||
/* one could iterate this logic with
|
||||
* the newly freed rt, but in this case
|
||||
* the above loop must be changed */
|
||||
}
|
||||
}
|
||||
idup(b, curi, &insb[NIns]-curi);
|
||||
}
|
||||
|
||||
/* qsort() comparison function to peel
|
||||
* loop nests from inside out */
|
||||
static int
|
||||
carve(const void *a, const void *b)
|
||||
{
|
||||
Blk *ba, *bb;
|
||||
|
||||
/* todo, evaluate if this order is really
|
||||
* better than the simple postorder */
|
||||
ba = *(Blk**)a;
|
||||
bb = *(Blk**)b;
|
||||
if (ba->loop == bb->loop)
|
||||
return ba->id > bb->id ? -1 : ba->id < bb->id;
|
||||
return ba->loop > bb->loop ? -1 : +1;
|
||||
}
|
||||
|
||||
/* comparison function to order temporaries
|
||||
* for allocation at the end of blocks */
|
||||
static int
|
||||
prio2(int t1, int t2)
|
||||
{
|
||||
if ((tmp[t1].visit ^ tmp[t2].visit) < 0) /* != signs */
|
||||
return tmp[t1].visit != -1 ? +1 : -1;
|
||||
if ((*hint(t1) ^ *hint(t2)) < 0)
|
||||
return *hint(t1) != -1 ? +1 : -1;
|
||||
return tmp[t1].cost - tmp[t2].cost;
|
||||
}
|
||||
|
||||
/* register allocation
|
||||
* depends on rpo, phi, cost, (and obviously spill)
|
||||
*/
|
||||
void
|
||||
rega(Fn *fn)
|
||||
{
|
||||
int j, t, r, x, rl[Tmp0];
|
||||
Blk *b, *b1, *s, ***ps, *blist, **blk, **bp;
|
||||
RMap *end, *beg, cur, old, *m;
|
||||
Ins *i;
|
||||
Phi *p;
|
||||
uint u, n;
|
||||
Ref src, dst;
|
||||
|
||||
/* 1. setup */
|
||||
stmov = 0;
|
||||
stblk = 0;
|
||||
regu = 0;
|
||||
tmp = fn->tmp;
|
||||
mem = fn->mem;
|
||||
blk = alloc(fn->nblk * sizeof blk[0]);
|
||||
end = alloc(fn->nblk * sizeof end[0]);
|
||||
beg = alloc(fn->nblk * sizeof beg[0]);
|
||||
for (n=0; n<fn->nblk; n++) {
|
||||
bsinit(end[n].b, fn->ntmp);
|
||||
bsinit(beg[n].b, fn->ntmp);
|
||||
}
|
||||
bsinit(cur.b, fn->ntmp);
|
||||
bsinit(old.b, fn->ntmp);
|
||||
|
||||
loop = INT_MAX;
|
||||
for (t=0; t<fn->ntmp; t++) {
|
||||
tmp[t].hint.r = t < Tmp0 ? t : -1;
|
||||
tmp[t].hint.w = loop;
|
||||
tmp[t].visit = -1;
|
||||
}
|
||||
for (bp=blk, b=fn->start; b; b=b->link)
|
||||
*bp++ = b;
|
||||
qsort(blk, fn->nblk, sizeof blk[0], carve);
|
||||
for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++)
|
||||
if (i->op != Ocopy || !isreg(i->arg[0]))
|
||||
break;
|
||||
else {
|
||||
assert(rtype(i->to) == RTmp);
|
||||
sethint(i->to.val, i->arg[0].val);
|
||||
}
|
||||
|
||||
/* 2. assign registers */
|
||||
for (bp=blk; bp<&blk[fn->nblk]; bp++) {
|
||||
b = *bp;
|
||||
n = b->id;
|
||||
loop = b->loop;
|
||||
cur.n = 0;
|
||||
bszero(cur.b);
|
||||
memset(cur.w, 0, sizeof cur.w);
|
||||
for (x=0, t=Tmp0; bsiter(b->out, &t); t++) {
|
||||
j = x++;
|
||||
rl[j] = t;
|
||||
while (j-- > 0 && prio2(t, rl[j]) > 0) {
|
||||
rl[j+1] = rl[j];
|
||||
rl[j] = t;
|
||||
}
|
||||
}
|
||||
for (r=0; bsiter(b->out, &r) && r<Tmp0; r++)
|
||||
radd(&cur, r, r);
|
||||
for (j=0; j<x; j++)
|
||||
ralloctry(&cur, rl[j], 1);
|
||||
for (j=0; j<x; j++)
|
||||
ralloc(&cur, rl[j]);
|
||||
rcopy(&end[n], &cur);
|
||||
doblk(b, &cur);
|
||||
bscopy(b->in, cur.b);
|
||||
for (p=b->phi; p; p=p->link)
|
||||
if (rtype(p->to) == RTmp)
|
||||
bsclr(b->in, p->to.val);
|
||||
rcopy(&beg[n], &cur);
|
||||
}
|
||||
|
||||
/* 3. emit copies shared by multiple edges
|
||||
* to the same block */
|
||||
for (s=fn->start; s; s=s->link) {
|
||||
if (s->npred <= 1)
|
||||
continue;
|
||||
m = &beg[s->id];
|
||||
|
||||
/* rl maps a register that is live at the
|
||||
* beginning of s to the one used in all
|
||||
* predecessors (if any, -1 otherwise) */
|
||||
memset(rl, 0, sizeof rl);
|
||||
|
||||
/* to find the register of a phi in a
|
||||
* predecessor, we have to find the
|
||||
* corresponding argument */
|
||||
for (p=s->phi; p; p=p->link) {
|
||||
if (rtype(p->to) != RTmp
|
||||
|| (r=rfind(m, p->to.val)) == -1)
|
||||
continue;
|
||||
for (u=0; u<p->narg; u++) {
|
||||
b = p->blk[u];
|
||||
src = p->arg[u];
|
||||
if (rtype(src) != RTmp)
|
||||
continue;
|
||||
x = rfind(&end[b->id], src.val);
|
||||
if (x == -1) /* spilled */
|
||||
continue;
|
||||
rl[r] = (!rl[r] || rl[r] == x) ? x : -1;
|
||||
}
|
||||
if (rl[r] == 0)
|
||||
rl[r] = -1;
|
||||
}
|
||||
|
||||
/* process non-phis temporaries */
|
||||
for (j=0; j<m->n; j++) {
|
||||
t = m->t[j];
|
||||
r = m->r[j];
|
||||
if (rl[r] || t < Tmp0 /* todo, remove this */)
|
||||
continue;
|
||||
for (bp=s->pred; bp<&s->pred[s->npred]; bp++) {
|
||||
x = rfind(&end[(*bp)->id], t);
|
||||
if (x == -1) /* spilled */
|
||||
continue;
|
||||
rl[r] = (!rl[r] || rl[r] == x) ? x : -1;
|
||||
}
|
||||
if (rl[r] == 0)
|
||||
rl[r] = -1;
|
||||
}
|
||||
|
||||
npm = 0;
|
||||
for (j=0; j<m->n; j++) {
|
||||
t = m->t[j];
|
||||
r = m->r[j];
|
||||
x = rl[r];
|
||||
assert(x != 0 || t < Tmp0 /* todo, ditto */);
|
||||
if (x > 0 && !bshas(m->b, x)) {
|
||||
pmadd(TMP(x), TMP(r), tmp[t].cls);
|
||||
m->r[j] = x;
|
||||
bsset(m->b, x);
|
||||
}
|
||||
}
|
||||
curi = &insb[NIns];
|
||||
pmgen();
|
||||
j = &insb[NIns] - curi;
|
||||
if (j == 0)
|
||||
continue;
|
||||
stmov += j;
|
||||
s->nins += j;
|
||||
i = alloc(s->nins * sizeof(Ins));
|
||||
icpy(icpy(i, curi, j), s->ins, s->nins-j);
|
||||
s->ins = i;
|
||||
}
|
||||
|
||||
if (debug['R']) {
|
||||
fprintf(stderr, "\n> Register mappings:\n");
|
||||
for (n=0; n<fn->nblk; n++) {
|
||||
b = fn->rpo[n];
|
||||
fprintf(stderr, "\t%-10s beg", b->name);
|
||||
mdump(&beg[n]);
|
||||
fprintf(stderr, "\t end");
|
||||
mdump(&end[n]);
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
/* 4. emit remaining copies in new blocks */
|
||||
blist = 0;
|
||||
for (b=fn->start;; b=b->link) {
|
||||
ps = (Blk**[3]){&b->s1, &b->s2, (Blk*[1]){0}};
|
||||
for (; (s=**ps); ps++) {
|
||||
npm = 0;
|
||||
for (p=s->phi; p; p=p->link) {
|
||||
dst = p->to;
|
||||
assert(rtype(dst)==RSlot || rtype(dst)==RTmp);
|
||||
if (rtype(dst) == RTmp) {
|
||||
r = rfind(&beg[s->id], dst.val);
|
||||
if (r == -1)
|
||||
continue;
|
||||
dst = TMP(r);
|
||||
}
|
||||
for (u=0; p->blk[u]!=b; u++)
|
||||
assert(u+1 < p->narg);
|
||||
src = p->arg[u];
|
||||
if (rtype(src) == RTmp)
|
||||
src = rref(&end[b->id], src.val);
|
||||
pmadd(src, dst, p->cls);
|
||||
}
|
||||
for (t=Tmp0; bsiter(s->in, &t); t++) {
|
||||
src = rref(&end[b->id], t);
|
||||
dst = rref(&beg[s->id], t);
|
||||
pmadd(src, dst, tmp[t].cls);
|
||||
}
|
||||
curi = &insb[NIns];
|
||||
pmgen();
|
||||
if (curi == &insb[NIns])
|
||||
continue;
|
||||
b1 = newblk();
|
||||
b1->loop = (b->loop+s->loop) / 2;
|
||||
b1->link = blist;
|
||||
blist = b1;
|
||||
fn->nblk++;
|
||||
strf(b1->name, "%s_%s", b->name, s->name);
|
||||
stmov += &insb[NIns]-curi;
|
||||
stblk += 1;
|
||||
idup(b1, curi, &insb[NIns]-curi);
|
||||
b1->jmp.type = Jjmp;
|
||||
b1->s1 = s;
|
||||
**ps = b1;
|
||||
}
|
||||
if (!b->link) {
|
||||
b->link = blist;
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (b=fn->start; b; b=b->link)
|
||||
b->phi = 0;
|
||||
fn->reg = regu;
|
||||
|
||||
if (debug['R']) {
|
||||
fprintf(stderr, "\n> Register allocation statistics:\n");
|
||||
fprintf(stderr, "\tnew moves: %d\n", stmov);
|
||||
fprintf(stderr, "\tnew blocks: %d\n", stblk);
|
||||
fprintf(stderr, "\n> After register allocation:\n");
|
||||
printfn(fn, stderr);
|
||||
}
|
||||
}
|
||||
653
src/qbe/rv64/abi.c
Normal file
653
src/qbe/rv64/abi.c
Normal file
@@ -0,0 +1,653 @@
|
||||
#include "all.h"
|
||||
|
||||
/* the risc-v lp64d abi */
|
||||
|
||||
typedef struct Class Class;
|
||||
typedef struct Insl Insl;
|
||||
typedef struct Params Params;
|
||||
|
||||
enum {
|
||||
Cptr = 1, /* replaced by a pointer */
|
||||
Cstk1 = 2, /* pass first XLEN on the stack */
|
||||
Cstk2 = 4, /* pass second XLEN on the stack */
|
||||
Cstk = Cstk1 | Cstk2,
|
||||
Cfpint = 8, /* float passed like integer */
|
||||
};
|
||||
|
||||
struct Class {
|
||||
char class;
|
||||
Typ *type;
|
||||
int reg[2];
|
||||
int cls[2];
|
||||
int off[2];
|
||||
char ngp; /* only valid after typclass() */
|
||||
char nfp; /* ditto */
|
||||
char nreg;
|
||||
};
|
||||
|
||||
struct Insl {
|
||||
Ins i;
|
||||
Insl *link;
|
||||
};
|
||||
|
||||
struct Params {
|
||||
int ngp;
|
||||
int nfp;
|
||||
int stk; /* stack offset for varargs */
|
||||
};
|
||||
|
||||
static int gpreg[10] = {A0, A1, A2, A3, A4, A5, A6, A7};
|
||||
static int fpreg[10] = {FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7};
|
||||
|
||||
/* layout of call's second argument (RCall)
|
||||
*
|
||||
* 29 12 8 4 2 0
|
||||
* |0.00|x|xxxx|xxxx|xx|xx| range
|
||||
* | | | | ` gp regs returned (0..2)
|
||||
* | | | ` fp regs returned (0..2)
|
||||
* | | ` gp regs passed (0..8)
|
||||
* | ` fp regs passed (0..8)
|
||||
* ` env pointer passed in t5 (0..1)
|
||||
*/
|
||||
|
||||
bits
|
||||
rv64_retregs(Ref r, int p[2])
|
||||
{
|
||||
bits b;
|
||||
int ngp, nfp;
|
||||
|
||||
assert(rtype(r) == RCall);
|
||||
ngp = r.val & 3;
|
||||
nfp = (r.val >> 2) & 3;
|
||||
if (p) {
|
||||
p[0] = ngp;
|
||||
p[1] = nfp;
|
||||
}
|
||||
b = 0;
|
||||
while (ngp--)
|
||||
b |= BIT(A0+ngp);
|
||||
while (nfp--)
|
||||
b |= BIT(FA0+nfp);
|
||||
return b;
|
||||
}
|
||||
|
||||
bits
|
||||
rv64_argregs(Ref r, int p[2])
|
||||
{
|
||||
bits b;
|
||||
int ngp, nfp, t5;
|
||||
|
||||
assert(rtype(r) == RCall);
|
||||
ngp = (r.val >> 4) & 15;
|
||||
nfp = (r.val >> 8) & 15;
|
||||
t5 = (r.val >> 12) & 1;
|
||||
if (p) {
|
||||
p[0] = ngp + t5;
|
||||
p[1] = nfp;
|
||||
}
|
||||
b = 0;
|
||||
while (ngp--)
|
||||
b |= BIT(A0+ngp);
|
||||
while (nfp--)
|
||||
b |= BIT(FA0+nfp);
|
||||
return b | ((bits)t5 << T5);
|
||||
}
|
||||
|
||||
static int
|
||||
fpstruct(Typ *t, int off, Class *c)
|
||||
{
|
||||
Field *f;
|
||||
int n;
|
||||
|
||||
if (t->isunion)
|
||||
return -1;
|
||||
|
||||
for (f=*t->fields; f->type != FEnd; f++)
|
||||
if (f->type == FPad)
|
||||
off += f->len;
|
||||
else if (f->type == FTyp) {
|
||||
if (fpstruct(&typ[f->len], off, c) == -1)
|
||||
return -1;
|
||||
}
|
||||
else {
|
||||
n = c->nfp + c->ngp;
|
||||
if (n == 2)
|
||||
return -1;
|
||||
switch (f->type) {
|
||||
default: die("unreachable");
|
||||
case Fb:
|
||||
case Fh:
|
||||
case Fw: c->cls[n] = Kw; c->ngp++; break;
|
||||
case Fl: c->cls[n] = Kl; c->ngp++; break;
|
||||
case Fs: c->cls[n] = Ks; c->nfp++; break;
|
||||
case Fd: c->cls[n] = Kd; c->nfp++; break;
|
||||
}
|
||||
c->off[n] = off;
|
||||
off += f->len;
|
||||
}
|
||||
|
||||
return c->nfp;
|
||||
}
|
||||
|
||||
static void
|
||||
typclass(Class *c, Typ *t, int fpabi, int *gp, int *fp)
|
||||
{
|
||||
uint n;
|
||||
int i;
|
||||
|
||||
c->type = t;
|
||||
c->class = 0;
|
||||
c->ngp = 0;
|
||||
c->nfp = 0;
|
||||
|
||||
if (t->align > 4)
|
||||
err("alignments larger than 16 are not supported");
|
||||
|
||||
if (t->isdark || t->size > 16 || t->size == 0) {
|
||||
/* large structs are replaced by a
|
||||
* pointer to some caller-allocated
|
||||
* memory
|
||||
*/
|
||||
c->class |= Cptr;
|
||||
*c->cls = Kl;
|
||||
*c->off = 0;
|
||||
c->ngp = 1;
|
||||
}
|
||||
else if (!fpabi || fpstruct(t, 0, c) <= 0) {
|
||||
for (n=0; 8*n<t->size; n++) {
|
||||
c->cls[n] = Kl;
|
||||
c->off[n] = 8*n;
|
||||
}
|
||||
c->nfp = 0;
|
||||
c->ngp = n;
|
||||
}
|
||||
|
||||
c->nreg = c->nfp + c->ngp;
|
||||
for (i=0; i<c->nreg; i++)
|
||||
if (KBASE(c->cls[i]) == 0)
|
||||
c->reg[i] = *gp++;
|
||||
else
|
||||
c->reg[i] = *fp++;
|
||||
}
|
||||
|
||||
static void
|
||||
sttmps(Ref tmp[], int ntmp, Class *c, Ref mem, Fn *fn)
|
||||
{
|
||||
static int st[] = {
|
||||
[Kw] = Ostorew, [Kl] = Ostorel,
|
||||
[Ks] = Ostores, [Kd] = Ostored
|
||||
};
|
||||
int i;
|
||||
Ref r;
|
||||
|
||||
assert(ntmp > 0);
|
||||
assert(ntmp <= 2);
|
||||
for (i=0; i<ntmp; i++) {
|
||||
tmp[i] = newtmp("abi", c->cls[i], fn);
|
||||
r = newtmp("abi", Kl, fn);
|
||||
emit(st[c->cls[i]], 0, R, tmp[i], r);
|
||||
emit(Oadd, Kl, r, mem, getcon(c->off[i], fn));
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
ldregs(Class *c, Ref mem, Fn *fn)
|
||||
{
|
||||
int i;
|
||||
Ref r;
|
||||
|
||||
for (i=0; i<c->nreg; i++) {
|
||||
r = newtmp("abi", Kl, fn);
|
||||
emit(Oload, c->cls[i], TMP(c->reg[i]), r, R);
|
||||
emit(Oadd, Kl, r, mem, getcon(c->off[i], fn));
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
selret(Blk *b, Fn *fn)
|
||||
{
|
||||
int j, k, cty;
|
||||
Ref r;
|
||||
Class cr;
|
||||
|
||||
j = b->jmp.type;
|
||||
|
||||
if (!isret(j) || j == Jret0)
|
||||
return;
|
||||
|
||||
r = b->jmp.arg;
|
||||
b->jmp.type = Jret0;
|
||||
|
||||
if (j == Jretc) {
|
||||
typclass(&cr, &typ[fn->retty], 1, gpreg, fpreg);
|
||||
if (cr.class & Cptr) {
|
||||
assert(rtype(fn->retr) == RTmp);
|
||||
emit(Oblit1, 0, R, INT(cr.type->size), R);
|
||||
emit(Oblit0, 0, R, r, fn->retr);
|
||||
cty = 0;
|
||||
} else {
|
||||
ldregs(&cr, r, fn);
|
||||
cty = (cr.nfp << 2) | cr.ngp;
|
||||
}
|
||||
} else {
|
||||
k = j - Jretw;
|
||||
if (KBASE(k) == 0) {
|
||||
emit(Ocopy, k, TMP(A0), r, R);
|
||||
cty = 1;
|
||||
} else {
|
||||
emit(Ocopy, k, TMP(FA0), r, R);
|
||||
cty = 1 << 2;
|
||||
}
|
||||
}
|
||||
|
||||
b->jmp.arg = CALL(cty);
|
||||
}
|
||||
|
||||
static int
|
||||
argsclass(Ins *i0, Ins *i1, Class *carg, int retptr)
|
||||
{
|
||||
int ngp, nfp, *gp, *fp, vararg, envc;
|
||||
Class *c;
|
||||
Typ *t;
|
||||
Ins *i;
|
||||
|
||||
gp = gpreg;
|
||||
fp = fpreg;
|
||||
ngp = 8;
|
||||
nfp = 8;
|
||||
vararg = 0;
|
||||
envc = 0;
|
||||
if (retptr) {
|
||||
gp++;
|
||||
ngp--;
|
||||
}
|
||||
for (i=i0, c=carg; i<i1; i++, c++) {
|
||||
switch (i->op) {
|
||||
case Opar:
|
||||
case Oarg:
|
||||
*c->cls = i->cls;
|
||||
if (!vararg && KBASE(i->cls) == 1 && nfp > 0) {
|
||||
nfp--;
|
||||
*c->reg = *fp++;
|
||||
} else if (ngp > 0) {
|
||||
if (KBASE(i->cls) == 1)
|
||||
c->class |= Cfpint;
|
||||
ngp--;
|
||||
*c->reg = *gp++;
|
||||
} else
|
||||
c->class |= Cstk1;
|
||||
break;
|
||||
case Oargv:
|
||||
vararg = 1;
|
||||
break;
|
||||
case Oparc:
|
||||
case Oargc:
|
||||
t = &typ[i->arg[0].val];
|
||||
typclass(c, t, 1, gp, fp);
|
||||
if (c->nfp > 0)
|
||||
if (c->nfp >= nfp || c->ngp >= ngp)
|
||||
typclass(c, t, 0, gp, fp);
|
||||
assert(c->nfp <= nfp);
|
||||
if (c->ngp <= ngp) {
|
||||
ngp -= c->ngp;
|
||||
nfp -= c->nfp;
|
||||
gp += c->ngp;
|
||||
fp += c->nfp;
|
||||
} else if (ngp > 0) {
|
||||
assert(c->ngp == 2);
|
||||
assert(c->class == 0);
|
||||
c->class |= Cstk2;
|
||||
c->nreg = 1;
|
||||
ngp--;
|
||||
gp++;
|
||||
} else {
|
||||
c->class |= Cstk1;
|
||||
if (c->nreg > 1)
|
||||
c->class |= Cstk2;
|
||||
c->nreg = 0;
|
||||
}
|
||||
break;
|
||||
case Opare:
|
||||
case Oarge:
|
||||
*c->reg = T5;
|
||||
*c->cls = Kl;
|
||||
envc = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return envc << 12 | (gp-gpreg) << 4 | (fp-fpreg) << 8;
|
||||
}
|
||||
|
||||
static void
|
||||
stkblob(Ref r, Typ *t, Fn *fn, Insl **ilp)
|
||||
{
|
||||
Insl *il;
|
||||
int al;
|
||||
uint64_t sz;
|
||||
|
||||
il = alloc(sizeof *il);
|
||||
al = t->align - 2; /* specific to NAlign == 3 */
|
||||
if (al < 0)
|
||||
al = 0;
|
||||
sz = (t->size + 7) & ~7;
|
||||
il->i = (Ins){Oalloc+al, Kl, r, {getcon(sz, fn)}};
|
||||
il->link = *ilp;
|
||||
*ilp = il;
|
||||
}
|
||||
|
||||
static void
|
||||
selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
|
||||
{
|
||||
Ins *i;
|
||||
Class *ca, *c, cr;
|
||||
int j, k, cty;
|
||||
uint64_t stk, off;
|
||||
Ref r, r1, r2, tmp[2];
|
||||
|
||||
ca = alloc((i1-i0) * sizeof ca[0]);
|
||||
cr.class = 0;
|
||||
|
||||
if (!req(i1->arg[1], R))
|
||||
typclass(&cr, &typ[i1->arg[1].val], 1, gpreg, fpreg);
|
||||
|
||||
cty = argsclass(i0, i1, ca, cr.class & Cptr);
|
||||
stk = 0;
|
||||
for (i=i0, c=ca; i<i1; i++, c++) {
|
||||
if (i->op == Oargv)
|
||||
continue;
|
||||
if (c->class & Cptr) {
|
||||
i->arg[0] = newtmp("abi", Kl, fn);
|
||||
stkblob(i->arg[0], c->type, fn, ilp);
|
||||
i->op = Oarg;
|
||||
}
|
||||
if (c->class & Cstk1)
|
||||
stk += 8;
|
||||
if (c->class & Cstk2)
|
||||
stk += 8;
|
||||
}
|
||||
stk += stk & 15;
|
||||
if (stk)
|
||||
emit(Osalloc, Kl, R, getcon(-stk, fn), R);
|
||||
|
||||
if (!req(i1->arg[1], R)) {
|
||||
stkblob(i1->to, cr.type, fn, ilp);
|
||||
cty |= (cr.nfp << 2) | cr.ngp;
|
||||
if (cr.class & Cptr)
|
||||
/* spill & rega expect calls to be
|
||||
* followed by copies from regs,
|
||||
* so we emit a dummy
|
||||
*/
|
||||
emit(Ocopy, Kw, R, TMP(A0), R);
|
||||
else {
|
||||
sttmps(tmp, cr.nreg, &cr, i1->to, fn);
|
||||
for (j=0; j<cr.nreg; j++) {
|
||||
r = TMP(cr.reg[j]);
|
||||
emit(Ocopy, cr.cls[j], tmp[j], r, R);
|
||||
}
|
||||
}
|
||||
} else if (KBASE(i1->cls) == 0) {
|
||||
emit(Ocopy, i1->cls, i1->to, TMP(A0), R);
|
||||
cty |= 1;
|
||||
} else {
|
||||
emit(Ocopy, i1->cls, i1->to, TMP(FA0), R);
|
||||
cty |= 1 << 2;
|
||||
}
|
||||
|
||||
emit(Ocall, 0, R, i1->arg[0], CALL(cty));
|
||||
|
||||
if (cr.class & Cptr)
|
||||
/* struct return argument */
|
||||
emit(Ocopy, Kl, TMP(A0), i1->to, R);
|
||||
|
||||
/* move arguments into registers */
|
||||
for (i=i0, c=ca; i<i1; i++, c++) {
|
||||
if (i->op == Oargv || c->class & Cstk1)
|
||||
continue;
|
||||
if (i->op == Oargc) {
|
||||
ldregs(c, i->arg[1], fn);
|
||||
} else if (c->class & Cfpint) {
|
||||
k = KWIDE(*c->cls) ? Kl : Kw;
|
||||
r = newtmp("abi", k, fn);
|
||||
emit(Ocopy, k, TMP(*c->reg), r, R);
|
||||
*c->reg = r.val;
|
||||
} else {
|
||||
emit(Ocopy, *c->cls, TMP(*c->reg), i->arg[0], R);
|
||||
}
|
||||
}
|
||||
|
||||
for (i=i0, c=ca; i<i1; i++, c++) {
|
||||
if (c->class & Cfpint) {
|
||||
k = KWIDE(*c->cls) ? Kl : Kw;
|
||||
emit(Ocast, k, TMP(*c->reg), i->arg[0], R);
|
||||
}
|
||||
if (c->class & Cptr) {
|
||||
emit(Oblit1, 0, R, INT(c->type->size), R);
|
||||
emit(Oblit0, 0, R, i->arg[1], i->arg[0]);
|
||||
}
|
||||
}
|
||||
|
||||
if (!stk)
|
||||
return;
|
||||
|
||||
/* populate the stack */
|
||||
off = 0;
|
||||
r = newtmp("abi", Kl, fn);
|
||||
for (i=i0, c=ca; i<i1; i++, c++) {
|
||||
if (i->op == Oargv || !(c->class & Cstk))
|
||||
continue;
|
||||
if (i->op == Oarg) {
|
||||
r1 = newtmp("abi", Kl, fn);
|
||||
emit(Ostorew+i->cls, Kw, R, i->arg[0], r1);
|
||||
if (i->cls == Kw) {
|
||||
/* TODO: we only need this sign
|
||||
* extension for l temps passed
|
||||
* as w arguments
|
||||
* (see rv64/isel.c:fixarg)
|
||||
*/
|
||||
curi->op = Ostorel;
|
||||
curi->arg[0] = newtmp("abi", Kl, fn);
|
||||
emit(Oextsw, Kl, curi->arg[0], i->arg[0], R);
|
||||
}
|
||||
emit(Oadd, Kl, r1, r, getcon(off, fn));
|
||||
off += 8;
|
||||
}
|
||||
if (i->op == Oargc) {
|
||||
if (c->class & Cstk1) {
|
||||
r1 = newtmp("abi", Kl, fn);
|
||||
r2 = newtmp("abi", Kl, fn);
|
||||
emit(Ostorel, 0, R, r2, r1);
|
||||
emit(Oadd, Kl, r1, r, getcon(off, fn));
|
||||
emit(Oload, Kl, r2, i->arg[1], R);
|
||||
off += 8;
|
||||
}
|
||||
if (c->class & Cstk2) {
|
||||
r1 = newtmp("abi", Kl, fn);
|
||||
r2 = newtmp("abi", Kl, fn);
|
||||
emit(Ostorel, 0, R, r2, r1);
|
||||
emit(Oadd, Kl, r1, r, getcon(off, fn));
|
||||
r1 = newtmp("abi", Kl, fn);
|
||||
emit(Oload, Kl, r2, r1, R);
|
||||
emit(Oadd, Kl, r1, i->arg[1], getcon(8, fn));
|
||||
off += 8;
|
||||
}
|
||||
}
|
||||
}
|
||||
emit(Osalloc, Kl, r, getcon(stk, fn), R);
|
||||
}
|
||||
|
||||
static Params
|
||||
selpar(Fn *fn, Ins *i0, Ins *i1)
|
||||
{
|
||||
Class *ca, *c, cr;
|
||||
Insl *il;
|
||||
Ins *i;
|
||||
int j, k, s, cty, nt;
|
||||
Ref r, tmp[17], *t;
|
||||
|
||||
ca = alloc((i1-i0) * sizeof ca[0]);
|
||||
cr.class = 0;
|
||||
curi = &insb[NIns];
|
||||
|
||||
if (fn->retty >= 0) {
|
||||
typclass(&cr, &typ[fn->retty], 1, gpreg, fpreg);
|
||||
if (cr.class & Cptr) {
|
||||
fn->retr = newtmp("abi", Kl, fn);
|
||||
emit(Ocopy, Kl, fn->retr, TMP(A0), R);
|
||||
}
|
||||
}
|
||||
|
||||
cty = argsclass(i0, i1, ca, cr.class & Cptr);
|
||||
fn->reg = rv64_argregs(CALL(cty), 0);
|
||||
|
||||
il = 0;
|
||||
t = tmp;
|
||||
for (i=i0, c=ca; i<i1; i++, c++) {
|
||||
if (c->class & Cfpint) {
|
||||
r = i->to;
|
||||
k = *c->cls;
|
||||
*c->cls = KWIDE(k) ? Kl : Kw;
|
||||
i->to = newtmp("abi", k, fn);
|
||||
emit(Ocast, k, r, i->to, R);
|
||||
}
|
||||
if (i->op == Oparc)
|
||||
if (!(c->class & Cptr))
|
||||
if (c->nreg != 0) {
|
||||
nt = c->nreg;
|
||||
if (c->class & Cstk2) {
|
||||
c->cls[1] = Kl;
|
||||
c->off[1] = 8;
|
||||
assert(nt == 1);
|
||||
nt = 2;
|
||||
}
|
||||
sttmps(t, nt, c, i->to, fn);
|
||||
stkblob(i->to, c->type, fn, &il);
|
||||
t += nt;
|
||||
}
|
||||
}
|
||||
for (; il; il=il->link)
|
||||
emiti(il->i);
|
||||
|
||||
t = tmp;
|
||||
s = 2 + 8*fn->vararg;
|
||||
for (i=i0, c=ca; i<i1; i++, c++)
|
||||
if (i->op == Oparc && !(c->class & Cptr)) {
|
||||
if (c->nreg == 0) {
|
||||
fn->tmp[i->to.val].slot = -s;
|
||||
s += (c->class & Cstk2) ? 2 : 1;
|
||||
continue;
|
||||
}
|
||||
for (j=0; j<c->nreg; j++) {
|
||||
r = TMP(c->reg[j]);
|
||||
emit(Ocopy, c->cls[j], *t++, r, R);
|
||||
}
|
||||
if (c->class & Cstk2) {
|
||||
emit(Oload, Kl, *t, SLOT(-s), R);
|
||||
t++, s++;
|
||||
}
|
||||
} else if (c->class & Cstk1) {
|
||||
emit(Oload, *c->cls, i->to, SLOT(-s), R);
|
||||
s++;
|
||||
} else {
|
||||
emit(Ocopy, *c->cls, i->to, TMP(*c->reg), R);
|
||||
}
|
||||
|
||||
return (Params){
|
||||
.stk = s,
|
||||
.ngp = (cty >> 4) & 15,
|
||||
.nfp = (cty >> 8) & 15,
|
||||
};
|
||||
}
|
||||
|
||||
static void
|
||||
selvaarg(Fn *fn, Ins *i)
|
||||
{
|
||||
Ref loc, newloc;
|
||||
|
||||
loc = newtmp("abi", Kl, fn);
|
||||
newloc = newtmp("abi", Kl, fn);
|
||||
emit(Ostorel, Kw, R, newloc, i->arg[0]);
|
||||
emit(Oadd, Kl, newloc, loc, getcon(8, fn));
|
||||
emit(Oload, i->cls, i->to, loc, R);
|
||||
emit(Oload, Kl, loc, i->arg[0], R);
|
||||
}
|
||||
|
||||
static void
|
||||
selvastart(Fn *fn, Params p, Ref ap)
|
||||
{
|
||||
Ref rsave;
|
||||
int s;
|
||||
|
||||
rsave = newtmp("abi", Kl, fn);
|
||||
emit(Ostorel, Kw, R, rsave, ap);
|
||||
s = p.stk > 2 + 8 * fn->vararg ? p.stk : 2 + p.ngp;
|
||||
emit(Oaddr, Kl, rsave, SLOT(-s), R);
|
||||
}
|
||||
|
||||
void
|
||||
rv64_abi(Fn *fn)
|
||||
{
|
||||
Blk *b;
|
||||
Ins *i, *i0;
|
||||
Insl *il;
|
||||
int n0, n1, ioff;
|
||||
Params p;
|
||||
|
||||
for (b=fn->start; b; b=b->link)
|
||||
b->visit = 0;
|
||||
|
||||
/* lower parameters */
|
||||
for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++)
|
||||
if (!ispar(i->op))
|
||||
break;
|
||||
p = selpar(fn, b->ins, i);
|
||||
n0 = &insb[NIns] - curi;
|
||||
ioff = i - b->ins;
|
||||
n1 = b->nins - ioff;
|
||||
vgrow(&b->ins, n0+n1);
|
||||
icpy(b->ins+n0, b->ins+ioff, n1);
|
||||
icpy(b->ins, curi, n0);
|
||||
b->nins = n0+n1;
|
||||
|
||||
/* lower calls, returns, and vararg instructions */
|
||||
il = 0;
|
||||
b = fn->start;
|
||||
do {
|
||||
if (!(b = b->link))
|
||||
b = fn->start; /* do it last */
|
||||
if (b->visit)
|
||||
continue;
|
||||
curi = &insb[NIns];
|
||||
selret(b, fn);
|
||||
for (i=&b->ins[b->nins]; i!=b->ins;)
|
||||
switch ((--i)->op) {
|
||||
default:
|
||||
emiti(*i);
|
||||
break;
|
||||
case Ocall:
|
||||
for (i0=i; i0>b->ins; i0--)
|
||||
if (!isarg((i0-1)->op))
|
||||
break;
|
||||
selcall(fn, i0, i, &il);
|
||||
i = i0;
|
||||
break;
|
||||
case Ovastart:
|
||||
selvastart(fn, p, i->arg[0]);
|
||||
break;
|
||||
case Ovaarg:
|
||||
selvaarg(fn, i);
|
||||
break;
|
||||
case Oarg:
|
||||
case Oargc:
|
||||
die("unreachable");
|
||||
}
|
||||
if (b == fn->start)
|
||||
for (; il; il=il->link)
|
||||
emiti(il->i);
|
||||
idup(b, curi, &insb[NIns]-curi);
|
||||
} while (b != fn->start);
|
||||
|
||||
if (debug['A']) {
|
||||
fprintf(stderr, "\n> After ABI lowering:\n");
|
||||
printfn(fn, stderr);
|
||||
}
|
||||
}
|
||||
52
src/qbe/rv64/all.h
Normal file
52
src/qbe/rv64/all.h
Normal file
@@ -0,0 +1,52 @@
|
||||
#include "../all.h"
|
||||
|
||||
typedef struct Rv64Op Rv64Op;
|
||||
|
||||
enum Rv64Reg {
|
||||
/* caller-save */
|
||||
T0 = RXX + 1, T1, T2, T3, T4, T5,
|
||||
A0, A1, A2, A3, A4, A5, A6, A7,
|
||||
|
||||
/* callee-save */
|
||||
S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11,
|
||||
|
||||
/* globally live */
|
||||
FP, SP, GP, TP, RA,
|
||||
|
||||
/* FP caller-save */
|
||||
FT0, FT1, FT2, FT3, FT4, FT5, FT6, FT7, FT8, FT9, FT10,
|
||||
FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7,
|
||||
|
||||
/* FP callee-save */
|
||||
FS0, FS1, FS2, FS3, FS4, FS5, FS6, FS7, FS8, FS9, FS10, FS11,
|
||||
|
||||
/* reserved (see rv64/emit.c) */
|
||||
T6, FT11,
|
||||
|
||||
NFPR = FS11 - FT0 + 1,
|
||||
NGPR = RA - T0 + 1,
|
||||
NGPS = A7 - T0 + 1,
|
||||
NFPS = FA7 - FT0 + 1,
|
||||
NCLR = (S11 - S1 + 1) + (FS11 - FS0 + 1),
|
||||
};
|
||||
MAKESURE(reg_not_tmp, FT11 < (int)Tmp0);
|
||||
|
||||
/* Per-opcode target data for rv64; one entry per opcode in rv64_op[]. */
struct Rv64Op {
	/* tested by isel.c's immarg(): nonzero lets the second argument
	 * stay a constant instead of being copied into a temporary
	 */
	char imm;
};
|
||||
|
||||
/* targ.c */
|
||||
extern int rv64_rsave[];
|
||||
extern int rv64_rclob[];
|
||||
extern Rv64Op rv64_op[];
|
||||
|
||||
/* abi.c */
|
||||
bits rv64_retregs(Ref, int[2]);
|
||||
bits rv64_argregs(Ref, int[2]);
|
||||
void rv64_abi(Fn *);
|
||||
|
||||
/* isel.c */
|
||||
void rv64_isel(Fn *);
|
||||
|
||||
/* emit.c */
|
||||
void rv64_emitfn(Fn *, FILE *);
|
||||
569
src/qbe/rv64/emit.c
Normal file
569
src/qbe/rv64/emit.c
Normal file
@@ -0,0 +1,569 @@
|
||||
#include "all.h"
|
||||
|
||||
enum {
|
||||
Ki = -1, /* matches Kw and Kl */
|
||||
Ka = -2, /* matches all classes */
|
||||
};
|
||||
|
||||
static struct {
|
||||
short op;
|
||||
short cls;
|
||||
char *fmt;
|
||||
} omap[] = {
|
||||
{ Oadd, Ki, "add%k %=, %0, %1" },
|
||||
{ Oadd, Ka, "fadd.%k %=, %0, %1" },
|
||||
{ Osub, Ki, "sub%k %=, %0, %1" },
|
||||
{ Osub, Ka, "fsub.%k %=, %0, %1" },
|
||||
{ Oneg, Ki, "neg%k %=, %0" },
|
||||
{ Oneg, Ka, "fneg.%k %=, %0" },
|
||||
{ Odiv, Ki, "div%k %=, %0, %1" },
|
||||
{ Odiv, Ka, "fdiv.%k %=, %0, %1" },
|
||||
{ Orem, Ki, "rem%k %=, %0, %1" },
|
||||
{ Orem, Kl, "rem %=, %0, %1" },
|
||||
{ Oudiv, Ki, "divu%k %=, %0, %1" },
|
||||
{ Ourem, Ki, "remu%k %=, %0, %1" },
|
||||
{ Omul, Ki, "mul%k %=, %0, %1" },
|
||||
{ Omul, Ka, "fmul.%k %=, %0, %1" },
|
||||
{ Oand, Ki, "and %=, %0, %1" },
|
||||
{ Oor, Ki, "or %=, %0, %1" },
|
||||
{ Oxor, Ki, "xor %=, %0, %1" },
|
||||
{ Osar, Ki, "sra%k %=, %0, %1" },
|
||||
{ Oshr, Ki, "srl%k %=, %0, %1" },
|
||||
{ Oshl, Ki, "sll%k %=, %0, %1" },
|
||||
{ Ocsltl, Ki, "slt %=, %0, %1" },
|
||||
{ Ocultl, Ki, "sltu %=, %0, %1" },
|
||||
{ Oceqs, Ki, "feq.s %=, %0, %1" },
|
||||
{ Ocges, Ki, "fge.s %=, %0, %1" },
|
||||
{ Ocgts, Ki, "fgt.s %=, %0, %1" },
|
||||
{ Ocles, Ki, "fle.s %=, %0, %1" },
|
||||
{ Oclts, Ki, "flt.s %=, %0, %1" },
|
||||
{ Oceqd, Ki, "feq.d %=, %0, %1" },
|
||||
{ Ocged, Ki, "fge.d %=, %0, %1" },
|
||||
{ Ocgtd, Ki, "fgt.d %=, %0, %1" },
|
||||
{ Ocled, Ki, "fle.d %=, %0, %1" },
|
||||
{ Ocltd, Ki, "flt.d %=, %0, %1" },
|
||||
{ Ostoreb, Kw, "sb %0, %M1" },
|
||||
{ Ostoreh, Kw, "sh %0, %M1" },
|
||||
{ Ostorew, Kw, "sw %0, %M1" },
|
||||
{ Ostorel, Ki, "sd %0, %M1" },
|
||||
{ Ostores, Kw, "fsw %0, %M1" },
|
||||
{ Ostored, Kw, "fsd %0, %M1" },
|
||||
{ Oloadsb, Ki, "lb %=, %M0" },
|
||||
{ Oloadub, Ki, "lbu %=, %M0" },
|
||||
{ Oloadsh, Ki, "lh %=, %M0" },
|
||||
{ Oloaduh, Ki, "lhu %=, %M0" },
|
||||
{ Oloadsw, Ki, "lw %=, %M0" },
|
||||
/* riscv64 always sign-extends 32-bit
|
||||
* values stored in 64-bit registers
|
||||
*/
|
||||
{ Oloaduw, Kw, "lw %=, %M0" },
|
||||
{ Oloaduw, Kl, "lwu %=, %M0" },
|
||||
{ Oload, Kw, "lw %=, %M0" },
|
||||
{ Oload, Kl, "ld %=, %M0" },
|
||||
{ Oload, Ks, "flw %=, %M0" },
|
||||
{ Oload, Kd, "fld %=, %M0" },
|
||||
{ Oextsb, Ki, "sext.b %=, %0" },
|
||||
{ Oextub, Ki, "zext.b %=, %0" },
|
||||
{ Oextsh, Ki, "sext.h %=, %0" },
|
||||
{ Oextuh, Ki, "zext.h %=, %0" },
|
||||
{ Oextsw, Kl, "sext.w %=, %0" },
|
||||
{ Oextuw, Kl, "zext.w %=, %0" },
|
||||
{ Otruncd, Ks, "fcvt.s.d %=, %0" },
|
||||
{ Oexts, Kd, "fcvt.d.s %=, %0" },
|
||||
{ Ostosi, Kw, "fcvt.w.s %=, %0, rtz" },
|
||||
{ Ostosi, Kl, "fcvt.l.s %=, %0, rtz" },
|
||||
{ Ostoui, Kw, "fcvt.wu.s %=, %0, rtz" },
|
||||
{ Ostoui, Kl, "fcvt.lu.s %=, %0, rtz" },
|
||||
{ Odtosi, Kw, "fcvt.w.d %=, %0, rtz" },
|
||||
{ Odtosi, Kl, "fcvt.l.d %=, %0, rtz" },
|
||||
{ Odtoui, Kw, "fcvt.wu.d %=, %0, rtz" },
|
||||
{ Odtoui, Kl, "fcvt.lu.d %=, %0, rtz" },
|
||||
{ Oswtof, Ka, "fcvt.%k.w %=, %0" },
|
||||
{ Ouwtof, Ka, "fcvt.%k.wu %=, %0" },
|
||||
{ Osltof, Ka, "fcvt.%k.l %=, %0" },
|
||||
{ Oultof, Ka, "fcvt.%k.lu %=, %0" },
|
||||
{ Ocast, Kw, "fmv.x.w %=, %0" },
|
||||
{ Ocast, Kl, "fmv.x.d %=, %0" },
|
||||
{ Ocast, Ks, "fmv.w.x %=, %0" },
|
||||
{ Ocast, Kd, "fmv.d.x %=, %0" },
|
||||
{ Ocopy, Ki, "mv %=, %0" },
|
||||
{ Ocopy, Ka, "fmv.%k %=, %0" },
|
||||
{ Oswap, Ki, "mv %?, %0\n\tmv %0, %1\n\tmv %1, %?" },
|
||||
{ Oswap, Ka, "fmv.%k %?, %0\n\tfmv.%k %0, %1\n\tfmv.%k %1, %?" },
|
||||
{ Oreqz, Ki, "seqz %=, %0" },
|
||||
{ Ornez, Ki, "snez %=, %0" },
|
||||
{ Ocall, Kw, "jalr %0" },
|
||||
{ NOp, 0, 0 }
|
||||
};
|
||||
|
||||
static char *rname[] = {
|
||||
[FP] = "fp",
|
||||
[SP] = "sp",
|
||||
[GP] = "gp",
|
||||
[TP] = "tp",
|
||||
[RA] = "ra",
|
||||
[T0] = "t0", "t1", "t2", "t3", "t4", "t5",
|
||||
[A0] = "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
|
||||
[S1] = "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8",
|
||||
"s9", "s10", "s11",
|
||||
[FT0] = "ft0", "ft1", "ft2", "ft3", "ft4", "ft5", "ft6", "ft7",
|
||||
"ft8", "ft9", "ft10",
|
||||
[FA0] = "fa0", "fa1", "fa2", "fa3", "fa4", "fa5", "fa6", "fa7",
|
||||
[FS0] = "fs0", "fs1", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7",
|
||||
"fs8", "fs9", "fs10", "fs11",
|
||||
[T6] = "t6",
|
||||
[FT11] = "ft11",
|
||||
};
|
||||
|
||||
static int64_t
|
||||
slot(Ref r, Fn *fn)
|
||||
{
|
||||
int s;
|
||||
|
||||
s = rsval(r);
|
||||
assert(s <= fn->slot);
|
||||
if (s < 0)
|
||||
return 8 * -s;
|
||||
else
|
||||
return -4 * (fn->slot - s);
|
||||
}
|
||||
|
||||
static void
|
||||
emitaddr(Con *c, FILE *f)
|
||||
{
|
||||
assert(c->sym.type == SGlo);
|
||||
fputs(str(c->sym.id), f);
|
||||
if (c->bits.i)
|
||||
fprintf(f, "+%"PRIi64, c->bits.i);
|
||||
}
|
||||
|
||||
static void
|
||||
emitf(char *s, Ins *i, Fn *fn, FILE *f)
|
||||
{
|
||||
static char clschr[] = {'w', 'l', 's', 'd'};
|
||||
Ref r;
|
||||
int k, c;
|
||||
Con *pc;
|
||||
int64_t offset;
|
||||
|
||||
fputc('\t', f);
|
||||
for (;;) {
|
||||
k = i->cls;
|
||||
while ((c = *s++) != '%')
|
||||
if (!c) {
|
||||
fputc('\n', f);
|
||||
return;
|
||||
} else
|
||||
fputc(c, f);
|
||||
switch ((c = *s++)) {
|
||||
default:
|
||||
die("invalid escape");
|
||||
case '?':
|
||||
if (KBASE(k) == 0)
|
||||
fputs("t6", f);
|
||||
else
|
||||
fputs("ft11", f);
|
||||
break;
|
||||
case 'k':
|
||||
if (i->cls != Kl)
|
||||
fputc(clschr[i->cls], f);
|
||||
break;
|
||||
case '=':
|
||||
case '0':
|
||||
r = c == '=' ? i->to : i->arg[0];
|
||||
assert(isreg(r));
|
||||
fputs(rname[r.val], f);
|
||||
break;
|
||||
case '1':
|
||||
r = i->arg[1];
|
||||
switch (rtype(r)) {
|
||||
default:
|
||||
die("invalid second argument");
|
||||
case RTmp:
|
||||
assert(isreg(r));
|
||||
fputs(rname[r.val], f);
|
||||
break;
|
||||
case RCon:
|
||||
pc = &fn->con[r.val];
|
||||
assert(pc->type == CBits);
|
||||
assert(pc->bits.i >= -2048 && pc->bits.i < 2048);
|
||||
fprintf(f, "%d", (int)pc->bits.i);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 'M':
|
||||
c = *s++;
|
||||
assert(c == '0' || c == '1');
|
||||
r = i->arg[c - '0'];
|
||||
switch (rtype(r)) {
|
||||
default:
|
||||
die("invalid address argument");
|
||||
case RTmp:
|
||||
fprintf(f, "0(%s)", rname[r.val]);
|
||||
break;
|
||||
case RCon:
|
||||
pc = &fn->con[r.val];
|
||||
assert(pc->type == CAddr);
|
||||
emitaddr(pc, f);
|
||||
if (isstore(i->op)
|
||||
|| (isload(i->op) && KBASE(i->cls) == 1)) {
|
||||
/* store (and float load)
|
||||
* pseudo-instructions need a
|
||||
* temporary register in which to
|
||||
* load the address
|
||||
*/
|
||||
fprintf(f, ", t6");
|
||||
}
|
||||
break;
|
||||
case RSlot:
|
||||
offset = slot(r, fn);
|
||||
assert(offset >= -2048 && offset <= 2047);
|
||||
fprintf(f, "%d(fp)", (int)offset);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
loadaddr(Con *c, char *rn, FILE *f)
|
||||
{
|
||||
char off[32];
|
||||
|
||||
if (c->sym.type == SThr) {
|
||||
if (c->bits.i)
|
||||
sprintf(off, "+%"PRIi64, c->bits.i);
|
||||
else
|
||||
off[0] = 0;
|
||||
fprintf(f, "\tlui %s, %%tprel_hi(%s)%s\n",
|
||||
rn, str(c->sym.id), off);
|
||||
fprintf(f, "\tadd %s, %s, tp, %%tprel_add(%s)%s\n",
|
||||
rn, rn, str(c->sym.id), off);
|
||||
fprintf(f, "\taddi %s, %s, %%tprel_lo(%s)%s\n",
|
||||
rn, rn, str(c->sym.id), off);
|
||||
} else {
|
||||
fprintf(f, "\tla %s, ", rn);
|
||||
emitaddr(c, f);
|
||||
fputc('\n', f);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
loadcon(Con *c, int r, int k, FILE *f)
|
||||
{
|
||||
char *rn;
|
||||
int64_t n;
|
||||
|
||||
rn = rname[r];
|
||||
switch (c->type) {
|
||||
case CAddr:
|
||||
loadaddr(c, rn, f);
|
||||
break;
|
||||
case CBits:
|
||||
n = c->bits.i;
|
||||
if (!KWIDE(k))
|
||||
n = (int32_t)n;
|
||||
fprintf(f, "\tli %s, %"PRIi64"\n", rn, n);
|
||||
break;
|
||||
default:
|
||||
die("invalid constant");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
fixmem(Ref *pr, Fn *fn, FILE *f)
|
||||
{
|
||||
Ref r;
|
||||
int64_t s;
|
||||
Con *c;
|
||||
|
||||
r = *pr;
|
||||
if (rtype(r) == RCon) {
|
||||
c = &fn->con[r.val];
|
||||
if (c->type == CAddr)
|
||||
if (c->sym.type == SThr) {
|
||||
loadcon(c, T6, Kl, f);
|
||||
*pr = TMP(T6);
|
||||
}
|
||||
}
|
||||
if (rtype(r) == RSlot) {
|
||||
s = slot(r, fn);
|
||||
if (s < -2048 || s > 2047) {
|
||||
fprintf(f, "\tli t6, %"PRId64"\n", s);
|
||||
fprintf(f, "\tadd t6, fp, t6\n");
|
||||
*pr = TMP(T6);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
emitins(Ins *i, Fn *fn, FILE *f)
|
||||
{
|
||||
int o;
|
||||
char *rn;
|
||||
int64_t s;
|
||||
Con *con;
|
||||
|
||||
switch (i->op) {
|
||||
default:
|
||||
if (isload(i->op))
|
||||
fixmem(&i->arg[0], fn, f);
|
||||
else if (isstore(i->op))
|
||||
fixmem(&i->arg[1], fn, f);
|
||||
Table:
|
||||
/* most instructions are just pulled out of
|
||||
* the table omap[], some special cases are
|
||||
* detailed below */
|
||||
for (o=0;; o++) {
|
||||
/* this linear search should really be a binary
|
||||
* search */
|
||||
if (omap[o].op == NOp)
|
||||
die("no match for %s(%c)",
|
||||
optab[i->op].name, "wlsd"[i->cls]);
|
||||
if (omap[o].op == i->op)
|
||||
if (omap[o].cls == i->cls || omap[o].cls == Ka
|
||||
|| (omap[o].cls == Ki && KBASE(i->cls) == 0))
|
||||
break;
|
||||
}
|
||||
emitf(omap[o].fmt, i, fn, f);
|
||||
break;
|
||||
case Ocopy:
|
||||
if (req(i->to, i->arg[0]))
|
||||
break;
|
||||
if (rtype(i->to) == RSlot) {
|
||||
switch (rtype(i->arg[0])) {
|
||||
case RSlot:
|
||||
case RCon:
|
||||
die("unimplemented");
|
||||
break;
|
||||
default:
|
||||
assert(isreg(i->arg[0]));
|
||||
i->arg[1] = i->to;
|
||||
i->to = R;
|
||||
switch (i->cls) {
|
||||
case Kw: i->op = Ostorew; break;
|
||||
case Kl: i->op = Ostorel; break;
|
||||
case Ks: i->op = Ostores; break;
|
||||
case Kd: i->op = Ostored; break;
|
||||
}
|
||||
fixmem(&i->arg[1], fn, f);
|
||||
goto Table;
|
||||
}
|
||||
break;
|
||||
}
|
||||
assert(isreg(i->to));
|
||||
switch (rtype(i->arg[0])) {
|
||||
case RCon:
|
||||
loadcon(&fn->con[i->arg[0].val], i->to.val, i->cls, f);
|
||||
break;
|
||||
case RSlot:
|
||||
i->op = Oload;
|
||||
fixmem(&i->arg[0], fn, f);
|
||||
goto Table;
|
||||
default:
|
||||
assert(isreg(i->arg[0]));
|
||||
goto Table;
|
||||
}
|
||||
break;
|
||||
case Onop:
|
||||
break;
|
||||
case Oaddr:
|
||||
assert(rtype(i->arg[0]) == RSlot);
|
||||
rn = rname[i->to.val];
|
||||
s = slot(i->arg[0], fn);
|
||||
if (-s < 2048) {
|
||||
fprintf(f, "\tadd %s, fp, %"PRId64"\n", rn, s);
|
||||
} else {
|
||||
fprintf(f,
|
||||
"\tli %s, %"PRId64"\n"
|
||||
"\tadd %s, fp, %s\n",
|
||||
rn, s, rn, rn
|
||||
);
|
||||
}
|
||||
break;
|
||||
case Ocall:
|
||||
switch (rtype(i->arg[0])) {
|
||||
case RCon:
|
||||
con = &fn->con[i->arg[0].val];
|
||||
if (con->type != CAddr
|
||||
|| con->sym.type != SGlo
|
||||
|| con->bits.i)
|
||||
goto Invalid;
|
||||
fprintf(f, "\tcall %s\n", str(con->sym.id));
|
||||
break;
|
||||
case RTmp:
|
||||
emitf("jalr %0", i, fn, f);
|
||||
break;
|
||||
default:
|
||||
Invalid:
|
||||
die("invalid call argument");
|
||||
}
|
||||
break;
|
||||
case Osalloc:
|
||||
emitf("sub sp, sp, %0", i, fn, f);
|
||||
if (!req(i->to, R))
|
||||
emitf("mv %=, sp", i, fn, f);
|
||||
break;
|
||||
case Odbgloc:
|
||||
emitdbgloc(i->arg[0].val, i->arg[1].val, f);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
Stack-frame layout:
|
||||
|
||||
+=============+
|
||||
| varargs |
|
||||
| save area |
|
||||
+-------------+
|
||||
| saved ra |
|
||||
| saved fp |
|
||||
+-------------+ <- fp
|
||||
| ... |
|
||||
| spill slots |
|
||||
| ... |
|
||||
+-------------+
|
||||
| ... |
|
||||
| locals |
|
||||
| ... |
|
||||
+-------------+
|
||||
| padding |
|
||||
+-------------+
|
||||
| callee-save |
|
||||
| registers |
|
||||
+=============+
|
||||
|
||||
*/
|
||||
|
||||
void
|
||||
rv64_emitfn(Fn *fn, FILE *f)
|
||||
{
|
||||
static int id0;
|
||||
int lbl, neg, off, frame, *pr, r;
|
||||
Blk *b, *s;
|
||||
Ins *i, ii;
|
||||
|
||||
emitfnlnk(fn->name, &fn->lnk, f);
|
||||
|
||||
if (fn->vararg) {
|
||||
/* TODO: only need space for registers
|
||||
* unused by named arguments
|
||||
*/
|
||||
fprintf(f, "\tadd sp, sp, -64\n");
|
||||
for (r=A0; r<=A7; r++)
|
||||
fprintf(f,
|
||||
"\tsd %s, %d(sp)\n",
|
||||
rname[r], 8 * (r - A0)
|
||||
);
|
||||
}
|
||||
fprintf(f, "\tsd fp, -16(sp)\n");
|
||||
fprintf(f, "\tsd ra, -8(sp)\n");
|
||||
fprintf(f, "\tadd fp, sp, -16\n");
|
||||
|
||||
frame = (16 + 4 * fn->slot + 15) & ~15;
|
||||
for (pr=rv64_rclob; *pr>=0; pr++) {
|
||||
if (fn->reg & BIT(*pr))
|
||||
frame += 8;
|
||||
}
|
||||
frame = (frame + 15) & ~15;
|
||||
|
||||
if (frame <= 2048)
|
||||
fprintf(f,
|
||||
"\tadd sp, sp, -%d\n",
|
||||
frame
|
||||
);
|
||||
else
|
||||
fprintf(f,
|
||||
"\tli t6, %d\n"
|
||||
"\tsub sp, sp, t6\n",
|
||||
frame
|
||||
);
|
||||
for (pr=rv64_rclob, off=0; *pr>=0; pr++) {
|
||||
if (fn->reg & BIT(*pr)) {
|
||||
fprintf(f,
|
||||
"\t%s %s, %d(sp)\n",
|
||||
*pr < FT0 ? "sd" : "fsd",
|
||||
rname[*pr], off
|
||||
);
|
||||
off += 8;
|
||||
}
|
||||
}
|
||||
|
||||
for (lbl=0, b=fn->start; b; b=b->link) {
|
||||
if (lbl || b->npred > 1)
|
||||
fprintf(f, ".L%d:\n", id0+b->id);
|
||||
for (i=b->ins; i!=&b->ins[b->nins]; i++)
|
||||
emitins(i, fn, f);
|
||||
lbl = 1;
|
||||
switch (b->jmp.type) {
|
||||
case Jhlt:
|
||||
fprintf(f, "\tebreak\n");
|
||||
break;
|
||||
case Jret0:
|
||||
if (fn->dynalloc) {
|
||||
if (frame - 16 <= 2048)
|
||||
fprintf(f,
|
||||
"\tadd sp, fp, -%d\n",
|
||||
frame - 16
|
||||
);
|
||||
else
|
||||
fprintf(f,
|
||||
"\tli t6, %d\n"
|
||||
"\tsub sp, fp, t6\n",
|
||||
frame - 16
|
||||
);
|
||||
}
|
||||
for (pr=rv64_rclob, off=0; *pr>=0; pr++) {
|
||||
if (fn->reg & BIT(*pr)) {
|
||||
fprintf(f,
|
||||
"\t%s %s, %d(sp)\n",
|
||||
*pr < FT0 ? "ld" : "fld",
|
||||
rname[*pr], off
|
||||
);
|
||||
off += 8;
|
||||
}
|
||||
}
|
||||
fprintf(f,
|
||||
"\tadd sp, fp, %d\n"
|
||||
"\tld ra, 8(fp)\n"
|
||||
"\tld fp, 0(fp)\n"
|
||||
"\tret\n",
|
||||
16 + fn->vararg * 64
|
||||
);
|
||||
break;
|
||||
case Jjmp:
|
||||
Jmp:
|
||||
if (b->s1 != b->link)
|
||||
fprintf(f, "\tj .L%d\n", id0+b->s1->id);
|
||||
else
|
||||
lbl = 0;
|
||||
break;
|
||||
case Jjnz:
|
||||
neg = 0;
|
||||
if (b->link == b->s2) {
|
||||
s = b->s1;
|
||||
b->s1 = b->s2;
|
||||
b->s2 = s;
|
||||
neg = 1;
|
||||
}
|
||||
if (rtype(b->jmp.arg) == RSlot) {
|
||||
ii.arg[0] = b->jmp.arg;
|
||||
emitf("lw t6, %M0", &ii, fn, f);
|
||||
b->jmp.arg = TMP(T6);
|
||||
}
|
||||
assert(isreg(b->jmp.arg));
|
||||
fprintf(f,
|
||||
"\tb%sz %s, .L%d\n",
|
||||
neg ? "ne" : "eq",
|
||||
rname[b->jmp.arg.val],
|
||||
id0+b->s2->id
|
||||
);
|
||||
goto Jmp;
|
||||
}
|
||||
}
|
||||
id0 += fn->nblk;
|
||||
elf_emitfnfin(fn->name, f);
|
||||
}
|
||||
255
src/qbe/rv64/isel.c
Normal file
255
src/qbe/rv64/isel.c
Normal file
@@ -0,0 +1,255 @@
|
||||
#include "all.h"
|
||||
|
||||
static int
|
||||
memarg(Ref *r, int op, Ins *i)
|
||||
{
|
||||
if (isload(op) || op == Ocall)
|
||||
return r == &i->arg[0];
|
||||
if (isstore(op))
|
||||
return r == &i->arg[1];
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
immarg(Ref *r, int op, Ins *i)
|
||||
{
|
||||
return rv64_op[op].imm && r == &i->arg[1];
|
||||
}
|
||||
|
||||
static void
|
||||
fixarg(Ref *r, int k, Ins *i, Fn *fn)
|
||||
{
|
||||
char buf[32];
|
||||
Ref r0, r1;
|
||||
int s, n, op;
|
||||
Con *c;
|
||||
|
||||
r0 = r1 = *r;
|
||||
op = i ? i->op : Ocopy;
|
||||
switch (rtype(r0)) {
|
||||
case RCon:
|
||||
c = &fn->con[r0.val];
|
||||
if (c->type == CAddr && memarg(r, op, i))
|
||||
break;
|
||||
if (KBASE(k) == 0)
|
||||
if (c->type == CBits && immarg(r, op, i))
|
||||
if (-2048 <= c->bits.i && c->bits.i < 2048)
|
||||
break;
|
||||
r1 = newtmp("isel", k, fn);
|
||||
if (KBASE(k) == 1) {
|
||||
/* load floating points from memory
|
||||
* slots, they can't be used as
|
||||
* immediates
|
||||
*/
|
||||
assert(c->type == CBits);
|
||||
n = stashbits(c->bits.i, KWIDE(k) ? 8 : 4);
|
||||
vgrow(&fn->con, ++fn->ncon);
|
||||
c = &fn->con[fn->ncon-1];
|
||||
sprintf(buf, "\"%sfp%d\"", T.asloc, n);
|
||||
*c = (Con){.type = CAddr};
|
||||
c->sym.id = intern(buf);
|
||||
emit(Oload, k, r1, CON(c-fn->con), R);
|
||||
break;
|
||||
}
|
||||
emit(Ocopy, k, r1, r0, R);
|
||||
break;
|
||||
case RTmp:
|
||||
if (isreg(r0))
|
||||
break;
|
||||
s = fn->tmp[r0.val].slot;
|
||||
if (s != -1) {
|
||||
/* aggregate passed by value on
|
||||
* stack, or fast local address,
|
||||
* replace with slot if we can
|
||||
*/
|
||||
if (memarg(r, op, i)) {
|
||||
r1 = SLOT(s);
|
||||
break;
|
||||
}
|
||||
r1 = newtmp("isel", k, fn);
|
||||
emit(Oaddr, k, r1, SLOT(s), R);
|
||||
break;
|
||||
}
|
||||
if (k == Kw && fn->tmp[r0.val].cls == Kl) {
|
||||
/* TODO: this sign extension isn't needed
|
||||
* for 32-bit arithmetic instructions
|
||||
*/
|
||||
r1 = newtmp("isel", k, fn);
|
||||
emit(Oextsw, Kl, r1, r0, R);
|
||||
} else {
|
||||
assert(k == fn->tmp[r0.val].cls);
|
||||
}
|
||||
break;
|
||||
}
|
||||
*r = r1;
|
||||
}
|
||||
|
||||
static void
|
||||
negate(Ref *pr, Fn *fn)
|
||||
{
|
||||
Ref r;
|
||||
|
||||
r = newtmp("isel", Kw, fn);
|
||||
emit(Oxor, Kw, *pr, r, getcon(1, fn));
|
||||
*pr = r;
|
||||
}
|
||||
|
||||
static void
|
||||
selcmp(Ins i, int k, int op, Fn *fn)
|
||||
{
|
||||
Ins *icmp;
|
||||
Ref r, r0, r1;
|
||||
int sign, swap, neg;
|
||||
|
||||
switch (op) {
|
||||
case Cieq:
|
||||
r = newtmp("isel", k, fn);
|
||||
emit(Oreqz, i.cls, i.to, r, R);
|
||||
emit(Oxor, k, r, i.arg[0], i.arg[1]);
|
||||
icmp = curi;
|
||||
fixarg(&icmp->arg[0], k, icmp, fn);
|
||||
fixarg(&icmp->arg[1], k, icmp, fn);
|
||||
return;
|
||||
case Cine:
|
||||
r = newtmp("isel", k, fn);
|
||||
emit(Ornez, i.cls, i.to, r, R);
|
||||
emit(Oxor, k, r, i.arg[0], i.arg[1]);
|
||||
icmp = curi;
|
||||
fixarg(&icmp->arg[0], k, icmp, fn);
|
||||
fixarg(&icmp->arg[1], k, icmp, fn);
|
||||
return;
|
||||
case Cisge: sign = 1, swap = 0, neg = 1; break;
|
||||
case Cisgt: sign = 1, swap = 1, neg = 0; break;
|
||||
case Cisle: sign = 1, swap = 1, neg = 1; break;
|
||||
case Cislt: sign = 1, swap = 0, neg = 0; break;
|
||||
case Ciuge: sign = 0, swap = 0, neg = 1; break;
|
||||
case Ciugt: sign = 0, swap = 1, neg = 0; break;
|
||||
case Ciule: sign = 0, swap = 1, neg = 1; break;
|
||||
case Ciult: sign = 0, swap = 0, neg = 0; break;
|
||||
case NCmpI+Cfeq:
|
||||
case NCmpI+Cfge:
|
||||
case NCmpI+Cfgt:
|
||||
case NCmpI+Cfle:
|
||||
case NCmpI+Cflt:
|
||||
swap = 0, neg = 0;
|
||||
break;
|
||||
case NCmpI+Cfuo:
|
||||
negate(&i.to, fn);
|
||||
/* fall through */
|
||||
case NCmpI+Cfo:
|
||||
r0 = newtmp("isel", i.cls, fn);
|
||||
r1 = newtmp("isel", i.cls, fn);
|
||||
emit(Oand, i.cls, i.to, r0, r1);
|
||||
op = KWIDE(k) ? Oceqd : Oceqs;
|
||||
emit(op, i.cls, r0, i.arg[0], i.arg[0]);
|
||||
icmp = curi;
|
||||
fixarg(&icmp->arg[0], k, icmp, fn);
|
||||
fixarg(&icmp->arg[1], k, icmp, fn);
|
||||
emit(op, i.cls, r1, i.arg[1], i.arg[1]);
|
||||
icmp = curi;
|
||||
fixarg(&icmp->arg[0], k, icmp, fn);
|
||||
fixarg(&icmp->arg[1], k, icmp, fn);
|
||||
return;
|
||||
case NCmpI+Cfne:
|
||||
swap = 0, neg = 1;
|
||||
i.op = KWIDE(k) ? Oceqd : Oceqs;
|
||||
break;
|
||||
default:
|
||||
assert(0 && "unknown comparison");
|
||||
}
|
||||
if (op < NCmpI)
|
||||
i.op = sign ? Ocsltl : Ocultl;
|
||||
if (swap) {
|
||||
r = i.arg[0];
|
||||
i.arg[0] = i.arg[1];
|
||||
i.arg[1] = r;
|
||||
}
|
||||
if (neg)
|
||||
negate(&i.to, fn);
|
||||
emiti(i);
|
||||
icmp = curi;
|
||||
fixarg(&icmp->arg[0], k, icmp, fn);
|
||||
fixarg(&icmp->arg[1], k, icmp, fn);
|
||||
}
|
||||
|
||||
static void
|
||||
sel(Ins i, Fn *fn)
|
||||
{
|
||||
Ins *i0;
|
||||
int ck, cc;
|
||||
|
||||
if (INRANGE(i.op, Oalloc, Oalloc1)) {
|
||||
i0 = curi - 1;
|
||||
salloc(i.to, i.arg[0], fn);
|
||||
fixarg(&i0->arg[0], Kl, i0, fn);
|
||||
return;
|
||||
}
|
||||
if (iscmp(i.op, &ck, &cc)) {
|
||||
selcmp(i, ck, cc, fn);
|
||||
return;
|
||||
}
|
||||
if (i.op != Onop) {
|
||||
emiti(i);
|
||||
i0 = curi; /* fixarg() can change curi */
|
||||
fixarg(&i0->arg[0], argcls(&i, 0), i0, fn);
|
||||
fixarg(&i0->arg[1], argcls(&i, 1), i0, fn);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
seljmp(Blk *b, Fn *fn)
|
||||
{
|
||||
/* TODO: replace cmp+jnz with beq/bne/blt[u]/bge[u] */
|
||||
if (b->jmp.type == Jjnz)
|
||||
fixarg(&b->jmp.arg, Kw, 0, fn);
|
||||
}
|
||||
|
||||
void
|
||||
rv64_isel(Fn *fn)
|
||||
{
|
||||
Blk *b, **sb;
|
||||
Ins *i;
|
||||
Phi *p;
|
||||
uint n;
|
||||
int al;
|
||||
int64_t sz;
|
||||
|
||||
/* assign slots to fast allocs */
|
||||
b = fn->start;
|
||||
/* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
|
||||
for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2)
|
||||
for (i=b->ins; i<&b->ins[b->nins]; i++)
|
||||
if (i->op == al) {
|
||||
if (rtype(i->arg[0]) != RCon)
|
||||
break;
|
||||
sz = fn->con[i->arg[0].val].bits.i;
|
||||
if (sz < 0 || sz >= INT_MAX-15)
|
||||
err("invalid alloc size %"PRId64, sz);
|
||||
sz = (sz + n-1) & -n;
|
||||
sz /= 4;
|
||||
if (sz > INT_MAX - fn->slot)
|
||||
die("alloc too large");
|
||||
fn->tmp[i->to.val].slot = fn->slot;
|
||||
fn->slot += sz;
|
||||
*i = (Ins){.op = Onop};
|
||||
}
|
||||
|
||||
for (b=fn->start; b; b=b->link) {
|
||||
curi = &insb[NIns];
|
||||
for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
|
||||
for (p=(*sb)->phi; p; p=p->link) {
|
||||
for (n=0; p->blk[n] != b; n++)
|
||||
assert(n+1 < p->narg);
|
||||
fixarg(&p->arg[n], p->cls, 0, fn);
|
||||
}
|
||||
seljmp(b, fn);
|
||||
for (i=&b->ins[b->nins]; i!=b->ins;)
|
||||
sel(*--i, fn);
|
||||
idup(b, curi, &insb[NIns]-curi);
|
||||
}
|
||||
|
||||
if (debug['I']) {
|
||||
fprintf(stderr, "\n> After instruction selection:\n");
|
||||
printfn(fn, stderr);
|
||||
}
|
||||
}
|
||||
57
src/qbe/rv64/targ.c
Normal file
57
src/qbe/rv64/targ.c
Normal file
@@ -0,0 +1,57 @@
|
||||
#include "all.h"
|
||||
|
||||
/* Per-opcode rv64 data, indexed by opcode.  The initializers are
 * generated by including ../ops.h with O() expanding to the array
 * designator and V() to the Rv64Op value.
 */
Rv64Op rv64_op[NOp] = {
#define O(op, t, x) [O##op] =
#define V(imm) { imm },
#include "../ops.h"
};
|
||||
|
||||
int rv64_rsave[] = {
|
||||
T0, T1, T2, T3, T4, T5,
|
||||
A0, A1, A2, A3, A4, A5, A6, A7,
|
||||
FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7,
|
||||
FT0, FT1, FT2, FT3, FT4, FT5, FT6, FT7,
|
||||
FT8, FT9, FT10,
|
||||
-1
|
||||
};
|
||||
int rv64_rclob[] = {
|
||||
S1, S2, S3, S4, S5, S6, S7,
|
||||
S8, S9, S10, S11,
|
||||
FS0, FS1, FS2, FS3, FS4, FS5, FS6, FS7,
|
||||
FS8, FS9, FS10, FS11,
|
||||
-1
|
||||
};
|
||||
|
||||
/* bit set of the globally live registers (5 of them, see .nrglob) */
#define RGLOB ( \
	BIT(FP) | BIT(SP) | BIT(GP) | \
	BIT(TP) | BIT(RA) \
)
|
||||
|
||||
/* Target hook installed as .memargs; always answers 0 on rv64,
 * whatever the opcode.
 */
static int
rv64_memargs(int op)
{
	(void)op; /* unused */
	return 0;
}
|
||||
|
||||
Target T_rv64 = {
|
||||
.name = "rv64",
|
||||
.gpr0 = T0,
|
||||
.ngpr = NGPR,
|
||||
.fpr0 = FT0,
|
||||
.nfpr = NFPR,
|
||||
.rglob = RGLOB,
|
||||
.nrglob = 5,
|
||||
.rsave = rv64_rsave,
|
||||
.nrsave = {NGPS, NFPS},
|
||||
.retregs = rv64_retregs,
|
||||
.argregs = rv64_argregs,
|
||||
.memargs = rv64_memargs,
|
||||
.abi0 = elimsb,
|
||||
.abi1 = rv64_abi,
|
||||
.isel = rv64_isel,
|
||||
.emitfn = rv64_emitfn,
|
||||
.emitfin = elf_emitfin,
|
||||
.asloc = ".L",
|
||||
.cansel = 0,
|
||||
};
|
||||
|
||||
MAKESURE(rsave_size_ok, sizeof rv64_rsave == (NGPS+NFPS+1) * sizeof(int));
|
||||
MAKESURE(rclob_size_ok, sizeof rv64_rclob == (NCLR+1) * sizeof(int));
|
||||
124
src/qbe/simpl.c
Normal file
124
src/qbe/simpl.c
Normal file
@@ -0,0 +1,124 @@
|
||||
#include "all.h"
|
||||
|
||||
static void
|
||||
blit(Ref sd[2], int sz, Fn *fn)
|
||||
{
|
||||
struct { int st, ld, cls, size; } *p, tbl[] = {
|
||||
{ Ostorel, Oload, Kl, 8 },
|
||||
{ Ostorew, Oload, Kw, 4 },
|
||||
{ Ostoreh, Oloaduh, Kw, 2 },
|
||||
{ Ostoreb, Oloadub, Kw, 1 }
|
||||
};
|
||||
Ref r, r1, ro;
|
||||
int off, fwd, n;
|
||||
|
||||
fwd = sz >= 0;
|
||||
sz = abs(sz);
|
||||
off = fwd ? sz : 0;
|
||||
for (p=tbl; sz; p++)
|
||||
for (n=p->size; sz>=n; sz-=n) {
|
||||
off -= fwd ? n : 0;
|
||||
r = newtmp("blt", Kl, fn);
|
||||
r1 = newtmp("blt", Kl, fn);
|
||||
ro = getcon(off, fn);
|
||||
emit(p->st, 0, R, r, r1);
|
||||
emit(Oadd, Kl, r1, sd[1], ro);
|
||||
r1 = newtmp("blt", Kl, fn);
|
||||
emit(p->ld, p->cls, r, r1, R);
|
||||
emit(Oadd, Kl, r1, sd[0], ro);
|
||||
off += fwd ? 0 : n;
|
||||
}
|
||||
}
|
||||
|
||||
/* lookup table for ulog2(): maps the top 6 bits of the product
 * computed there to the bit index of the power of two
 */
static int
ulog2_tab64[64] = {
	63,  0,  1, 41, 37,  2, 16, 42,
	38, 29, 32,  3, 12, 17, 43, 55,
	39, 35, 30, 53, 33, 21,  4, 23,
	13,  9, 18,  6, 25, 44, 48, 56,
	62, 40, 36, 15, 28, 31, 11, 54,
	34, 52, 20, 22,  8,  5, 24, 47,
	61, 14, 27, 10, 51, 19,  7, 46,
	60, 26, 50, 45, 59, 49, 58, 57,
};

/* Base-2 logarithm of pow2, which the caller guarantees to be a power
 * of two; done with one multiplication and a table lookup.
 */
static int
ulog2(uint64_t pow2)
{
	uint64_t hash;

	hash = pow2 * 0x5b31ab928877a7eULL;
	return ulog2_tab64[hash >> 58];
}
|
||||
|
||||
/* Nonzero when v has exactly one bit set. */
static int
ispow2(uint64_t v)
{
	if (v == 0)
		return 0;
	return (v & (v - 1)) == 0;
}
|
||||
|
||||
static void
|
||||
ins(Ins **pi, int *new, Blk *b, Fn *fn)
|
||||
{
|
||||
ulong ni;
|
||||
Con *c;
|
||||
Ins *i;
|
||||
Ref r;
|
||||
int n;
|
||||
|
||||
i = *pi;
|
||||
/* simplify more instructions here;
|
||||
* copy 0 into xor, bit rotations,
|
||||
* etc. */
|
||||
switch (i->op) {
|
||||
case Oblit1:
|
||||
assert(i > b->ins);
|
||||
assert((i-1)->op == Oblit0);
|
||||
if (!*new) {
|
||||
curi = &insb[NIns];
|
||||
ni = &b->ins[b->nins] - (i+1);
|
||||
curi -= ni;
|
||||
icpy(curi, i+1, ni);
|
||||
*new = 1;
|
||||
}
|
||||
blit((i-1)->arg, rsval(i->arg[0]), fn);
|
||||
*pi = i-1;
|
||||
return;
|
||||
case Oudiv:
|
||||
case Ourem:
|
||||
r = i->arg[1];
|
||||
if (KBASE(i->cls) == 0)
|
||||
if (rtype(r) == RCon) {
|
||||
c = &fn->con[r.val];
|
||||
if (c->type == CBits)
|
||||
if (ispow2(c->bits.i)) {
|
||||
n = ulog2(c->bits.i);
|
||||
if (i->op == Ourem) {
|
||||
i->op = Oand;
|
||||
i->arg[1] = getcon((1ull<<n) - 1, fn);
|
||||
} else {
|
||||
i->op = Oshr;
|
||||
i->arg[1] = getcon(n, fn);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (*new)
|
||||
emiti(*i);
|
||||
}
|
||||
|
||||
void
|
||||
simpl(Fn *fn)
|
||||
{
|
||||
Blk *b;
|
||||
Ins *i;
|
||||
int new;
|
||||
|
||||
for (b=fn->start; b; b=b->link) {
|
||||
new = 0;
|
||||
for (i=&b->ins[b->nins]; i!=b->ins;) {
|
||||
--i;
|
||||
ins(&i, &new, b, fn);
|
||||
}
|
||||
if (new)
|
||||
idup(b, curi, &insb[NIns]-curi);
|
||||
}
|
||||
}
|
||||
531
src/qbe/spill.c
Normal file
531
src/qbe/spill.c
Normal file
@@ -0,0 +1,531 @@
|
||||
#include "all.h"
|
||||
|
||||
static void
|
||||
aggreg(Blk *hd, Blk *b)
|
||||
{
|
||||
int k;
|
||||
|
||||
/* aggregate looping information at
|
||||
* loop headers */
|
||||
bsunion(hd->gen, b->gen);
|
||||
for (k=0; k<2; k++)
|
||||
if (b->nlive[k] > hd->nlive[k])
|
||||
hd->nlive[k] = b->nlive[k];
|
||||
}
|
||||
|
||||
static void
|
||||
tmpuse(Ref r, int use, int loop, Fn *fn)
|
||||
{
|
||||
Mem *m;
|
||||
Tmp *t;
|
||||
|
||||
if (rtype(r) == RMem) {
|
||||
m = &fn->mem[r.val];
|
||||
tmpuse(m->base, 1, loop, fn);
|
||||
tmpuse(m->index, 1, loop, fn);
|
||||
}
|
||||
else if (rtype(r) == RTmp && r.val >= Tmp0) {
|
||||
t = &fn->tmp[r.val];
|
||||
t->nuse += use;
|
||||
t->ndef += !use;
|
||||
t->cost += loop;
|
||||
}
|
||||
}
|
||||
|
||||
/* evaluate spill costs of temporaries,
|
||||
* this also fills usage information
|
||||
* requires rpo, preds
|
||||
*/
|
||||
void
|
||||
fillcost(Fn *fn)
|
||||
{
|
||||
int n;
|
||||
uint a;
|
||||
Blk *b;
|
||||
Ins *i;
|
||||
Tmp *t;
|
||||
Phi *p;
|
||||
|
||||
loopiter(fn, aggreg);
|
||||
if (debug['S']) {
|
||||
fprintf(stderr, "\n> Loop information:\n");
|
||||
for (b=fn->start; b; b=b->link) {
|
||||
for (a=0; a<b->npred; ++a)
|
||||
if (b->id <= b->pred[a]->id)
|
||||
break;
|
||||
if (a != b->npred) {
|
||||
fprintf(stderr, "\t%-10s", b->name);
|
||||
fprintf(stderr, " (% 3d ", b->nlive[0]);
|
||||
fprintf(stderr, "% 3d) ", b->nlive[1]);
|
||||
dumpts(b->gen, fn->tmp, stderr);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (t=fn->tmp; t-fn->tmp < fn->ntmp; t++) {
|
||||
t->cost = t-fn->tmp < Tmp0 ? UINT_MAX : 0;
|
||||
t->nuse = 0;
|
||||
t->ndef = 0;
|
||||
}
|
||||
for (b=fn->start; b; b=b->link) {
|
||||
for (p=b->phi; p; p=p->link) {
|
||||
t = &fn->tmp[p->to.val];
|
||||
tmpuse(p->to, 0, 0, fn);
|
||||
for (a=0; a<p->narg; a++) {
|
||||
n = p->blk[a]->loop;
|
||||
t->cost += n;
|
||||
tmpuse(p->arg[a], 1, n, fn);
|
||||
}
|
||||
}
|
||||
n = b->loop;
|
||||
for (i=b->ins; i<&b->ins[b->nins]; i++) {
|
||||
tmpuse(i->to, 0, n, fn);
|
||||
tmpuse(i->arg[0], 1, n, fn);
|
||||
tmpuse(i->arg[1], 1, n, fn);
|
||||
}
|
||||
tmpuse(b->jmp.arg, 1, n, fn);
|
||||
}
|
||||
if (debug['S']) {
|
||||
fprintf(stderr, "\n> Spill costs:\n");
|
||||
for (n=Tmp0; n<fn->ntmp; n++)
|
||||
fprintf(stderr, "\t%-10s %d\n",
|
||||
fn->tmp[n].name,
|
||||
fn->tmp[n].cost);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
static BSet *fst; /* temps to prioritize in registers (for tcmp1) */
|
||||
static Tmp *tmp; /* current temporaries (for tcmpX) */
|
||||
static int ntmp; /* current # of temps (for limit) */
|
||||
static int locs; /* stack size used by locals */
|
||||
static int slot4; /* next slot of 4 bytes */
|
||||
static int slot8; /* ditto, 8 bytes */
|
||||
static BSet mask[2][1]; /* class masks */
|
||||
|
||||
static int
|
||||
tcmp0(const void *pa, const void *pb)
|
||||
{
|
||||
uint ca, cb;
|
||||
|
||||
ca = tmp[*(int *)pa].cost;
|
||||
cb = tmp[*(int *)pb].cost;
|
||||
return (cb < ca) ? -1 : (cb > ca);
|
||||
}
|
||||
|
||||
static int
|
||||
tcmp1(const void *pa, const void *pb)
|
||||
{
|
||||
int c;
|
||||
|
||||
c = bshas(fst, *(int *)pb) - bshas(fst, *(int *)pa);
|
||||
return c ? c : tcmp0(pa, pb);
|
||||
}
|
||||
|
||||
static Ref
|
||||
slot(int t)
|
||||
{
|
||||
int s;
|
||||
|
||||
assert(t >= Tmp0 && "cannot spill register");
|
||||
s = tmp[t].slot;
|
||||
if (s == -1) {
|
||||
/* specific to NAlign == 3 */
|
||||
/* nice logic to pack stack slots
|
||||
* on demand, there can be only
|
||||
* one hole and slot4 points to it
|
||||
*
|
||||
* invariant: slot4 <= slot8
|
||||
*/
|
||||
if (KWIDE(tmp[t].cls)) {
|
||||
s = slot8;
|
||||
if (slot4 == slot8)
|
||||
slot4 += 2;
|
||||
slot8 += 2;
|
||||
} else {
|
||||
s = slot4;
|
||||
if (slot4 == slot8) {
|
||||
slot8 += 2;
|
||||
slot4 += 1;
|
||||
} else
|
||||
slot4 = slot8;
|
||||
}
|
||||
s += locs;
|
||||
tmp[t].slot = s;
|
||||
}
|
||||
return SLOT(s);
|
||||
}
|
||||
|
||||
/* restricts b to hold at most k
|
||||
* temporaries, preferring those
|
||||
* present in f (if given), then
|
||||
* those with the largest spill
|
||||
* cost
|
||||
*/
|
||||
static void
|
||||
limit(BSet *b, int k, BSet *f)
|
||||
{
|
||||
static int *tarr, maxt;
|
||||
int i, t, nt;
|
||||
|
||||
nt = bscount(b);
|
||||
if (nt <= k)
|
||||
return;
|
||||
if (nt > maxt) {
|
||||
free(tarr);
|
||||
tarr = emalloc(nt * sizeof tarr[0]);
|
||||
maxt = nt;
|
||||
}
|
||||
for (i=0, t=0; bsiter(b, &t); t++) {
|
||||
bsclr(b, t);
|
||||
tarr[i++] = t;
|
||||
}
|
||||
if (nt > 1) {
|
||||
if (!f)
|
||||
qsort(tarr, nt, sizeof tarr[0], tcmp0);
|
||||
else {
|
||||
fst = f;
|
||||
qsort(tarr, nt, sizeof tarr[0], tcmp1);
|
||||
}
|
||||
}
|
||||
for (i=0; i<k && i<nt; i++)
|
||||
bsset(b, tarr[i]);
|
||||
for (; i<nt; i++)
|
||||
slot(tarr[i]);
|
||||
}
|
||||
|
||||
/* spills temporaries to fit the
|
||||
* target limits using the same
|
||||
* preferences as limit(); assumes
|
||||
* that k1 gprs and k2 fprs are
|
||||
* currently in use
|
||||
*/
|
||||
static void
|
||||
limit2(BSet *b1, int k1, int k2, BSet *f)
|
||||
{
|
||||
BSet b2[1];
|
||||
|
||||
bsinit(b2, ntmp); /* todo, free those */
|
||||
bscopy(b2, b1);
|
||||
bsinter(b1, mask[0]);
|
||||
bsinter(b2, mask[1]);
|
||||
limit(b1, T.ngpr - k1, f);
|
||||
limit(b2, T.nfpr - k2, f);
|
||||
bsunion(b1, b2);
|
||||
}
|
||||
|
||||
static void
|
||||
sethint(BSet *u, bits r)
|
||||
{
|
||||
int t;
|
||||
|
||||
for (t=Tmp0; bsiter(u, &t); t++)
|
||||
tmp[phicls(t, tmp)].hint.m |= r;
|
||||
}
|
||||
|
||||
/* reloads temporaries in u that are
|
||||
* not in v from their slots
|
||||
*/
|
||||
static void
|
||||
reloads(BSet *u, BSet *v)
|
||||
{
|
||||
int t;
|
||||
|
||||
for (t=Tmp0; bsiter(u, &t); t++)
|
||||
if (!bshas(v, t))
|
||||
emit(Oload, tmp[t].cls, TMP(t), slot(t), R);
|
||||
}
|
||||
|
||||
static void
|
||||
store(Ref r, int s)
|
||||
{
|
||||
if (s != -1)
|
||||
emit(Ostorew + tmp[r.val].cls, 0, R, r, SLOT(s));
|
||||
}
|
||||
|
||||
static int
|
||||
regcpy(Ins *i)
|
||||
{
|
||||
return i->op == Ocopy && isreg(i->arg[0]);
|
||||
}
|
||||
|
||||
/* handles the run of consecutive register
 * copies that ends at instruction i of block
 * b, scanning it backwards; v is the set of
 * temporaries in registers and is updated in
 * place; the instructions of the run are
 * re-emitted unchanged and a pointer to the
 * first one is returned
 */
static Ins *
|
||||
dopm(Blk *b, Ins *i, BSet *v)
|
||||
{
|
||||
int n, t;
|
||||
BSet u[1];
|
||||
Ins *i1;
|
||||
bits r;
|
||||
|
||||
bsinit(u, ntmp); /* todo, free those */
|
||||
/* consecutive copies from
|
||||
* registers need to be handled
|
||||
* as one large instruction
|
||||
*
|
||||
* fixme: there is an assumption
|
||||
* that calls are always followed
|
||||
* by copy instructions here, this
|
||||
* might not be true if previous
|
||||
* passes change
|
||||
*/
|
||||
/* i1 marks one past the last copy of the run */
i1 = ++i;
|
||||
do {
|
||||
i--;
|
||||
t = i->to.val;
|
||||
if (!req(i->to, R))
|
||||
if (bshas(v, t)) {
|
||||
/* the destination is defined here: drop it
 * from v, store() skips it if it has no slot */
bsclr(v, t);
|
||||
store(i->to, tmp[t].slot);
|
||||
}
|
||||
/* the copied register is needed before the run */
bsset(v, i->arg[0].val);
|
||||
} while (i != b->ins && regcpy(i-1));
|
||||
/* u remembers v as it was before limiting */
bscopy(u, v);
|
||||
if (i != b->ins && (i-1)->op == Ocall) {
|
||||
/* the run directly follows a call: the
 * return registers leave v, limit2 counts
 * T.nrsave registers as already in use, r
 * collects the T.rsave registers for the
 * hints, and the argument registers join v */
v->t[0] &= ~T.retregs((i-1)->arg[1], 0);
|
||||
limit2(v, T.nrsave[0], T.nrsave[1], 0);
|
||||
for (n=0, r=0; T.rsave[n]>=0; n++)
|
||||
r |= BIT(T.rsave[n]);
|
||||
v->t[0] |= T.argregs((i-1)->arg[1], 0);
|
||||
} else {
|
||||
limit2(v, 0, 0, 0);
|
||||
r = v->t[0];
|
||||
}
|
||||
sethint(v, r);
|
||||
/* reload what limit2 removed from v */
reloads(u, v);
|
||||
/* emit the run, last instruction first */
do
|
||||
emiti(*--i1);
|
||||
while (i1 != i);
|
||||
return i;
|
||||
}
|
||||
|
||||
static void
|
||||
merge(BSet *u, Blk *bu, BSet *v, Blk *bv)
|
||||
{
|
||||
int t;
|
||||
|
||||
if (bu->loop <= bv->loop)
|
||||
bsunion(u, v);
|
||||
else
|
||||
for (t=0; bsiter(v, &t); t++)
|
||||
if (tmp[t].slot == -1)
|
||||
bsset(u, t);
|
||||
}
|
||||
|
||||
/* spill code insertion
|
||||
* requires spill costs, rpo, liveness
|
||||
*
|
||||
* Note: this will replace liveness
|
||||
* information (in, out) with temporaries
|
||||
* that must be in registers at block
|
||||
* borders
|
||||
*
|
||||
* Be careful with:
|
||||
* - Ocopy instructions to ensure register
|
||||
* constraints
|
||||
*
* fn is the function to process: the
* instructions of each block are rebuilt
* and installed with idup(), and fn->slot
* grows by the slots handed out here
|
||||
*/
|
||||
void
|
||||
spill(Fn *fn)
|
||||
{
|
||||
Blk *b, *s1, *s2, *hd, **bp;
|
||||
int j, l, t, k, lvarg[2];
|
||||
uint n;
|
||||
BSet u[1], v[1], w[1];
|
||||
Ins *i;
|
||||
Phi *p;
|
||||
Mem *m;
|
||||
bits r;
|
||||
|
||||
/* set up the file-level state used by the helpers */
tmp = fn->tmp;
|
||||
ntmp = fn->ntmp;
|
||||
bsinit(u, ntmp);
|
||||
bsinit(v, ntmp);
|
||||
bsinit(w, ntmp);
|
||||
bsinit(mask[0], ntmp);
|
||||
bsinit(mask[1], ntmp);
|
||||
locs = fn->slot;
|
||||
slot4 = 0;
|
||||
slot8 = 0;
|
||||
/* fill the class masks: mask[1] gets the
 * ids in [T.fpr0, T.fpr0+T.nfpr) and the
 * temporaries whose KBASE class is 1,
 * mask[0] gets everything else */
for (t=0; t<ntmp; t++) {
|
||||
k = 0;
|
||||
if (t >= T.fpr0 && t < T.fpr0 + T.nfpr)
|
||||
k = 1;
|
||||
if (t >= Tmp0)
|
||||
k = KBASE(tmp[t].cls);
|
||||
bsset(mask[k], t);
|
||||
}
|
||||
|
||||
/* the blocks are visited from the last rpo
 * entry down to the first one */
for (bp=&fn->rpo[fn->nblk]; bp!=fn->rpo;) {
|
||||
b = *--bp;
|
||||
/* invariant: all blocks with bigger rpo got
|
||||
* their in,out updated. */
|
||||
|
||||
/* 1. find temporaries in registers at
|
||||
* the end of the block (put them in v) */
|
||||
curi = 0;
|
||||
s1 = b->s1;
|
||||
s2 = b->s2;
|
||||
/* hd is a successor whose id is not above
 * b's id; if both qualify the one with the
 * larger id is retained */
hd = 0;
|
||||
if (s1 && s1->id <= b->id)
|
||||
hd = s1;
|
||||
if (s2 && s2->id <= b->id)
|
||||
if (!hd || s2->id >= hd->id)
|
||||
hd = s2;
|
||||
if (hd) {
|
||||
/* back-edge */
|
||||
bszero(v);
|
||||
hd->gen->t[0] |= T.rglob; /* don't spill registers */
|
||||
/* one pass per register class */
for (k=0; k<2; k++) {
|
||||
n = k == 0 ? T.ngpr : T.nfpr;
|
||||
/* u: live-out temporaries of class k that
 * are in hd->gen; w: those that are not */
bscopy(u, b->out);
|
||||
bsinter(u, mask[k]);
|
||||
bscopy(w, u);
|
||||
bsinter(u, hd->gen);
|
||||
bsdiff(w, hd->gen);
|
||||
if (bscount(u) < n) {
|
||||
j = bscount(w); /* live through */
|
||||
l = hd->nlive[k];
|
||||
limit(w, n - (l - j), 0);
|
||||
bsunion(u, w);
|
||||
} else
|
||||
limit(u, n, 0);
|
||||
bsunion(v, u);
|
||||
}
|
||||
} else if (s1) {
|
||||
/* avoid reloading temporaries
|
||||
* in the middle of loops */
|
||||
bszero(v);
|
||||
liveon(w, b, s1);
|
||||
merge(v, b, w, s1);
|
||||
if (s2) {
|
||||
liveon(u, b, s2);
|
||||
merge(v, b, u, s2);
|
||||
/* w becomes what is live on both edges and
 * is the preferred set of limit2 below */
bsinter(w, u);
|
||||
}
|
||||
limit2(v, 0, 0, w);
|
||||
} else {
|
||||
/* no successor */
bscopy(v, b->out);
|
||||
if (rtype(b->jmp.arg) == RCall)
|
||||
v->t[0] |= T.retregs(b->jmp.arg, 0);
|
||||
}
|
||||
if (rtype(b->jmp.arg) == RTmp) {
|
||||
/* the jump argument is a temporary: use
 * its slot if it could not stay in v */
t = b->jmp.arg.val;
|
||||
assert(KBASE(tmp[t].cls) == 0);
|
||||
bsset(v, t);
|
||||
limit2(v, 0, 0, NULL);
|
||||
if (!bshas(v, t))
|
||||
b->jmp.arg = slot(t);
|
||||
}
|
||||
/* live-out temporaries left out of v get a slot */
for (t=Tmp0; bsiter(b->out, &t); t++)
|
||||
if (!bshas(v, t))
|
||||
slot(t);
|
||||
bscopy(b->out, v);
|
||||
|
||||
/* 2. process the block instructions */
|
||||
/* the instructions are visited last to
 * first; the new code lies between curi
 * and &insb[NIns] when idup() is called */
curi = &insb[NIns];
|
||||
for (i=&b->ins[b->nins]; i!=b->ins;) {
|
||||
i--;
|
||||
if (regcpy(i)) {
|
||||
i = dopm(b, i, v);
|
||||
continue;
|
||||
}
|
||||
/* w collects the temporaries passed to
 * limit2 as the preferred set */
bszero(w);
|
||||
if (!req(i->to, R)) {
|
||||
assert(rtype(i->to) == RTmp);
|
||||
t = i->to.val;
|
||||
if (bshas(v, t))
|
||||
bsclr(v, t);
|
||||
else {
|
||||
/* make sure we have a reg
|
||||
* for the result */
|
||||
assert(t >= Tmp0 && "dead reg");
|
||||
bsset(v, t);
|
||||
bsset(w, t);
|
||||
}
|
||||
}
|
||||
/* j starts at T.memargs(i->op) and is
 * decremented for each RMem argument */
j = T.memargs(i->op);
|
||||
for (n=0; n<2; n++)
|
||||
if (rtype(i->arg[n]) == RMem)
|
||||
j--;
|
||||
for (n=0; n<2; n++)
|
||||
switch (rtype(i->arg[n])) {
|
||||
case RMem:
|
||||
/* base and index temporaries of a memory
 * operand are added to both v and w */
t = i->arg[n].val;
|
||||
m = &fn->mem[t];
|
||||
if (rtype(m->base) == RTmp) {
|
||||
bsset(v, m->base.val);
|
||||
bsset(w, m->base.val);
|
||||
}
|
||||
if (rtype(m->index) == RTmp) {
|
||||
bsset(v, m->index.val);
|
||||
bsset(w, m->index.val);
|
||||
}
|
||||
break;
|
||||
case RTmp:
|
||||
/* lvarg[n] records if the argument was
 * in v before this instruction was seen */
t = i->arg[n].val;
|
||||
lvarg[n] = bshas(v, t);
|
||||
bsset(v, t);
|
||||
/* the argument is put in w only once j
 * has dropped to zero or below */
if (j-- <= 0)
|
||||
bsset(w, t);
|
||||
break;
|
||||
}
|
||||
bscopy(u, v);
|
||||
limit2(v, 0, 0, w);
|
||||
/* arguments evicted by limit2 are read
 * from their slot directly */
for (n=0; n<2; n++)
|
||||
if (rtype(i->arg[n]) == RTmp) {
|
||||
t = i->arg[n].val;
|
||||
if (!bshas(v, t)) {
|
||||
/* do not reload if the
|
||||
* argument is dead
|
||||
*/
|
||||
if (!lvarg[n])
|
||||
bsclr(u, t);
|
||||
i->arg[n] = slot(t);
|
||||
}
|
||||
}
|
||||
reloads(u, v);
|
||||
if (!req(i->to, R)) {
|
||||
t = i->to.val;
|
||||
store(i->to, tmp[t].slot);
|
||||
if (t >= Tmp0)
|
||||
/* in case i->to was a
|
||||
* dead temporary */
|
||||
bsclr(v, t);
|
||||
}
|
||||
emiti(*i);
|
||||
r = v->t[0]; /* Tmp0 is NBit */
|
||||
if (r)
|
||||
sethint(v, r);
|
||||
}
|
||||
/* check the first word of v once the top
 * of the block is reached */
if (b == fn->start)
|
||||
assert(v->t[0] == (T.rglob | fn->reg));
|
||||
else
|
||||
assert(v->t[0] == T.rglob);
|
||||
|
||||
/* a phi whose result is in v is removed
 * from v and stored (if it has a slot);
 * otherwise its result becomes a slot */
for (p=b->phi; p; p=p->link) {
|
||||
assert(rtype(p->to) == RTmp);
|
||||
t = p->to.val;
|
||||
if (bshas(v, t)) {
|
||||
bsclr(v, t);
|
||||
store(p->to, tmp[t].slot);
|
||||
} else if (bshas(b->in, t))
|
||||
/* only if the phi is live */
|
||||
p->to = slot(p->to.val);
|
||||
}
|
||||
bscopy(b->in, v);
|
||||
idup(b, curi, &insb[NIns]-curi);
|
||||
}
|
||||
|
||||
/* align the locals to a 16 byte boundary */
|
||||
/* specific to NAlign == 3 */
|
||||
slot8 += slot8 & 3;
|
||||
fn->slot += slot8;
|
||||
|
||||
if (debug['S']) {
|
||||
fprintf(stderr, "\n> Block information:\n");
|
||||
for (b=fn->start; b; b=b->link) {
|
||||
fprintf(stderr, "\t%-10s (% 5d) ", b->name, b->loop);
|
||||
dumpts(b->out, fn->tmp, stderr);
|
||||
}
|
||||
fprintf(stderr, "\n> After spilling:\n");
|
||||
printfn(fn, stderr);
|
||||
}
|
||||
}
|
||||
433
src/qbe/ssa.c
Normal file
433
src/qbe/ssa.c
Normal file
@@ -0,0 +1,433 @@
|
||||
#include "all.h"
|
||||
#include <stdarg.h>
|
||||
|
||||
void
|
||||
adduse(Tmp *tmp, int ty, Blk *b, ...)
|
||||
{
|
||||
Use *u;
|
||||
int n;
|
||||
va_list ap;
|
||||
|
||||
if (!tmp->use)
|
||||
return;
|
||||
va_start(ap, b);
|
||||
n = tmp->nuse;
|
||||
vgrow(&tmp->use, ++tmp->nuse);
|
||||
u = &tmp->use[n];
|
||||
u->type = ty;
|
||||
u->bid = b->id;
|
||||
switch (ty) {
|
||||
case UPhi:
|
||||
u->u.phi = va_arg(ap, Phi *);
|
||||
break;
|
||||
case UIns:
|
||||
u->u.ins = va_arg(ap, Ins *);
|
||||
break;
|
||||
case UJmp:
|
||||
break;
|
||||
default:
|
||||
die("unreachable");
|
||||
}
|
||||
va_end(ap);
|
||||
}
|
||||
|
||||
/* fill usage, width, phi, and class information
|
||||
* must not change .visit fields
|
||||
*
* for every temporary from Tmp0 on, the def,
* bid, ndef, nuse, cls, phi and width fields
* are reset and then recomputed from the phis,
* instructions and jumps of fn
|
||||
*/
|
||||
void
|
||||
filluse(Fn *fn)
|
||||
{
|
||||
Blk *b;
|
||||
Phi *p;
|
||||
Ins *i;
|
||||
int m, t, tp, w, x;
|
||||
uint a;
|
||||
Tmp *tmp;
|
||||
|
||||
/* reset pass */
tmp = fn->tmp;
|
||||
for (t=Tmp0; t<fn->ntmp; t++) {
|
||||
tmp[t].def = 0;
|
||||
tmp[t].bid = -1u;
|
||||
tmp[t].ndef = 0;
|
||||
tmp[t].nuse = 0;
|
||||
tmp[t].cls = 0;
|
||||
tmp[t].phi = 0;
|
||||
tmp[t].width = WFull;
|
||||
/* adduse() ignores temporaries without a
 * use vector, so create the missing ones */
if (tmp[t].use == 0)
|
||||
tmp[t].use = vnew(0, sizeof(Use), PFn);
|
||||
}
|
||||
for (b=fn->start; b; b=b->link) {
|
||||
/* phis: record the definition, then one
 * UPhi use per temporary argument */
for (p=b->phi; p; p=p->link) {
|
||||
assert(rtype(p->to) == RTmp);
|
||||
tp = p->to.val;
|
||||
tmp[tp].bid = b->id;
|
||||
tmp[tp].ndef++;
|
||||
tmp[tp].cls = p->cls;
|
||||
tp = phicls(tp, fn->tmp);
|
||||
for (a=0; a<p->narg; a++)
|
||||
if (rtype(p->arg[a]) == RTmp) {
|
||||
t = p->arg[a].val;
|
||||
adduse(&tmp[t], UPhi, b, p);
|
||||
/* link the phi class of the argument to
 * the phi class of the result */
t = phicls(t, fn->tmp);
|
||||
if (t != tp)
|
||||
tmp[t].phi = tp;
|
||||
}
|
||||
}
|
||||
for (i=b->ins; i<&b->ins[b->nins]; i++) {
|
||||
if (!req(i->to, R)) {
|
||||
assert(rtype(i->to) == RTmp);
|
||||
/* w is the width recorded for the result:
 * WFull unless the opcode is one of the
 * sub-word parameters, loads or extensions
 * (Wsb plus the opcode offset), or a
 * comparison (Wub) */
w = WFull;
|
||||
if (isparbh(i->op))
|
||||
w = Wsb + (i->op - Oparsb);
|
||||
if (isload(i->op) && i->op != Oload)
|
||||
w = Wsb + (i->op - Oloadsb);
|
||||
if (isext(i->op))
|
||||
w = Wsb + (i->op - Oextsb);
|
||||
if (iscmp(i->op, &x, &x))
|
||||
w = Wub;
|
||||
/* a word-wide width on a Kw result is
 * recorded as WFull */
if (w == Wsw || w == Wuw)
|
||||
if (i->cls == Kw)
|
||||
w = WFull;
|
||||
t = i->to.val;
|
||||
tmp[t].width = w;
|
||||
tmp[t].def = i;
|
||||
tmp[t].bid = b->id;
|
||||
tmp[t].ndef++;
|
||||
tmp[t].cls = i->cls;
|
||||
}
|
||||
/* one UIns use per temporary argument */
for (m=0; m<2; m++)
|
||||
if (rtype(i->arg[m]) == RTmp) {
|
||||
t = i->arg[m].val;
|
||||
adduse(&tmp[t], UIns, b, i);
|
||||
}
|
||||
}
|
||||
/* a temporary jump argument is a UJmp use */
if (rtype(b->jmp.arg) == RTmp)
|
||||
adduse(&tmp[b->jmp.arg.val], UJmp, b);
|
||||
}
|
||||
}
|
||||
|
||||
static Ref
|
||||
refindex(int t, Fn *fn)
|
||||
{
|
||||
return newtmp(fn->tmp[t].name, fn->tmp[t].cls, fn);
|
||||
}
|
||||
|
||||
/* phi insertion: for each temporary that
 * needs it, phis are added in the frontier
 * blocks (b->fron) of its definitions where
 * it is live on entry (b->in); temporaries
 * that got phis or have a live-out
 * definition are tagged with a nonzero
 * visit field for the renaming pass
 */
static void
|
||||
phiins(Fn *fn)
|
||||
{
|
||||
BSet u[1], defs[1];
|
||||
Blk *a, *b, **blist, **be, **bp;
|
||||
Ins *i;
|
||||
Phi *p;
|
||||
Use *use;
|
||||
Ref r;
|
||||
int t, nt, ok;
|
||||
uint n, defb;
|
||||
short k;
|
||||
|
||||
bsinit(u, fn->nblk);
|
||||
bsinit(defs, fn->nblk);
|
||||
/* blist is a work stack of blocks that
 * grows downwards from be */
blist = emalloc(fn->nblk * sizeof blist[0]);
|
||||
be = &blist[fn->nblk];
|
||||
/* nt is sampled now, the temporaries made
 * by refindex() below are not visited */
nt = fn->ntmp;
|
||||
for (t=Tmp0; t<nt; t++) {
|
||||
fn->tmp[t].visit = 0;
|
||||
if (fn->tmp[t].phi != 0)
|
||||
continue;
|
||||
if (fn->tmp[t].ndef == 1) {
|
||||
/* a single definition needs no phi when
 * all the uses are in the defining block
 * or when that block is the start block */
ok = 1;
|
||||
defb = fn->tmp[t].bid;
|
||||
use = fn->tmp[t].use;
|
||||
for (n=fn->tmp[t].nuse; n--; use++)
|
||||
ok &= use->bid == defb;
|
||||
if (ok || defb == fn->start->id)
|
||||
continue;
|
||||
}
|
||||
bszero(u);
|
||||
k = Kx;
|
||||
bp = be;
|
||||
for (b=fn->start; b; b=b->link) {
|
||||
b->visit = 0;
|
||||
/* r is the block-local replacement of t,
 * R as long as there is none */
r = R;
|
||||
for (i=b->ins; i<&b->ins[b->nins]; i++) {
|
||||
if (!req(r, R)) {
|
||||
if (req(i->arg[0], TMP(t)))
|
||||
i->arg[0] = r;
|
||||
if (req(i->arg[1], TMP(t)))
|
||||
i->arg[1] = r;
|
||||
}
|
||||
if (req(i->to, TMP(t))) {
|
||||
if (!bshas(b->out, t)) {
|
||||
/* t is not live out of b: the definition
 * gets a fresh temporary right away */
r = refindex(t, fn);
|
||||
i->to = r;
|
||||
} else {
|
||||
/* t is live out of b: push b once and
 * merge the class of the definition */
if (!bshas(u, b->id)) {
|
||||
bsset(u, b->id);
|
||||
*--bp = b;
|
||||
}
|
||||
if (clsmerge(&k, i->cls))
|
||||
die("invalid input");
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!req(r, R) && req(b->jmp.arg, TMP(t)))
|
||||
b->jmp.arg = r;
|
||||
}
|
||||
/* defs keeps the blocks pushed for their
 * definitions; u tracks what is stacked */
bscopy(defs, u);
|
||||
while (bp != be) {
|
||||
fn->tmp[t].visit = t;
|
||||
b = *bp++;
|
||||
bsclr(u, b->id);
|
||||
for (n=0; n<b->nfron; n++) {
|
||||
a = b->fron[n];
|
||||
/* a->visit makes sure that a gets at most
 * one phi for t */
if (a->visit++ == 0)
|
||||
if (bshas(a->in, t)) {
|
||||
p = alloc(sizeof *p);
|
||||
p->cls = k;
|
||||
p->to = TMP(t);
|
||||
p->link = a->phi;
|
||||
p->arg = vnew(0, sizeof p->arg[0], PFn);
|
||||
p->blk = vnew(0, sizeof p->blk[0], PFn);
|
||||
a->phi = p;
|
||||
/* the new phi is a definition too: push a
 * unless it was pushed for its definitions
 * or is on the stack right now */
if (!bshas(defs, a->id))
|
||||
if (!bshas(u, a->id)) {
|
||||
bsset(u, a->id);
|
||||
*--bp = a;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
free(blist);
|
||||
}
|
||||
|
||||
/* entry of a renaming stack (one stack per
 * temporary, see rendef and getstk); freed
 * entries are chained on namel through up
 */
typedef struct Name Name;
|
||||
struct Name {
|
||||
/* reference that stands for the temporary */
Ref r;
|
||||
/* block stored with the entry, getstk()
 * tests it with dom() */
Blk *b;
|
||||
/* next entry down the stack */
Name *up;
|
||||
};
|
||||
|
||||
static Name *namel;
|
||||
|
||||
static Name *
|
||||
nnew(Ref r, Blk *b, Name *up)
|
||||
{
|
||||
Name *n;
|
||||
|
||||
if (namel) {
|
||||
n = namel;
|
||||
namel = n->up;
|
||||
} else
|
||||
/* could use alloc, here
|
||||
* but namel should be reset
|
||||
*/
|
||||
n = emalloc(sizeof *n);
|
||||
n->r = r;
|
||||
n->b = b;
|
||||
n->up = up;
|
||||
return n;
|
||||
}
|
||||
|
||||
static void
|
||||
nfree(Name *n)
|
||||
{
|
||||
n->up = namel;
|
||||
namel = n;
|
||||
}
|
||||
|
||||
static void
|
||||
rendef(Ref *r, Blk *b, Name **stk, Fn *fn)
|
||||
{
|
||||
Ref r1;
|
||||
int t;
|
||||
|
||||
t = r->val;
|
||||
if (req(*r, R) || !fn->tmp[t].visit)
|
||||
return;
|
||||
r1 = refindex(t, fn);
|
||||
fn->tmp[r1.val].visit = t;
|
||||
stk[t] = nnew(r1, b, stk[t]);
|
||||
*r = r1;
|
||||
}
|
||||
|
||||
static Ref
|
||||
getstk(int t, Blk *b, Name **stk)
|
||||
{
|
||||
Name *n, *n1;
|
||||
|
||||
n = stk[t];
|
||||
while (n && !dom(n->b, b)) {
|
||||
n1 = n;
|
||||
n = n->up;
|
||||
nfree(n1);
|
||||
}
|
||||
stk[t] = n;
|
||||
if (!n) {
|
||||
/* uh, oh, warn */
|
||||
return UNDEF;
|
||||
} else
|
||||
return n->r;
|
||||
}
|
||||
|
||||
/* renaming pass for block b and, by
 * recursion, for the blocks reached through
 * b->dom / dlink; stk holds one name stack
 * per temporary; only temporaries with a
 * nonzero visit field are renamed
 */
static void
|
||||
renblk(Blk *b, Name **stk, Fn *fn)
|
||||
{
|
||||
Phi *p;
|
||||
Ins *i;
|
||||
Blk *s, **ps, *succ[3];
|
||||
int t, m;
|
||||
|
||||
/* phi results are defined first */
for (p=b->phi; p; p=p->link)
|
||||
rendef(&p->to, b, stk, fn);
|
||||
for (i=b->ins; i<&b->ins[b->nins]; i++) {
|
||||
/* the arguments are rewritten before the
 * result gets its new name */
for (m=0; m<2; m++) {
|
||||
t = i->arg[m].val;
|
||||
if (rtype(i->arg[m]) == RTmp)
|
||||
if (fn->tmp[t].visit)
|
||||
i->arg[m] = getstk(t, b, stk);
|
||||
}
|
||||
rendef(&i->to, b, stk, fn);
|
||||
}
|
||||
t = b->jmp.arg.val;
|
||||
if (rtype(b->jmp.arg) == RTmp)
|
||||
if (fn->tmp[t].visit)
|
||||
b->jmp.arg = getstk(t, b, stk);
|
||||
/* null-terminated list of the successors,
 * s2 is left out when it equals s1 */
succ[0] = b->s1;
|
||||
succ[1] = b->s2 == b->s1 ? 0 : b->s2;
|
||||
succ[2] = 0;
|
||||
for (ps=succ; (s=*ps); ps++)
|
||||
for (p=s->phi; p; p=p->link) {
|
||||
t = p->to.val;
|
||||
/* the visit field of a renamed phi result
 * holds the id of the original temporary */
if ((t=fn->tmp[t].visit)) {
|
||||
/* append the argument that flows from b */
m = p->narg++;
|
||||
vgrow(&p->arg, p->narg);
|
||||
vgrow(&p->blk, p->narg);
|
||||
p->arg[m] = getstk(t, b, stk);
|
||||
p->blk[m] = b;
|
||||
}
|
||||
}
|
||||
for (s=b->dom; s; s=s->dlink)
|
||||
renblk(s, stk, fn);
|
||||
}
|
||||
|
||||
/* require rpo and use */
|
||||
/* ssa construction driver for fn: runs
 * filldom, fillfron and filllive, inserts
 * the phis (phiins) and renames from the
 * start block (renblk)
 */
void
|
||||
ssa(Fn *fn)
|
||||
{
|
||||
Name **stk, *n;
|
||||
int d, nt;
|
||||
Blk *b, *b1;
|
||||
|
||||
nt = fn->ntmp;
|
||||
/* one name stack per temporary known at
 * this point; NOTE(review): the stacks are
 * read before being written, so emalloc is
 * presumably returning zeroed memory,
 * confirm */
stk = emalloc(nt * sizeof stk[0]);
|
||||
/* the 'L' debug flag is cleared during the
 * construction and restored afterwards */
d = debug['L'];
|
||||
debug['L'] = 0;
|
||||
filldom(fn);
|
||||
if (debug['N']) {
|
||||
fprintf(stderr, "\n> Dominators:\n");
|
||||
for (b1=fn->start; b1; b1=b1->link) {
|
||||
if (!b1->dom)
|
||||
continue;
|
||||
fprintf(stderr, "%10s:", b1->name);
|
||||
for (b=b1->dom; b; b=b->dlink)
|
||||
fprintf(stderr, " %s", b->name);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
fillfron(fn);
|
||||
filllive(fn);
|
||||
phiins(fn);
|
||||
renblk(fn->start, stk, fn);
|
||||
/* hand the remaining stack entries back
 * to the free list */
while (nt--)
|
||||
while ((n=stk[nt])) {
|
||||
stk[nt] = n->up;
|
||||
nfree(n);
|
||||
}
|
||||
debug['L'] = d;
|
||||
free(stk);
|
||||
if (debug['N']) {
|
||||
fprintf(stderr, "\n> After SSA construction:\n");
|
||||
printfn(fn, stderr);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
phicheck(Phi *p, Blk *b, Ref t)
|
||||
{
|
||||
Blk *b1;
|
||||
uint n;
|
||||
|
||||
for (n=0; n<p->narg; n++)
|
||||
if (req(p->arg[n], t)) {
|
||||
b1 = p->blk[n];
|
||||
if (b1 != b && !sdom(b, b1))
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* require use and ssa */
|
||||
/* checks fn: a temporary must not be
 * defined more than once nor used without
 * a definition, and each use of a phi or
 * instruction result must pass the sdom /
 * phicheck tests against the defining block
 */
void
|
||||
ssacheck(Fn *fn)
|
||||
{
|
||||
Tmp *t;
|
||||
Ins *i;
|
||||
Phi *p;
|
||||
Use *u;
|
||||
Blk *b, *bu;
|
||||
Ref r;
|
||||
|
||||
for (t=&fn->tmp[Tmp0]; t-fn->tmp < fn->ntmp; t++) {
|
||||
if (t->ndef > 1)
|
||||
err("ssa temporary %%%s defined more than once",
|
||||
t->name);
|
||||
if (t->nuse > 0 && t->ndef == 0) {
|
||||
/* report the block of the first use */
bu = fn->rpo[t->use[0].bid];
|
||||
goto Err;
|
||||
}
|
||||
}
|
||||
for (b=fn->start; b; b=b->link) {
|
||||
/* results of phis */
for (p=b->phi; p; p=p->link) {
|
||||
r = p->to;
|
||||
t = &fn->tmp[r.val];
|
||||
for (u=t->use; u<&t->use[t->nuse]; u++) {
|
||||
bu = fn->rpo[u->bid];
|
||||
if (u->type == UPhi) {
|
||||
if (phicheck(u->u.phi, b, r))
|
||||
goto Err;
|
||||
} else
|
||||
if (bu != b && !sdom(b, bu))
|
||||
goto Err;
|
||||
}
|
||||
}
|
||||
/* results of instructions */
for (i=b->ins; i<&b->ins[b->nins]; i++) {
|
||||
if (rtype(i->to) != RTmp)
|
||||
continue;
|
||||
r = i->to;
|
||||
t = &fn->tmp[r.val];
|
||||
for (u=t->use; u<&t->use[t->nuse]; u++) {
|
||||
bu = fn->rpo[u->bid];
|
||||
if (u->type == UPhi) {
|
||||
if (phicheck(u->u.phi, b, r))
|
||||
goto Err;
|
||||
} else {
|
||||
if (bu == b) {
|
||||
/* in the defining block, an instruction
 * using the result must come after i */
if (u->type == UIns)
|
||||
if (u->u.ins <= i)
|
||||
goto Err;
|
||||
} else
|
||||
if (!sdom(b, bu))
|
||||
goto Err;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
Err:
|
||||
/* t and bu are the offending temporary and
 * the block of the use; a nonzero visit
 * field selects die() over err() */
if (t->visit)
|
||||
die("%%%s violates ssa invariant", t->name);
|
||||
else
|
||||
err("ssa temporary %%%s is used undefined in @%s",
|
||||
t->name, bu->name);
|
||||
}
|
||||
25
src/qbe/test/_alt.ssa
Normal file
25
src/qbe/test/_alt.ssa
Normal file
@@ -0,0 +1,25 @@
|
||||
# an example with reducible control
|
||||
# flow graph that exposes poor
|
||||
# handling of looping constructs
|
||||
|
||||
function $test() {
|
||||
@start
|
||||
%ten =w copy 10
|
||||
%dum =w copy 0 # dummy live-through temporary
|
||||
@loop
|
||||
%alt =w phi @start 0, @left %alt1, @right %alt1
|
||||
%cnt =w phi @start 100, @left %cnt, @right %cnt1
|
||||
%alt1 =w sub 1, %alt
|
||||
jnz %alt1, @right, @left
|
||||
@left
|
||||
%x =w phi @loop 10, @left %x1
|
||||
%x1 =w sub %x, 1
|
||||
%z =w copy %x
|
||||
jnz %z, @left, @loop
|
||||
@right
|
||||
%cnt1 =w sub %cnt, %ten
|
||||
jnz %cnt1, @loop, @end
|
||||
@end
|
||||
%ret =w add %cnt, %dum
|
||||
ret
|
||||
}
|
||||
2687
src/qbe/test/_bf99.ssa
Normal file
2687
src/qbe/test/_bf99.ssa
Normal file
File diff suppressed because it is too large
Load Diff
9079
src/qbe/test/_bfmandel.ssa
Normal file
9079
src/qbe/test/_bfmandel.ssa
Normal file
File diff suppressed because it is too large
Load Diff
233
src/qbe/test/_chacha20.ssa
Normal file
233
src/qbe/test/_chacha20.ssa
Normal file
@@ -0,0 +1,233 @@
|
||||
export function $chacha20_rounds_qbe(l %out, l %in) {
|
||||
@start
|
||||
%t0 =w loadw %in
|
||||
%in =l add %in, 4
|
||||
%t1 =w loadw %in
|
||||
%in =l add %in, 4
|
||||
%t2 =w loadw %in
|
||||
%in =l add %in, 4
|
||||
%t3 =w loadw %in
|
||||
%in =l add %in, 4
|
||||
%t4 =w loadw %in
|
||||
%in =l add %in, 4
|
||||
%t5 =w loadw %in
|
||||
%in =l add %in, 4
|
||||
%t6 =w loadw %in
|
||||
%in =l add %in, 4
|
||||
%t7 =w loadw %in
|
||||
%in =l add %in, 4
|
||||
%t8 =w loadw %in
|
||||
%in =l add %in, 4
|
||||
%t9 =w loadw %in
|
||||
%in =l add %in, 4
|
||||
%t10 =w loadw %in
|
||||
%in =l add %in, 4
|
||||
%t11 =w loadw %in
|
||||
%in =l add %in, 4
|
||||
%t12 =w loadw %in
|
||||
%in =l add %in, 4
|
||||
%t13 =w loadw %in
|
||||
%in =l add %in, 4
|
||||
%t14 =w loadw %in
|
||||
%in =l add %in, 4
|
||||
%t15 =w loadw %in
|
||||
%in =l add %in, 4
|
||||
%counter =w copy 10
|
||||
@loop
|
||||
%t0 =w add %t0, %t4
|
||||
%t12 =w xor %t12, %t0
|
||||
%rotl32_a =w shl %t12, 16
|
||||
%rotl32_b =w shr %t12, 16
|
||||
%t12 =w xor %rotl32_a, %rotl32_b
|
||||
%t8 =w add %t8, %t12
|
||||
%t4 =w xor %t4, %t8
|
||||
%rotl32_a =w shl %t4, 12
|
||||
%rotl32_b =w shr %t4, 20
|
||||
%t4 =w xor %rotl32_a, %rotl32_b
|
||||
%t0 =w add %t0, %t4
|
||||
%t12 =w xor %t12, %t0
|
||||
%rotl32_a =w shl %t12, 8
|
||||
%rotl32_b =w shr %t12, 24
|
||||
%t12 =w xor %rotl32_a, %rotl32_b
|
||||
%t8 =w add %t8, %t12
|
||||
%t4 =w xor %t4, %t8
|
||||
%rotl32_a =w shl %t4, 7
|
||||
%rotl32_b =w shr %t4, 25
|
||||
%t4 =w xor %rotl32_a, %rotl32_b
|
||||
%t1 =w add %t1, %t5
|
||||
%t13 =w xor %t13, %t1
|
||||
%rotl32_a =w shl %t13, 16
|
||||
%rotl32_b =w shr %t13, 16
|
||||
%t13 =w xor %rotl32_a, %rotl32_b
|
||||
%t9 =w add %t9, %t13
|
||||
%t5 =w xor %t5, %t9
|
||||
%rotl32_a =w shl %t5, 12
|
||||
%rotl32_b =w shr %t5, 20
|
||||
%t5 =w xor %rotl32_a, %rotl32_b
|
||||
%t1 =w add %t1, %t5
|
||||
%t13 =w xor %t13, %t1
|
||||
%rotl32_a =w shl %t13, 8
|
||||
%rotl32_b =w shr %t13, 24
|
||||
%t13 =w xor %rotl32_a, %rotl32_b
|
||||
%t9 =w add %t9, %t13
|
||||
%t5 =w xor %t5, %t9
|
||||
%rotl32_a =w shl %t5, 7
|
||||
%rotl32_b =w shr %t5, 25
|
||||
%t5 =w xor %rotl32_a, %rotl32_b
|
||||
%t2 =w add %t2, %t6
|
||||
%t14 =w xor %t14, %t2
|
||||
%rotl32_a =w shl %t14, 16
|
||||
%rotl32_b =w shr %t14, 16
|
||||
%t14 =w xor %rotl32_a, %rotl32_b
|
||||
%t10 =w add %t10, %t14
|
||||
%t6 =w xor %t6, %t10
|
||||
%rotl32_a =w shl %t6, 12
|
||||
%rotl32_b =w shr %t6, 20
|
||||
%t6 =w xor %rotl32_a, %rotl32_b
|
||||
%t2 =w add %t2, %t6
|
||||
%t14 =w xor %t14, %t2
|
||||
%rotl32_a =w shl %t14, 8
|
||||
%rotl32_b =w shr %t14, 24
|
||||
%t14 =w xor %rotl32_a, %rotl32_b
|
||||
%t10 =w add %t10, %t14
|
||||
%t6 =w xor %t6, %t10
|
||||
%rotl32_a =w shl %t6, 7
|
||||
%rotl32_b =w shr %t6, 25
|
||||
%t6 =w xor %rotl32_a, %rotl32_b
|
||||
%t3 =w add %t3, %t7
|
||||
%t15 =w xor %t15, %t3
|
||||
%rotl32_a =w shl %t15, 16
|
||||
%rotl32_b =w shr %t15, 16
|
||||
%t15 =w xor %rotl32_a, %rotl32_b
|
||||
%t11 =w add %t11, %t15
|
||||
%t7 =w xor %t7, %t11
|
||||
%rotl32_a =w shl %t7, 12
|
||||
%rotl32_b =w shr %t7, 20
|
||||
%t7 =w xor %rotl32_a, %rotl32_b
|
||||
%t3 =w add %t3, %t7
|
||||
%t15 =w xor %t15, %t3
|
||||
%rotl32_a =w shl %t15, 8
|
||||
%rotl32_b =w shr %t15, 24
|
||||
%t15 =w xor %rotl32_a, %rotl32_b
|
||||
%t11 =w add %t11, %t15
|
||||
%t7 =w xor %t7, %t11
|
||||
%rotl32_a =w shl %t7, 7
|
||||
%rotl32_b =w shr %t7, 25
|
||||
%t7 =w xor %rotl32_a, %rotl32_b
|
||||
%t0 =w add %t0, %t5
|
||||
%t15 =w xor %t15, %t0
|
||||
%rotl32_a =w shl %t15, 16
|
||||
%rotl32_b =w shr %t15, 16
|
||||
%t15 =w xor %rotl32_a, %rotl32_b
|
||||
%t10 =w add %t10, %t15
|
||||
%t5 =w xor %t5, %t10
|
||||
%rotl32_a =w shl %t5, 12
|
||||
%rotl32_b =w shr %t5, 20
|
||||
%t5 =w xor %rotl32_a, %rotl32_b
|
||||
%t0 =w add %t0, %t5
|
||||
%t15 =w xor %t15, %t0
|
||||
%rotl32_a =w shl %t15, 8
|
||||
%rotl32_b =w shr %t15, 24
|
||||
%t15 =w xor %rotl32_a, %rotl32_b
|
||||
%t10 =w add %t10, %t15
|
||||
%t5 =w xor %t5, %t10
|
||||
%rotl32_a =w shl %t5, 7
|
||||
%rotl32_b =w shr %t5, 25
|
||||
%t5 =w xor %rotl32_a, %rotl32_b
|
||||
%t1 =w add %t1, %t6
|
||||
%t12 =w xor %t12, %t1
|
||||
%rotl32_a =w shl %t12, 16
|
||||
%rotl32_b =w shr %t12, 16
|
||||
%t12 =w xor %rotl32_a, %rotl32_b
|
||||
%t11 =w add %t11, %t12
|
||||
%t6 =w xor %t6, %t11
|
||||
%rotl32_a =w shl %t6, 12
|
||||
%rotl32_b =w shr %t6, 20
|
||||
%t6 =w xor %rotl32_a, %rotl32_b
|
||||
%t1 =w add %t1, %t6
|
||||
%t12 =w xor %t12, %t1
|
||||
%rotl32_a =w shl %t12, 8
|
||||
%rotl32_b =w shr %t12, 24
|
||||
%t12 =w xor %rotl32_a, %rotl32_b
|
||||
%t11 =w add %t11, %t12
|
||||
%t6 =w xor %t6, %t11
|
||||
%rotl32_a =w shl %t6, 7
|
||||
%rotl32_b =w shr %t6, 25
|
||||
%t6 =w xor %rotl32_a, %rotl32_b
|
||||
%t2 =w add %t2, %t7
|
||||
%t13 =w xor %t13, %t2
|
||||
%rotl32_a =w shl %t13, 16
|
||||
%rotl32_b =w shr %t13, 16
|
||||
%t13 =w xor %rotl32_a, %rotl32_b
|
||||
%t8 =w add %t8, %t13
|
||||
%t7 =w xor %t7, %t8
|
||||
%rotl32_a =w shl %t7, 12
|
||||
%rotl32_b =w shr %t7, 20
|
||||
%t7 =w xor %rotl32_a, %rotl32_b
|
||||
%t2 =w add %t2, %t7
|
||||
%t13 =w xor %t13, %t2
|
||||
%rotl32_a =w shl %t13, 8
|
||||
%rotl32_b =w shr %t13, 24
|
||||
%t13 =w xor %rotl32_a, %rotl32_b
|
||||
%t8 =w add %t8, %t13
|
||||
%t7 =w xor %t7, %t8
|
||||
%rotl32_a =w shl %t7, 7
|
||||
%rotl32_b =w shr %t7, 25
|
||||
%t7 =w xor %rotl32_a, %rotl32_b
|
||||
%t3 =w add %t3, %t4
|
||||
%t14 =w xor %t14, %t3
|
||||
%rotl32_a =w shl %t14, 16
|
||||
%rotl32_b =w shr %t14, 16
|
||||
%t14 =w xor %rotl32_a, %rotl32_b
|
||||
%t9 =w add %t9, %t14
|
||||
%t4 =w xor %t4, %t9
|
||||
%rotl32_a =w shl %t4, 12
|
||||
%rotl32_b =w shr %t4, 20
|
||||
%t4 =w xor %rotl32_a, %rotl32_b
|
||||
%t3 =w add %t3, %t4
|
||||
%t14 =w xor %t14, %t3
|
||||
%rotl32_a =w shl %t14, 8
|
||||
%rotl32_b =w shr %t14, 24
|
||||
%t14 =w xor %rotl32_a, %rotl32_b
|
||||
%t9 =w add %t9, %t14
|
||||
%t4 =w xor %t4, %t9
|
||||
%rotl32_a =w shl %t4, 7
|
||||
%rotl32_b =w shr %t4, 25
|
||||
%t4 =w xor %rotl32_a, %rotl32_b
|
||||
%counter =w sub %counter, 10
|
||||
jnz %counter, @loop, @done
|
||||
@done
|
||||
storew %t0, %out
|
||||
%out =l add %out, 4
|
||||
storew %t1, %out
|
||||
%out =l add %out, 4
|
||||
storew %t2, %out
|
||||
%out =l add %out, 4
|
||||
storew %t3, %out
|
||||
%out =l add %out, 4
|
||||
storew %t4, %out
|
||||
%out =l add %out, 4
|
||||
storew %t5, %out
|
||||
%out =l add %out, 4
|
||||
storew %t6, %out
|
||||
%out =l add %out, 4
|
||||
storew %t7, %out
|
||||
%out =l add %out, 4
|
||||
storew %t8, %out
|
||||
%out =l add %out, 4
|
||||
storew %t9, %out
|
||||
%out =l add %out, 4
|
||||
storew %t10, %out
|
||||
%out =l add %out, 4
|
||||
storew %t11, %out
|
||||
%out =l add %out, 4
|
||||
storew %t12, %out
|
||||
%out =l add %out, 4
|
||||
storew %t13, %out
|
||||
%out =l add %out, 4
|
||||
storew %t14, %out
|
||||
%out =l add %out, 4
|
||||
storew %t15, %out
|
||||
%out =l add %out, 4
|
||||
ret
|
||||
}
|
||||
33
src/qbe/test/_dragon.ssa
Normal file
33
src/qbe/test/_dragon.ssa
Normal file
@@ -0,0 +1,33 @@
|
||||
# a moderately complex test for
|
||||
# dominators computation from
|
||||
# the dragon book
|
||||
# because branching is limited to
|
||||
# two, I had to split some blocks
|
||||
|
||||
function $dragon() {
|
||||
@start
|
||||
@b1
|
||||
jnz 0, @b2, @b3
|
||||
@b2
|
||||
jmp @b3
|
||||
@b3
|
||||
jmp @b4.1
|
||||
@b4.1
|
||||
jnz 0, @b3, @b4.2
|
||||
@b4.2
|
||||
jnz 0, @b5, @b6
|
||||
@b5
|
||||
jmp @b7
|
||||
@b6
|
||||
jmp @b7
|
||||
@b7
|
||||
jnz 0, @b8.1, @b4.1
|
||||
@b8.1
|
||||
jnz 0, @b3, @b8.2
|
||||
@b8.2
|
||||
jnz 0, @b9, @b10
|
||||
@b9
|
||||
jmp @b1
|
||||
@b10
|
||||
jmp @b7
|
||||
}
|
||||
15
src/qbe/test/_fix1.ssa
Normal file
15
src/qbe/test/_fix1.ssa
Normal file
@@ -0,0 +1,15 @@
|
||||
function $test() {
|
||||
@start
|
||||
%x =w copy 1
|
||||
@loop
|
||||
jnz %x, @noz, @isz
|
||||
@noz
|
||||
%x =w copy 0
|
||||
jmp @end
|
||||
@isz
|
||||
%x =w copy 1
|
||||
jmp @loop
|
||||
@end
|
||||
%z =w add 10, %x
|
||||
ret
|
||||
}
|
||||
15
src/qbe/test/_fix2.ssa
Normal file
15
src/qbe/test/_fix2.ssa
Normal file
@@ -0,0 +1,15 @@
|
||||
function $test() {
|
||||
@start
|
||||
%x =w copy 1
|
||||
@loop
|
||||
jnz %x, @noz, @isz
|
||||
@noz
|
||||
%x =w copy 0
|
||||
jnz %x, @loop, @end
|
||||
@isz
|
||||
%x =w copy 1
|
||||
jmp @loop
|
||||
@end
|
||||
%z =w add 10, %x
|
||||
ret
|
||||
}
|
||||
20
src/qbe/test/_fix3.ssa
Normal file
20
src/qbe/test/_fix3.ssa
Normal file
@@ -0,0 +1,20 @@
|
||||
function w $test() {
|
||||
@start
|
||||
%x =w copy 100
|
||||
%s =w copy 0
|
||||
@l
|
||||
%c =w cslew %x, 10
|
||||
jnz %c, @a, @b
|
||||
@a
|
||||
%s =w add %s, %x
|
||||
%x =w sub %x, 1
|
||||
jmp @c
|
||||
@b
|
||||
%s =w sub %s, %x
|
||||
jmp @c
|
||||
@c
|
||||
%x =w sub %x, 1
|
||||
jnz %x, @l, @end
|
||||
@end
|
||||
ret %s
|
||||
}
|
||||
27
src/qbe/test/_fix4.ssa
Normal file
27
src/qbe/test/_fix4.ssa
Normal file
@@ -0,0 +1,27 @@
|
||||
function $test() {
|
||||
@start
|
||||
%x =w copy 3
|
||||
%n =w copy 2
|
||||
@loop
|
||||
%c =w ceqw %n, 10000
|
||||
jnz %c, @end, @next
|
||||
@next
|
||||
%t =w copy 3
|
||||
%x =w add %x, 2
|
||||
@tloop
|
||||
%s =w mul %t, %t
|
||||
%c =w csgtw %s, %x
|
||||
jnz %c, @prime, @test
|
||||
@test
|
||||
%r =w rem %x, %t
|
||||
jnz %r, @tnext, @loop
|
||||
@tnext
|
||||
%t =w add %t, 2
|
||||
jmp @tloop
|
||||
@prime
|
||||
%n =w add %n, 1
|
||||
jmp @loop
|
||||
@end
|
||||
storew %x, $a
|
||||
ret
|
||||
}
|
||||
48
src/qbe/test/_gcm1.ssa
Normal file
48
src/qbe/test/_gcm1.ssa
Normal file
@@ -0,0 +1,48 @@
|
||||
export
|
||||
function w $ifmv(w %p1, w %p2, w %p3) {
|
||||
@start
|
||||
@entry
|
||||
%rt =w add %p2, %p3 # gcm moves to @true
|
||||
%rf =w sub %p2, %p3 # gcm moves to @false
|
||||
jnz %p1, @true, @false
|
||||
@true
|
||||
%r =w copy %rt
|
||||
jmp @exit
|
||||
@false
|
||||
%r =w copy %rf
|
||||
jmp @exit
|
||||
@exit
|
||||
ret %r
|
||||
}
|
||||
|
||||
export
|
||||
function w $hoist1(w %p1, w %p2, w %p3) {
|
||||
@start
|
||||
@entry
|
||||
%n =w copy 0
|
||||
%i =w copy %p1
|
||||
@loop
|
||||
%base =w add %p2, %p3 # gcm moves to @exit
|
||||
%i =w sub %i, 1
|
||||
%n =w add %n, 1
|
||||
jnz %i, @loop, @exit
|
||||
@exit
|
||||
%r =w add %base, %n
|
||||
ret %r
|
||||
}
|
||||
|
||||
export
|
||||
function w $hoist2(w %p1, w %p2, w %p3) {
|
||||
@start
|
||||
@entry
|
||||
%n =w copy 0
|
||||
%i =w copy %p1
|
||||
@loop
|
||||
%base =w add %p2, %p3 # gcm moves to @entry
|
||||
%i =w sub %i, 1
|
||||
%n =w add %n, %base
|
||||
jnz %i, @loop, @exit
|
||||
@exit
|
||||
%r =w add %base, %n
|
||||
ret %r
|
||||
}
|
||||
43
src/qbe/test/_gcm2.ssa
Normal file
43
src/qbe/test/_gcm2.ssa
Normal file
@@ -0,0 +1,43 @@
|
||||
# Programs from "Global Code Motion Global Value Numbering" by Cliff Click
|
||||
# https://courses.cs.washington.edu/courses/cse501/06wi/reading/click-pldi95.pdf
|
||||
|
||||
# GCM program in Figure 1
|
||||
|
||||
function w $gcm_test(w %a){
|
||||
@start
|
||||
%i.0 =w copy 0
|
||||
@loop
|
||||
%i.1 =w phi @start %i.0, @loop %i.2
|
||||
%b =w add %a, 1 # early schedule moves to @start
|
||||
%i.2 =w add %i.1, %b
|
||||
%c =w mul %i.2, 2 # late schedule moves to @end
|
||||
%x =w csltw %i.2, 10
|
||||
jnz %x, @loop, @end
|
||||
@end
|
||||
ret %c
|
||||
}
|
||||
|
||||
# GCM program in "Figure 3 x's definition does not dominate it's use"
|
||||
#
|
||||
# SSA construction will insert a phi instruction for "x" in @if_false
|
||||
# preventing the "add" in @if_false from being moved to @if_true
|
||||
|
||||
function $gcm_test2 (w %a){
|
||||
@start
|
||||
%f =w copy 1
|
||||
%x =w copy 0
|
||||
%s.0 =w copy 0
|
||||
@loop
|
||||
%s.1 = w phi @start %s.0, @if_false %s.2
|
||||
jnz %a, @if, @end
|
||||
@if
|
||||
jnz %f, @if_true, @if_false
|
||||
@if_true
|
||||
%f =w copy 0
|
||||
%x =w add %x, 1
|
||||
@if_false
|
||||
%s.2 =w add %s.1, %x
|
||||
jmp @loop
|
||||
@end
|
||||
ret
|
||||
}
|
||||
21
src/qbe/test/_live.ssa
Normal file
21
src/qbe/test/_live.ssa
Normal file
@@ -0,0 +1,21 @@
|
||||
# this control flow graph is irreducible
|
||||
# yet, we expect the liveness analysis
|
||||
# to work properly and make %x live in
|
||||
# the block @left
|
||||
#
|
||||
# nothing should ever be live at the entry
|
||||
|
||||
function $test() {
|
||||
@start
|
||||
%b =w copy 0
|
||||
%x =w copy 10
|
||||
jnz 0, @loop, @left
|
||||
@left
|
||||
jmp @inloop
|
||||
@loop
|
||||
%x1 =w add %x, 1
|
||||
@inloop
|
||||
%b1 =w add %b, 1
|
||||
@endloop
|
||||
jmp @loop
|
||||
}
|
||||
17
src/qbe/test/_load-elim.ssa
Normal file
17
src/qbe/test/_load-elim.ssa
Normal file
@@ -0,0 +1,17 @@
|
||||
# GCM can eliminate unused add/load instructions
|
||||
|
||||
export
|
||||
function w $f(l %p, w %c) {
|
||||
@start
|
||||
jnz %c, @true, @false
|
||||
@true
|
||||
%p1 =l add %p, 4
|
||||
%v1 =w loaduw %p1
|
||||
jmp @end
|
||||
@false
|
||||
%p2 =l add %p, 4
|
||||
%v2 =w loaduw %p2
|
||||
jmp @end
|
||||
@end
|
||||
ret 0
|
||||
}
|
||||
12
src/qbe/test/_rpo.ssa
Normal file
12
src/qbe/test/_rpo.ssa
Normal file
@@ -0,0 +1,12 @@
|
||||
function $test() {
|
||||
@start
|
||||
jmp @foo
|
||||
@baz
|
||||
jnz 1, @end, @foo
|
||||
@bar
|
||||
jmp @end
|
||||
@foo
|
||||
jnz 0, @bar, @baz
|
||||
@end
|
||||
ret
|
||||
}
|
||||
35762
src/qbe/test/_slow.qbe
Normal file
35762
src/qbe/test/_slow.qbe
Normal file
File diff suppressed because it is too large
Load Diff
22
src/qbe/test/_spill1.ssa
Normal file
22
src/qbe/test/_spill1.ssa
Normal file
@@ -0,0 +1,22 @@
|
||||
# test with NReg == 3
|
||||
# there must be a spill
|
||||
# happening on %c
|
||||
#
|
||||
# if you replace the sub
|
||||
# by an add or comment
|
||||
# the two marked lines
|
||||
# there should be no
|
||||
# spill
|
||||
#
|
||||
|
||||
function $test() {
|
||||
@start
|
||||
%f =w copy 0 # here
|
||||
%b =w copy 1
|
||||
%c =w copy 2
|
||||
%a =w sub %b, %c
|
||||
%d =w copy %b
|
||||
%e =w copy %f # and there
|
||||
%g =w copy %a
|
||||
ret
|
||||
}
|
||||
22
src/qbe/test/_spill2.ssa
Normal file
22
src/qbe/test/_spill2.ssa
Normal file
@@ -0,0 +1,22 @@
|
||||
# stupid spilling test
|
||||
|
||||
function $test() {
|
||||
@start
|
||||
%x1 =w copy 10
|
||||
%x2 =w add %x1, %x1
|
||||
%x3 =w sub %x2, %x1
|
||||
%x4 =w add %x3, %x1
|
||||
%x5 =w sub %x4, %x1
|
||||
%x6 =w add %x5, %x1
|
||||
%x7 =w sub %x6, %x1
|
||||
%x8 =w add %x7, %x1
|
||||
%x9 =w sub %x8, %x8
|
||||
%x10 =w add %x9, %x7
|
||||
%x11 =w sub %x10, %x6
|
||||
%x12 =w add %x11, %x5
|
||||
%x13 =w sub %x12, %x4
|
||||
%x14 =w add %x13, %x3
|
||||
%x15 =w sub %x14, %x2
|
||||
%x16 =w add %x15, %x1
|
||||
ret
|
||||
}
|
||||
24
src/qbe/test/_spill3.ssa
Normal file
24
src/qbe/test/_spill3.ssa
Normal file
@@ -0,0 +1,24 @@
|
||||
# make sure comparisons
|
||||
# never get their two
|
||||
# operands in memory
|
||||
# run with NReg == 3, or
|
||||
# adapt it!
|
||||
|
||||
function $test() {
|
||||
@start
|
||||
%a =w loadw $a
|
||||
%b =w loadw $a
|
||||
|
||||
@loop
|
||||
%c =w phi @start 0, @loop %f
|
||||
%d =w phi @start 0, @loop %g
|
||||
%e =w phi @start 0, @loop %h
|
||||
%f =w add %c, %d
|
||||
%g =w add %c, %e
|
||||
%h =w add %e, %d
|
||||
%x =w cslew %a, %b
|
||||
jnz %x, @loop, @end
|
||||
|
||||
@end
|
||||
ret
|
||||
}
|
||||
60
src/qbe/test/abi1.ssa
Normal file
60
src/qbe/test/abi1.ssa
Normal file
@@ -0,0 +1,60 @@
|
||||
# test calling into C with two
|
||||
# large struct arguments (passed
|
||||
# on the stack)
|
||||
|
||||
type :mem = { b 17 }
|
||||
|
||||
function $alpha(l %p, w %l, l %n) {
|
||||
@ini
|
||||
%pe =l add %p, %n
|
||||
@lop
|
||||
%p1 =l phi @ini %p, @lop %p2
|
||||
%l1 =w phi @ini %l, @lop %l2
|
||||
storeb %l1, %p1
|
||||
%p2 =l add %p1, 1
|
||||
%l2 =w add %l1, 1
|
||||
%c1 =w ceql %p1, %pe
|
||||
jnz %c1, @end, @lop
|
||||
@end
|
||||
storeb 0, %pe
|
||||
ret
|
||||
}
|
||||
|
||||
export
|
||||
function $test() {
|
||||
@start
|
||||
%p =l alloc4 17
|
||||
%q =l alloc4 17
|
||||
%r0 =w call $alpha(l %p, w 65, l 16)
|
||||
%r1 =w call $alpha(l %q, w 97, l 16)
|
||||
%r2 =w call $fcb(:mem %p, w 1, w 2, w 3, w 4, w 5, w 6, w 7, w 8, w 9, :mem %q)
|
||||
ret
|
||||
}
|
||||
|
||||
|
||||
# >>> driver
|
||||
# #include <stdio.h>
|
||||
# typedef struct { char t[17]; } mem;
|
||||
# extern void test();
|
||||
# void fcb(mem m, int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, mem n) {
|
||||
# printf("fcb: m = (mem){ t = \"%s\" }\n", m.t);
|
||||
# printf(" n = (mem){ t = \"%s\" }\n", n.t);
|
||||
# #define T(n) printf(" i%d = %d\n", n, i##n);
|
||||
# T(1) T(2) T(3) T(4) T(5) T(6) T(7) T(8) T(9)
|
||||
# }
|
||||
# int main() { test(); return 0; }
|
||||
# <<<
|
||||
|
||||
# >>> output
|
||||
# fcb: m = (mem){ t = "ABCDEFGHIJKLMNOP" }
|
||||
# n = (mem){ t = "abcdefghijklmnop" }
|
||||
# i1 = 1
|
||||
# i2 = 2
|
||||
# i3 = 3
|
||||
# i4 = 4
|
||||
# i5 = 5
|
||||
# i6 = 6
|
||||
# i7 = 7
|
||||
# i8 = 8
|
||||
# i9 = 9
|
||||
# <<<
|
||||
19
src/qbe/test/abi2.ssa
Normal file
19
src/qbe/test/abi2.ssa
Normal file
@@ -0,0 +1,19 @@
|
||||
type :fps = { s, b, s }
|
||||
|
||||
export
|
||||
function s $sum(:fps %p) {
|
||||
@start
|
||||
%f1 =s load %p
|
||||
%p8 =l add 8, %p
|
||||
%f2 =s load %p8
|
||||
%s =s add %f1, %f2
|
||||
ret %s
|
||||
}
|
||||
|
||||
# >>> driver
|
||||
# typedef struct { float f1; char b; float f2; } fps;
|
||||
# extern float sum(fps);
|
||||
# int main() { fps x = { 1.23, -1, 2.34 }; return !(sum(x) == 1.23f+2.34f); }
|
||||
# /* Note the f suffixes above are important
|
||||
# * otherwise C does double operations. */
|
||||
# <<<
|
||||
45
src/qbe/test/abi3.ssa
Normal file
45
src/qbe/test/abi3.ssa
Normal file
@@ -0,0 +1,45 @@
|
||||
type :four = {l, b, w}
|
||||
|
||||
data $z = { w 0 }
|
||||
|
||||
export
|
||||
function $test() {
|
||||
@start
|
||||
%a =w loadw $z
|
||||
%y =w add %a, %a
|
||||
%yl =l extsw %y
|
||||
|
||||
%s =l alloc8 16 # allocate a :four struct
|
||||
%s1 =l add %s, 12 # get address of the w
|
||||
storel 4, %s # set the l
|
||||
storew 5, %s1 # set the w
|
||||
|
||||
# only the last argument should be on the stack
|
||||
%f =l add $F, %yl
|
||||
%x =w call %f(w %y, w 1, w 2, w 3, :four %s, w 6)
|
||||
|
||||
# store the result in the
|
||||
# global variable a
|
||||
|
||||
%x1 =w add %y, %x
|
||||
storew %x1, $a
|
||||
ret
|
||||
}
|
||||
|
||||
# >>> driver
|
||||
# #include <stdio.h>
|
||||
# struct four { long long l; char c; int i; };
|
||||
# extern void test(void);
|
||||
# int F(int a0, int a1, int a2, int a3, struct four s, int a6) {
|
||||
# printf("%d %d %d %d %d %d %d\n",
|
||||
# a0, a1, a2, a3, (int)s.l, s.i, a6);
|
||||
# return 42;
|
||||
# }
|
||||
# int a;
|
||||
# int main() { test(); printf("%d\n", a); return 0; }
|
||||
# <<<
|
||||
|
||||
# >>> output
|
||||
# 0 1 2 3 4 5 6
|
||||
# 42
|
||||
# <<<
|
||||
39
src/qbe/test/abi4.ssa
Normal file
39
src/qbe/test/abi4.ssa
Normal file
@@ -0,0 +1,39 @@
|
||||
# return a large struct to C
|
||||
|
||||
type :mem = { b 17 }
|
||||
|
||||
function $alpha(l %p, w %l, l %n) {
|
||||
@ini
|
||||
%pe =l add %p, %n
|
||||
@lop
|
||||
%p1 =l phi @ini %p, @lop %p2
|
||||
%l1 =w phi @ini %l, @lop %l2
|
||||
storeb %l1, %p1
|
||||
%p2 =l add %p1, 1
|
||||
%l2 =w add %l1, 1
|
||||
%c1 =w ceql %p1, %pe
|
||||
jnz %c1, @end, @lop
|
||||
@end
|
||||
storeb 0, %pe
|
||||
ret
|
||||
}
|
||||
|
||||
export
|
||||
function :mem $test() {
|
||||
@ini
|
||||
%p =l alloc4 17
|
||||
%r0 =w call $alpha(l %p, w 65, l 16)
|
||||
ret %p
|
||||
}
|
||||
|
||||
|
||||
# >>> driver
|
||||
# #include <stdio.h>
|
||||
# typedef struct { char t[17]; } mem;
|
||||
# extern mem test(void);
|
||||
# int main() { mem m = test(); printf("%s\n", m.t); return 0; }
|
||||
# <<<
|
||||
|
||||
# >>> output
|
||||
# ABCDEFGHIJKLMNOP
|
||||
# <<<
|
||||
144
src/qbe/test/abi5.ssa
Normal file
144
src/qbe/test/abi5.ssa
Normal file
@@ -0,0 +1,144 @@
|
||||
# returning structs from C
|
||||
|
||||
type :st1 = { b 17 }
|
||||
type :st2 = { w }
|
||||
type :st3 = { s, w }
|
||||
type :st4 = { w, d }
|
||||
type :st5 = { s, l }
|
||||
type :st6 = { b 16 }
|
||||
type :st7 = { s, d }
|
||||
type :st8 = { w 4 }
|
||||
type :un9 = { { b } { s } }
|
||||
type :st9 = { w, :un9 }
|
||||
type :sta = { b, s }
|
||||
type :stb = { b, b, s }
|
||||
|
||||
data $fmt1 = { b "t1: %s\n", b 0 }
|
||||
data $fmt2 = { b "t2: %d\n", b 0 }
|
||||
data $fmt3 = { b "t3: %f %d\n", b 0 }
|
||||
data $fmt4 = { b "t4: %d %f\n", b 0 }
|
||||
data $fmt5 = { b "t5: %f %lld\n", b 0 }
|
||||
data $fmt6 = { b "t6: %s\n", b 0 }
|
||||
data $fmt7 = { b "t7: %f %f\n", b 0 }
|
||||
data $fmt8 = { b "t8: %d %d %d %d\n", b 0 }
|
||||
data $fmt9 = { b "t9: %d %f\n", b 0 }
|
||||
data $fmta = { b "ta: %d %f\n", b 0 }
|
||||
data $fmtb = { b "tb: %d %d %f\n", b 0 }
|
||||
|
||||
export
|
||||
function $test() {
|
||||
@start
|
||||
%r1 =:st1 call $t1()
|
||||
%i1 =w call $printf(l $fmt1, ..., l %r1)
|
||||
|
||||
%r2 =:st2 call $t2()
|
||||
%w2 =w loadw %r2
|
||||
%i2 =w call $printf(l $fmt2, ..., w %w2)
|
||||
|
||||
%r3 =:st3 call $t3()
|
||||
%s3 =s loads %r3
|
||||
%r34 =l add %r3, 4
|
||||
%w3 =w loadw %r34
|
||||
%p3 =d exts %s3
|
||||
%i3 =w call $printf(l $fmt3, ..., d %p3, w %w3)
|
||||
|
||||
%r4 =:st4 call $t4()
|
||||
%w4 =w loadw %r4
|
||||
%r48 =l add 8, %r4
|
||||
%d4 =d loadd %r48
|
||||
%i4 =w call $printf(l $fmt4, ..., w %w4, d %d4)
|
||||
|
||||
%r5 =:st5 call $t5()
|
||||
%s5 =s loads %r5
|
||||
%d5 =d exts %s5
|
||||
%r58 =l add %r5, 8
|
||||
%l5 =l loadl %r58
|
||||
%i5 =w call $printf(l $fmt5, ..., d %d5, l %l5)
|
||||
|
||||
%r6 =:st6 call $t6()
|
||||
%i6 =w call $printf(l $fmt6, ..., l %r6)
|
||||
|
||||
%r7 =:st7 call $t7()
|
||||
%s7 =s loads %r7
|
||||
%d71 =d exts %s7
|
||||
%r78 =l add %r7, 8
|
||||
%d72 =d loadd %r78
|
||||
%i7 =w call $printf(l $fmt7, ..., d %d71, d %d72)
|
||||
|
||||
%r8 =:st8 call $t8()
|
||||
%r84 =l add 4, %r8
|
||||
%r88 =l add 4, %r84
|
||||
%r812 =l add 4, %r88
|
||||
%w81 =w loadw %r8
|
||||
%w82 =w loadw %r84
|
||||
%w83 =w loadw %r88
|
||||
%w84 =w loadw %r812
|
||||
%i8 =w call $printf(l $fmt8, ..., w %w81, w %w82, w %w83, w %w84)
|
||||
|
||||
%r9 =:st9 call $t9()
|
||||
%r94 =l add 4, %r9
|
||||
%w9 =w loadw %r9
|
||||
%s9 =s loads %r94
|
||||
%d9 =d exts %s9
|
||||
%i9 =w call $printf(l $fmt9, ..., w %w9, d %d9)
|
||||
|
||||
%ra =:sta call $ta()
|
||||
%ra4 =l add 4, %ra
|
||||
%wa =w loadsb %ra
|
||||
%sa =s loads %ra4
|
||||
%da =d exts %sa
|
||||
%ia =w call $printf(l $fmta, ..., w %wa, d %da)
|
||||
|
||||
%rb =:stb call $tb()
|
||||
%rb1 =l add 1, %rb
|
||||
%rb4 =l add 4, %rb
|
||||
%w0b =w loadsb %rb
|
||||
%w1b =w loadsb %rb1
|
||||
%sb =s loads %rb4
|
||||
%db =d exts %sb
|
||||
%ib =w call $printf(l $fmtb, ..., w %w0b, w %w1b, d %db)
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
|
||||
# >>> driver
|
||||
# typedef struct { char t[17]; } st1;
|
||||
# typedef struct { int i; } st2;
|
||||
# typedef struct { float f; int i; } st3;
|
||||
# typedef struct { int i; double d; } st4;
|
||||
# typedef struct { float f; long long l; } st5;
|
||||
# typedef struct { char t[16]; } st6;
|
||||
# typedef struct { float f; double d; } st7;
|
||||
# typedef struct { int i[4]; } st8;
|
||||
# typedef struct { int i; union { char c; float f; } u; } st9;
|
||||
# typedef struct { char c; float f; } sta;
|
||||
# typedef struct { char c0, c1; float f; } stb;
|
||||
# extern void test(void);
|
||||
# st1 t1() { return (st1){"abcdefghijklmnop"}; }
|
||||
# st2 t2() { return (st2){2}; }
|
||||
# st3 t3() { return (st3){3.0,30}; }
|
||||
# st4 t4() { return (st4){4,-40}; }
|
||||
# st5 t5() { return (st5){5.5,-55}; }
|
||||
# st6 t6() { return (st6){"abcdefghijklmno"}; }
|
||||
# st7 t7() { return (st7){7.77,77.7}; }
|
||||
# st8 t8() { return (st8){-8,88,-888,8888}; }
|
||||
# st9 t9() { return (st9){9,{.f=9.9}}; }
|
||||
# sta ta() { return (sta){-10,10.1}; }
|
||||
# stb tb() { return (stb){-1,11,11.11}; }
|
||||
# int main() { test(); return 0; }
|
||||
# <<<
|
||||
|
||||
# >>> output
|
||||
# t1: abcdefghijklmnop
|
||||
# t2: 2
|
||||
# t3: 3.000000 30
|
||||
# t4: 4 -40.000000
|
||||
# t5: 5.500000 -55
|
||||
# t6: abcdefghijklmno
|
||||
# t7: 7.770000 77.700000
|
||||
# t8: -8 88 -888 8888
|
||||
# t9: 9 9.900000
|
||||
# ta: -10 10.100000
|
||||
# tb: -1 11 11.110000
|
||||
# <<<
|
||||
38
src/qbe/test/abi6.ssa
Normal file
38
src/qbe/test/abi6.ssa
Normal file
@@ -0,0 +1,38 @@
|
||||
# test arm64's hfa
|
||||
|
||||
data $dfmt = { b "double: %g\n", b 0 }
|
||||
|
||||
type :hfa3 = { s, s, s }
|
||||
|
||||
export
|
||||
function $f(:hfa3 %h1, :hfa3 %h2, d %d1, :hfa3 %h3, d %d2) {
|
||||
# the first three parameters should be in 7 registers
|
||||
# the last two should be on the stack
|
||||
@start
|
||||
|
||||
call $phfa3(:hfa3 %h1)
|
||||
call $phfa3(:hfa3 %h2)
|
||||
call $phfa3(:hfa3 %h3)
|
||||
call $printf(l $dfmt, ..., d %d1)
|
||||
call $printf(l $dfmt, ..., d %d2)
|
||||
ret
|
||||
}
|
||||
|
||||
# >>> driver
|
||||
# #include <stdio.h>
|
||||
# typedef struct { float f1, f2, f3; } hfa3;
|
||||
# void f(hfa3, hfa3, double, hfa3, double);
|
||||
# void phfa3(hfa3 h) { printf("{ %g, %g, %g }\n", h.f1, h.f2, h.f3); }
|
||||
# int main() {
|
||||
# hfa3 h1={1,2,3}, h2={2,3,4}, h3={3,4,5};
|
||||
# f(h1, h2, 1, h3, 2);
|
||||
# }
|
||||
# <<<
|
||||
|
||||
# >>> output
|
||||
# { 1, 2, 3 }
|
||||
# { 2, 3, 4 }
|
||||
# { 3, 4, 5 }
|
||||
# double: 1
|
||||
# double: 2
|
||||
# <<<
|
||||
21
src/qbe/test/abi7.ssa
Normal file
21
src/qbe/test/abi7.ssa
Normal file
@@ -0,0 +1,21 @@
|
||||
# test padding calculation with
|
||||
# embedded struct
|
||||
|
||||
type :s1 = align 4 { w 3 }
|
||||
type :s2 = align 4 { b 1, :s1 1 }
|
||||
|
||||
export function :s2 $test() {
|
||||
@start
|
||||
ret $s
|
||||
}
|
||||
|
||||
# >>> driver
|
||||
# struct s2 {
|
||||
# char x;
|
||||
# struct { int a[3]; } s1;
|
||||
# } s = { .x = 123 };
|
||||
# extern struct s2 test(void);
|
||||
# int main(void) {
|
||||
# return !(test().x == 123);
|
||||
# }
|
||||
# <<<
|
||||
278
src/qbe/test/abi8.ssa
Normal file
278
src/qbe/test/abi8.ssa
Normal file
@@ -0,0 +1,278 @@
|
||||
# riscv64 ABI stress
|
||||
# see tools/abi8.py
|
||||
|
||||
type :fi1 = { h, s } # in a gp & fp pair
|
||||
type :fi2 = { s, w } # ditto
|
||||
type :uw = { { w } }
|
||||
type :fi3 = { s, :uw } # in a single gp reg
|
||||
type :ss = { s, s } # in two fp regs
|
||||
type :sd = { s, d } # ditto
|
||||
type :ww = { w, w } # in a single gp reg
|
||||
type :lb = { l, b } # in two gp regs
|
||||
type :big = { b 17 } # by reference
|
||||
type :ddd = { d, d, d} # big hfa on arm64
|
||||
|
||||
data $ctoqbestr = { b "c->qbe(%d)", b 0 }
|
||||
data $emptystr = { b 0 }
|
||||
|
||||
export
|
||||
function $qfn0(s %p0, s %p1, s %p2, s %p3, s %p4, s %p5, s %p6, s %p7, s %p8) {
|
||||
@start
|
||||
%r0 =w call $printf(l $ctoqbestr, ..., w 0)
|
||||
call $ps(s %p8)
|
||||
%r1 =w call $puts(l $emptystr)
|
||||
ret
|
||||
}
|
||||
export
|
||||
function $qfn1(w %p0, s %p1, :fi1 %p2) {
|
||||
@start
|
||||
%r0 =w call $printf(l $ctoqbestr, ..., w 1)
|
||||
call $pw(w %p0)
|
||||
call $ps(s %p1)
|
||||
call $pfi1(l %p2)
|
||||
%r1 =w call $puts(l $emptystr)
|
||||
ret
|
||||
}
|
||||
export
|
||||
function $qfn2(w %p0, :fi2 %p1, s %p2) {
|
||||
@start
|
||||
%r0 =w call $printf(l $ctoqbestr, ..., w 2)
|
||||
call $pw(w %p0)
|
||||
call $pfi2(l %p1)
|
||||
call $ps(s %p2)
|
||||
%r1 =w call $puts(l $emptystr)
|
||||
ret
|
||||
}
|
||||
export
|
||||
function $qfn3(w %p0, s %p1, :fi3 %p2) {
|
||||
@start
|
||||
%r0 =w call $printf(l $ctoqbestr, ..., w 3)
|
||||
call $pw(w %p0)
|
||||
call $ps(s %p1)
|
||||
call $pfi3(l %p2)
|
||||
%r1 =w call $puts(l $emptystr)
|
||||
ret
|
||||
}
|
||||
export
|
||||
function $qfn4(:ss %p0) {
|
||||
@start
|
||||
%r0 =w call $printf(l $ctoqbestr, ..., w 4)
|
||||
call $pss(l %p0)
|
||||
%r1 =w call $puts(l $emptystr)
|
||||
ret
|
||||
}
|
||||
export
|
||||
function $qfn5(d %p0, d %p1, d %p2, d %p3, d %p4, d %p5, d %p6, :ss %p7, s %p8, l %p9) {
|
||||
@start
|
||||
%r0 =w call $printf(l $ctoqbestr, ..., w 5)
|
||||
call $pss(l %p7)
|
||||
call $ps(s %p8)
|
||||
call $pl(l %p9)
|
||||
%r1 =w call $puts(l $emptystr)
|
||||
ret
|
||||
}
|
||||
export
|
||||
function $qfn6(:lb %p0) {
|
||||
@start
|
||||
%r0 =w call $printf(l $ctoqbestr, ..., w 6)
|
||||
call $plb(l %p0)
|
||||
%r1 =w call $puts(l $emptystr)
|
||||
ret
|
||||
}
|
||||
export
|
||||
function $qfn7(w %p0, w %p1, w %p2, w %p3, w %p4, w %p5, w %p6, :lb %p7) {
|
||||
@start
|
||||
%r0 =w call $printf(l $ctoqbestr, ..., w 7)
|
||||
call $plb(l %p7)
|
||||
%r1 =w call $puts(l $emptystr)
|
||||
ret
|
||||
}
|
||||
export
|
||||
function $qfn8(w %p0, w %p1, w %p2, w %p3, w %p4, w %p5, w %p6, w %p7, :lb %p8) {
|
||||
@start
|
||||
%r0 =w call $printf(l $ctoqbestr, ..., w 8)
|
||||
call $plb(l %p8)
|
||||
%r1 =w call $puts(l $emptystr)
|
||||
ret
|
||||
}
|
||||
export
|
||||
function $qfn9(:big %p0) {
|
||||
@start
|
||||
%r0 =w call $printf(l $ctoqbestr, ..., w 9)
|
||||
call $pbig(l %p0)
|
||||
%r1 =w call $puts(l $emptystr)
|
||||
ret
|
||||
}
|
||||
export
|
||||
function $qfn10(w %p0, w %p1, w %p2, w %p3, w %p4, w %p5, w %p6, w %p7, :big %p8, s %p9, l %p10) {
|
||||
@start
|
||||
%r0 =w call $printf(l $ctoqbestr, ..., w 10)
|
||||
call $pbig(l %p8)
|
||||
call $ps(s %p9)
|
||||
call $pl(l %p10)
|
||||
%r1 =w call $puts(l $emptystr)
|
||||
ret
|
||||
}
|
||||
export
|
||||
function $qfn11(:ddd %p0) {
|
||||
@start
|
||||
%r0 =w call $printf(l $ctoqbestr, ..., w 11)
|
||||
call $pddd(l %p0)
|
||||
%r1 =w call $puts(l $emptystr)
|
||||
ret
|
||||
}
|
||||
|
||||
export
|
||||
function w $main() {
|
||||
@start
|
||||
|
||||
call $cfn0(s 0, s 0, s 0, s 0, s 0, s 0, s 0, s 0, s s_9.9)
|
||||
call $cfn1(w 1, s s_2.2, :fi1 $fi1)
|
||||
call $cfn2(w 1, :fi2 $fi2, s s_3.3)
|
||||
call $cfn3(w 1, s s_2.2, :fi3 $fi3)
|
||||
call $cfn4(:ss $ss)
|
||||
call $cfn5(d 0, d 0, d 0, d 0, d 0, d 0, d 0, :ss $ss, s s_9.9, l 10)
|
||||
call $cfn6(:lb $lb)
|
||||
call $cfn7(w 0, w 0, w 0, w 0, w 0, w 0, w 0, :lb $lb)
|
||||
call $cfn8(w 0, w 0, w 0, w 0, w 0, w 0, w 0, w 0, :lb $lb)
|
||||
call $cfn9(:big $big)
|
||||
call $cfn10(w 0, w 0, w 0, w 0, w 0, w 0, w 0, w 0, :big $big, s s_10.10, l 11)
|
||||
call $cfn11(:ddd $ddd)
|
||||
|
||||
ret 0
|
||||
}
|
||||
|
||||
# >>> driver
|
||||
# #include <stdio.h>
|
||||
# typedef struct { short h; float s; } Sfi1;
|
||||
# typedef struct { float s; int w; } Sfi2;
|
||||
# typedef struct { float s; union { int w; } u; } Sfi3;
|
||||
# typedef struct { float s0, s1; } Sss;
|
||||
# typedef struct { float s; double d; } Ssd;
|
||||
# typedef struct { int w0, w1; } Sww;
|
||||
# typedef struct { long long l; char b; } Slb;
|
||||
# typedef struct { char b[17]; } Sbig;
|
||||
# typedef struct { double d0, d1, d2; } Sddd;
|
||||
# Sfi1 zfi1, fi1 = { -123, 4.56 };
|
||||
# Sfi2 zfi2, fi2 = { 1.23, 456 };
|
||||
# Sfi3 zfi3, fi3 = { 3.45, 567 };
|
||||
# Sss zss, ss = { 1.23, 45.6 };
|
||||
# Ssd zsd, sd = { 2.34, 5.67 };
|
||||
# Sww zww, ww = { -123, -456 };
|
||||
# Slb zlb, lb = { 123, 'z' };
|
||||
# Sbig zbig, big = { "abcdefhijklmnopqr" };
|
||||
# Sddd zddd, ddd = { 1.23, 45.6, 7.89 };
|
||||
# void pfi1(Sfi1 *s) { printf(" { %d, %g }", s->h, s->s); }
|
||||
# void pfi2(Sfi2 *s) { printf(" { %g, %d }", s->s, s->w); }
|
||||
# void pfi3(Sfi3 *s) { printf(" { %g, %d }", s->s, s->u.w); }
|
||||
# void pss(Sss *s) { printf(" { %g, %g }", s->s0, s->s1); }
|
||||
# void psd(Ssd *s) { printf(" { %g, %g }", s->s, s->d); }
|
||||
# void pww(Sww *s) { printf(" { %d, %d }", s->w0, s->w1); }
|
||||
# void plb(Slb *s) { printf(" { %lld, '%c' }", s->l, s->b); }
|
||||
# void pbig(Sbig *s) { printf(" \"%.17s\"", s->b); }
|
||||
# void pddd(Sddd *s) { printf(" { %g, %g, %g }", s->d0, s->d1, s->d2); }
|
||||
# void pw(int w) { printf(" %d", w); }
|
||||
# void pl(long long l) { printf(" %lld", l); }
|
||||
# void ps(float s) { printf(" %g", s); }
|
||||
# void pd(double d) { printf(" %g", d); }
|
||||
# /* --------------------------- */
|
||||
# extern void qfn0(float, float, float, float, float, float, float, float, float);
|
||||
# void cfn0(float p0, float p1, float p2, float p3, float p4, float p5, float p6, float p7, float p8) {
|
||||
# printf("qbe->c(%d)", 0);
|
||||
# ps(p8); puts("");
|
||||
# qfn0(p0, p1, p2, p3, p4, p5, p6, p7, p8);
|
||||
# }
|
||||
# extern void qfn1(int, float, Sfi1);
|
||||
# void cfn1(int p0, float p1, Sfi1 p2) {
|
||||
# printf("qbe->c(%d)", 1);
|
||||
# pw(p0); ps(p1); pfi1(&p2); puts("");
|
||||
# qfn1(p0, p1, p2);
|
||||
# }
|
||||
# extern void qfn2(int, Sfi2, float);
|
||||
# void cfn2(int p0, Sfi2 p1, float p2) {
|
||||
# printf("qbe->c(%d)", 2);
|
||||
# pw(p0); pfi2(&p1); ps(p2); puts("");
|
||||
# qfn2(p0, p1, p2);
|
||||
# }
|
||||
# extern void qfn3(int, float, Sfi3);
|
||||
# void cfn3(int p0, float p1, Sfi3 p2) {
|
||||
# printf("qbe->c(%d)", 3);
|
||||
# pw(p0); ps(p1); pfi3(&p2); puts("");
|
||||
# qfn3(p0, p1, p2);
|
||||
# }
|
||||
# extern void qfn4(Sss);
|
||||
# void cfn4(Sss p0) {
|
||||
# printf("qbe->c(%d)", 4);
|
||||
# pss(&p0); puts("");
|
||||
# qfn4(p0);
|
||||
# }
|
||||
# extern void qfn5(double, double, double, double, double, double, double, Sss, float, long long);
|
||||
# void cfn5(double p0, double p1, double p2, double p3, double p4, double p5, double p6, Sss p7, float p8, long long p9) {
|
||||
# printf("qbe->c(%d)", 5);
|
||||
# pss(&p7); ps(p8); pl(p9); puts("");
|
||||
# qfn5(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9);
|
||||
# }
|
||||
# extern void qfn6(Slb);
|
||||
# void cfn6(Slb p0) {
|
||||
# printf("qbe->c(%d)", 6);
|
||||
# plb(&p0); puts("");
|
||||
# qfn6(p0);
|
||||
# }
|
||||
# extern void qfn7(int, int, int, int, int, int, int, Slb);
|
||||
# void cfn7(int p0, int p1, int p2, int p3, int p4, int p5, int p6, Slb p7) {
|
||||
# printf("qbe->c(%d)", 7);
|
||||
# plb(&p7); puts("");
|
||||
# qfn7(p0, p1, p2, p3, p4, p5, p6, p7);
|
||||
# }
|
||||
# extern void qfn8(int, int, int, int, int, int, int, int, Slb);
|
||||
# void cfn8(int p0, int p1, int p2, int p3, int p4, int p5, int p6, int p7, Slb p8) {
|
||||
# printf("qbe->c(%d)", 8);
|
||||
# plb(&p8); puts("");
|
||||
# qfn8(p0, p1, p2, p3, p4, p5, p6, p7, p8);
|
||||
# }
|
||||
# extern void qfn9(Sbig);
|
||||
# void cfn9(Sbig p0) {
|
||||
# printf("qbe->c(%d)", 9);
|
||||
# pbig(&p0); puts("");
|
||||
# qfn9(p0);
|
||||
# }
|
||||
# extern void qfn10(int, int, int, int, int, int, int, int, Sbig, float, long long);
|
||||
# void cfn10(int p0, int p1, int p2, int p3, int p4, int p5, int p6, int p7, Sbig p8, float p9, long long p10) {
|
||||
# printf("qbe->c(%d)", 10);
|
||||
# pbig(&p8); ps(p9); pl(p10); puts("");
|
||||
# qfn10(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10);
|
||||
# }
|
||||
# extern void qfn11(Sddd);
|
||||
# void cfn11(Sddd p0) {
|
||||
# printf("qbe->c(%d)", 11);
|
||||
# pddd(&p0); puts("");
|
||||
# qfn11(p0);
|
||||
# }
|
||||
# <<<
|
||||
|
||||
# >>> output
|
||||
# qbe->c(0) 9.9
|
||||
# c->qbe(0) 9.9
|
||||
# qbe->c(1) 1 2.2 { -123, 4.56 }
|
||||
# c->qbe(1) 1 2.2 { -123, 4.56 }
|
||||
# qbe->c(2) 1 { 1.23, 456 } 3.3
|
||||
# c->qbe(2) 1 { 1.23, 456 } 3.3
|
||||
# qbe->c(3) 1 2.2 { 3.45, 567 }
|
||||
# c->qbe(3) 1 2.2 { 3.45, 567 }
|
||||
# qbe->c(4) { 1.23, 45.6 }
|
||||
# c->qbe(4) { 1.23, 45.6 }
|
||||
# qbe->c(5) { 1.23, 45.6 } 9.9 10
|
||||
# c->qbe(5) { 1.23, 45.6 } 9.9 10
|
||||
# qbe->c(6) { 123, 'z' }
|
||||
# c->qbe(6) { 123, 'z' }
|
||||
# qbe->c(7) { 123, 'z' }
|
||||
# c->qbe(7) { 123, 'z' }
|
||||
# qbe->c(8) { 123, 'z' }
|
||||
# c->qbe(8) { 123, 'z' }
|
||||
# qbe->c(9) "abcdefhijklmnopqr"
|
||||
# c->qbe(9) "abcdefhijklmnopqr"
|
||||
# qbe->c(10) "abcdefhijklmnopqr" 10.1 11
|
||||
# c->qbe(10) "abcdefhijklmnopqr" 10.1 11
|
||||
# qbe->c(11) { 1.23, 45.6, 7.89 }
|
||||
# c->qbe(11) { 1.23, 45.6, 7.89 }
|
||||
# <<<
|
||||
20
src/qbe/test/abi9.ssa
Normal file
20
src/qbe/test/abi9.ssa
Normal file
@@ -0,0 +1,20 @@
|
||||
type :obj = { l, l, l, l }
|
||||
|
||||
export
|
||||
function :obj $f(l %self) {
|
||||
@_0
|
||||
%_1 =l alloc8 16
|
||||
storel 77, %_1
|
||||
ret %_1
|
||||
}
|
||||
|
||||
# >>> driver
|
||||
# #include <stdio.h>
|
||||
# typedef struct { long long a, b, c, d; } obj;
|
||||
# extern obj f();
|
||||
# int main() { obj ret = f(); printf("%lld\n", ret.a); return 0; }
|
||||
# <<<
|
||||
|
||||
# >>> output
|
||||
# 77
|
||||
# <<<
|
||||
21
src/qbe/test/alias1.ssa
Normal file
21
src/qbe/test/alias1.ssa
Normal file
@@ -0,0 +1,21 @@
|
||||
export function w $main() {
|
||||
@start
|
||||
%a =l alloc4 4
|
||||
%b =l alloc4 4
|
||||
storew 4, %a
|
||||
storew 5, %b
|
||||
|
||||
@loop
|
||||
# %mem will be %a and %b successively,
|
||||
# but we do not know it when processing
|
||||
# the phi because %b goes through a cpy
|
||||
%mem =l phi @start %a, @loop %bcpy
|
||||
|
||||
%w =w load %mem
|
||||
%eq5 =w ceqw %w, 5
|
||||
%bcpy =l copy %b
|
||||
jnz %eq5, @exit, @loop
|
||||
|
||||
@exit
|
||||
ret 0
|
||||
}
|
||||
17
src/qbe/test/align.ssa
Normal file
17
src/qbe/test/align.ssa
Normal file
@@ -0,0 +1,17 @@
|
||||
export
|
||||
function $test() {
|
||||
@start
|
||||
%x =l alloc16 16
|
||||
%y =l add %x, 8
|
||||
%m =w rem %y, 16
|
||||
storew %m, %y
|
||||
%n =w loadw %y
|
||||
storew %n, $a
|
||||
ret
|
||||
}
|
||||
|
||||
# >>> driver
|
||||
# extern void test(void);
|
||||
# int a;
|
||||
# int main() { test(); return !(a == 8 || a == -8); }
|
||||
# <<<
|
||||
17
src/qbe/test/cmp1.ssa
Normal file
17
src/qbe/test/cmp1.ssa
Normal file
@@ -0,0 +1,17 @@
|
||||
# test cmp used in jnz as well as its result value
|
||||
|
||||
export
|
||||
function w $test(w %c) {
|
||||
@start
|
||||
%cmp =w cultw 1, %c
|
||||
jnz %cmp, @yes, @no
|
||||
@yes
|
||||
%cmp =w copy 1
|
||||
@no
|
||||
ret %cmp
|
||||
}
|
||||
|
||||
# >>> driver
|
||||
# int test(int);
|
||||
# int main(void) { return test(0); }
|
||||
# <<<
|
||||
62
src/qbe/test/collatz.ssa
Normal file
62
src/qbe/test/collatz.ssa
Normal file
@@ -0,0 +1,62 @@
|
||||
# a solution for N=1000 to
|
||||
# https://projecteuler.net/problem=14
|
||||
# we use a fast local array to
|
||||
# memoize small collatz numbers
|
||||
|
||||
export
|
||||
function $test() {
|
||||
@start
|
||||
%mem =l alloc4 4000
|
||||
@loop
|
||||
%n =w phi @start 1, @newm %n9, @oldm %n9
|
||||
%cmax =w phi @start 0, @newm %c, @oldm %cmax
|
||||
%fin =w csltw %n, 1000
|
||||
jnz %fin, @cloop, @end
|
||||
@cloop
|
||||
%n0 =w phi @loop %n, @odd %n2, @even %n3
|
||||
%c0 =w phi @loop 0, @odd %c1, @even %c1
|
||||
%no1 =w cnew %n0, 1
|
||||
jnz %no1, @iter0, @endcl
|
||||
@iter0
|
||||
%ism =w csltw %n0, %n
|
||||
jnz %ism, @getmemo, @iter1
|
||||
@iter1
|
||||
%c1 =w add %c0, 1
|
||||
%p =w and %n0, 1
|
||||
jnz %p, @odd, @even
|
||||
@odd
|
||||
%n1 =w mul 3, %n0
|
||||
%n2 =w add %n1, 1
|
||||
jmp @cloop
|
||||
@even
|
||||
%n3 =w shr %n0, 1
|
||||
jmp @cloop
|
||||
@getmemo # get the count for n0 in mem
|
||||
%n0l =l extsw %n0
|
||||
%idx0 =l mul %n0l, 4
|
||||
%loc0 =l add %idx0, %mem
|
||||
%cn0 =w loadw %loc0
|
||||
%c2 =w add %c0, %cn0
|
||||
@endcl # store the count for n in mem
|
||||
%c =w phi @getmemo %c2, @cloop %c0
|
||||
%nl =l extsw %n
|
||||
%idx1 =l mul %nl, 4
|
||||
%loc1 =l add %idx1, %mem
|
||||
storew %c, %loc1
|
||||
%n9 =w add 1, %n
|
||||
%big =w cslew %cmax, %c
|
||||
jnz %big, @newm, @oldm
|
||||
@newm
|
||||
jmp @loop
|
||||
@oldm
|
||||
jmp @loop
|
||||
@end
|
||||
storew %cmax, $a
|
||||
ret
|
||||
}
|
||||
|
||||
# >>> driver
|
||||
# extern void test(void);
|
||||
# int a;
|
||||
# int main() { test(); return !(a == 178); }
|
||||
# <<<
|
||||
76
src/qbe/test/conaddr.ssa
Normal file
76
src/qbe/test/conaddr.ssa
Normal file
@@ -0,0 +1,76 @@
|
||||
# skip amd64_win (no signals on win32)
|
||||
# test amd64 addressing modes
|
||||
|
||||
export
|
||||
function w $f0(l %o) {
|
||||
@start
|
||||
%addr =l add $a, %o
|
||||
%char =w loadub %addr
|
||||
ret %char
|
||||
}
|
||||
|
||||
export
|
||||
function w $f1(l %o) {
|
||||
@start
|
||||
%o1 =l mul %o, 1
|
||||
%addr =l add 10, %o1
|
||||
%char =w loadub %addr
|
||||
ret %char
|
||||
}
|
||||
|
||||
export
|
||||
function w $f2(l %o1, l %o2) {
|
||||
@start
|
||||
%o22 =l mul %o2, 2
|
||||
%o =l add %o1, %o22
|
||||
%addr =l add $a, %o
|
||||
%char =w loadub %addr
|
||||
ret %char
|
||||
}
|
||||
|
||||
export
|
||||
function l $f3(l %o) {
|
||||
@start
|
||||
%addr =l add %o, $a
|
||||
ret %addr
|
||||
}
|
||||
|
||||
export
|
||||
function $f4() {
|
||||
@start
|
||||
storel $p, $p
|
||||
ret
|
||||
}
|
||||
|
||||
export
|
||||
function $writeto0() {
|
||||
@start
|
||||
storel 0, 0
|
||||
ret
|
||||
}
|
||||
|
||||
# >>> driver
|
||||
# #include <stdlib.h>
|
||||
# #include <signal.h>
|
||||
# char a[] = "qbe rocks";
|
||||
# void *p;
|
||||
# int ok;
|
||||
# extern unsigned f0(long), f1(long), f2(long, long);
|
||||
# extern char *f3(long);
|
||||
# extern void f4(), writeto0();
|
||||
# void h(int sig, siginfo_t *si, void *unused) {
|
||||
# ok += si->si_addr == 0;
|
||||
# exit(!(ok == 6));
|
||||
# }
|
||||
# int main() {
|
||||
# struct sigaction sa = {.sa_flags=SA_SIGINFO, .sa_sigaction=h};
|
||||
# sigemptyset(&sa.sa_mask); sigaction(SIGSEGV, &sa, 0);
|
||||
# ok += f0(2) == 'e';
|
||||
# ok += f1((long)a-5) == 'o';
|
||||
# ok += f2(4, 2) == 's';
|
||||
# ok += *f3(0) == 'q';
|
||||
# f4();
|
||||
# ok += p == &p;
|
||||
# writeto0(); /* will segfault */
|
||||
# }
|
||||
# <<<
|
||||
15
src/qbe/test/copy.ssa
Normal file
15
src/qbe/test/copy.ssa
Normal file
@@ -0,0 +1,15 @@
|
||||
export function w $f() {
|
||||
@start
|
||||
%x0 =w loadsb $a
|
||||
# the extension must not be eliminated
|
||||
# even though the load already extended
|
||||
%x1 =l extsb %x0
|
||||
%c =w ceql %x1, -1
|
||||
ret %c
|
||||
}
|
||||
|
||||
# >>> driver
|
||||
# char a = -1;
|
||||
# extern int f();
|
||||
# int main() { return !(f() == 1); }
|
||||
# <<<
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user