// verify_ir.cm — validates mcode IR structure after optimizer passes.
// Used to catch structural bugs introduced by optimization.
|
|
|
|
// Slot-operand table, keyed by opcode.
// Each array lists the operand positions that hold slot numbers. Positions
// count from 0 at the first operand (the element right after the opcode);
// the trailing line/col pair is never listed.
var slot_positions = {
  // Constant loaders: dest only
  access: [0], int: [0], true: [0], false: [0],
  null: [0], function: [0], array: [0], record: [0],

  // Unary: dest, src
  move: [0, 1], not: [0, 1], neg_int: [0, 1], neg_float: [0, 1],
  bitnot: [0, 1], length: [0, 1], typeof: [0, 1],

  // Type predicates: dest, src (is_identical lists three slots)
  is_int: [0, 1], is_text: [0, 1], is_num: [0, 1], is_bool: [0, 1], is_null: [0, 1],
  is_array: [0, 1], is_func: [0, 1], is_record: [0, 1], is_stone: [0, 1],
  is_identical: [0, 1, 2],

  // Generic arithmetic: dest, src1, src2
  add: [0, 1, 2], subtract: [0, 1, 2], multiply: [0, 1, 2],
  divide: [0, 1, 2], modulo: [0, 1, 2], pow: [0, 1, 2],

  // Int- and float-specialized arithmetic: dest, src1, src2
  add_int: [0, 1, 2], sub_int: [0, 1, 2], mul_int: [0, 1, 2], div_int: [0, 1, 2], mod_int: [0, 1, 2],
  add_float: [0, 1, 2], sub_float: [0, 1, 2], mul_float: [0, 1, 2], div_float: [0, 1, 2], mod_float: [0, 1, 2],

  // Comparisons (generic, int, float, text, bool, tolerance): dest, src1, src2
  eq: [0, 1, 2], ne: [0, 1, 2], lt: [0, 1, 2], le: [0, 1, 2], gt: [0, 1, 2], ge: [0, 1, 2],
  eq_int: [0, 1, 2], ne_int: [0, 1, 2], lt_int: [0, 1, 2], gt_int: [0, 1, 2], le_int: [0, 1, 2], ge_int: [0, 1, 2],
  eq_float: [0, 1, 2], ne_float: [0, 1, 2], lt_float: [0, 1, 2], gt_float: [0, 1, 2], le_float: [0, 1, 2], ge_float: [0, 1, 2],
  eq_text: [0, 1, 2], ne_text: [0, 1, 2], lt_text: [0, 1, 2], gt_text: [0, 1, 2], le_text: [0, 1, 2], ge_text: [0, 1, 2],
  eq_bool: [0, 1, 2], ne_bool: [0, 1, 2], eq_tol: [0, 1, 2], ne_tol: [0, 1, 2],

  // Concatenation, logic, bitwise and membership: dest, src1, src2
  concat: [0, 1, 2], and: [0, 1, 2], or: [0, 1, 2],
  bitand: [0, 1, 2], bitor: [0, 1, 2], bitxor: [0, 1, 2],
  shl: [0, 1, 2], shr: [0, 1, 2], ushr: [0, 1, 2],
  in: [0, 1, 2],

  // Element access (the *_field forms list only their first two operands)
  load_index: [0, 1, 2], load_dynamic: [0, 1, 2], load_field: [0, 1],
  store_index: [0, 1, 2], store_dynamic: [0, 1, 2], store_field: [0, 1],

  // Push/pop
  push: [0, 1], pop: [0, 1], get: [0, 1],

  // Control flow: slot operands only (label operands are described by jump_label_pos)
  return: [0], jump: [],
  jump_true: [0], jump_false: [0], jump_not_null: [0],
  disrupt: [],

  // Invoke
  invoke: [0, 1], tail_invoke: [0, 1], goinvoke: [0],
  frame: [0, 1], setarg: [0, 2]
}
|
|
|
|
// Opcodes that write to their first operand (position 0).
// Used as a set: membership is tested with `writes_dest[op] == true`.
// NOTE(review): invoke and tail_invoke are listed here even though this file
// also states that invoke writes its result to position 1 — confirm intent.
var writes_dest = {
  // Constant loaders
  access: true, int: true, true: true, false: true,
  null: true, function: true, array: true, record: true,

  // Unary ops
  move: true, not: true, neg_int: true, neg_float: true,
  bitnot: true, length: true, typeof: true,

  // Type predicates
  is_int: true, is_text: true, is_num: true, is_bool: true, is_null: true,
  is_array: true, is_func: true, is_record: true, is_stone: true, is_identical: true,

  // Arithmetic (generic, int, float)
  add: true, subtract: true, multiply: true, divide: true, modulo: true, pow: true,
  add_int: true, sub_int: true, mul_int: true, div_int: true, mod_int: true,
  add_float: true, sub_float: true, mul_float: true, div_float: true, mod_float: true,

  // Comparisons (generic, int, float, text, bool, tolerance)
  eq: true, ne: true, lt: true, le: true, gt: true, ge: true,
  eq_int: true, ne_int: true, lt_int: true, gt_int: true, le_int: true, ge_int: true,
  eq_float: true, ne_float: true, lt_float: true, gt_float: true, le_float: true, ge_float: true,
  eq_text: true, ne_text: true, lt_text: true, gt_text: true, le_text: true, ge_text: true,
  eq_bool: true, ne_bool: true, eq_tol: true, ne_tol: true,

  // Concatenation, logic, bitwise, membership
  concat: true, and: true, or: true,
  bitand: true, bitor: true, bitxor: true, shl: true, shr: true, ushr: true,
  in: true,

  // Loads and reads
  load_index: true, load_dynamic: true, load_field: true,
  pop: true, get: true,

  // Calls
  invoke: true,
  tail_invoke: true
}
|
|
|
|
// Opcodes where invoke writes to position 1 (result slot), not position 0
|
|
// NOTE(review): nothing in the visible part of this file reads this value;
// check_type_consistency clears the invoke result slot through a hard-coded
// instr[2] (operand position 1, plus one for the opcode) instead.
var invoke_result_pos = 1
|
|
|
|
// Label operand position for each jump opcode, counted from 0 at the first
// operand after the opcode. Opcodes missing from this table are not jumps.
var jump_label_pos = { jump: 0, jump_true: 1, jump_false: 1, jump_not_null: 1 }
|
|
|
|
// --- Check: slot_bounds ---
// Every numeric slot operand must lie in 0..nr_slots-1.
// Only array instructions whose opcode appears in slot_positions are
// inspected; the last two elements of an instruction (line/col) are never
// treated as operands, and non-number operands are ignored.
// Returns an array of error texts (empty when everything is in range).
var check_slot_bounds = function(func) {
  var instructions = func.instructions
  var nr_slots = func.nr_slots
  var errors = []
  var n = 0
  var k = 0
  var instr = null
  var op = null
  var operand_slots = null
  var operand_end = 0
  var at = 0
  var slot = null

  if (instructions == null) return errors

  while (n < length(instructions)) {
    instr = instructions[n]

    // Look up which operands of this instruction are slots, if any
    operand_slots = null
    if (is_array(instr)) {
      op = instr[0]
      operand_slots = slot_positions[op]
    }

    if (operand_slots != null) {
      // Index of the first trailing line/col element
      operand_end = length(instr) - 2
      k = 0
      while (k < length(operand_slots)) {
        // +1 skips the opcode stored at index 0
        at = operand_slots[k] + 1
        slot = at < operand_end ? instr[at] : null
        if (is_number(slot) && (slot < 0 || slot >= nr_slots)) {
          push(errors, `slot_bounds: instr ${text(n)} op=${op} slot[${text(operand_slots[k])}]=${text(slot)} out of range 0..${text(nr_slots - 1)}`)
        }
        k = k + 1
      }
    }

    n = n + 1
  }

  return errors
}
|
|
|
|
// --- Check: jump_targets ---
// Every text label named by a jump must appear as a label entry in the
// instruction stream. Entries starting with "_nop_" do not count as labels.
// Jump operands that are not text are ignored.
// Returns an array of error texts (empty when every target resolves).
var check_jump_targets = function(func) {
  var instructions = func.instructions
  var errors = []
  var known_labels = {}
  var total = 0
  var n = 0
  var entry = null
  var operand_at = null
  var dest_label = null

  if (instructions == null) return errors

  total = length(instructions)

  // Pass 1: gather every real label
  while (n < total) {
    entry = instructions[n]
    if (is_text(entry)) {
      if (!starts_with(entry, "_nop_")) known_labels[entry] = true
    }
    n = n + 1
  }

  // Pass 2: resolve the label operand of each jump
  n = 0
  while (n < total) {
    entry = instructions[n]
    operand_at = is_array(entry) ? jump_label_pos[entry[0]] : null
    if (operand_at != null) {
      // +1 skips the opcode stored at index 0
      dest_label = entry[operand_at + 1]
      if (is_text(dest_label) && known_labels[dest_label] != true) {
        push(errors, `jump_targets: instr ${text(n)} op=${entry[0]} target label "${dest_label}" not found`)
      }
    }
    n = n + 1
  }

  return errors
}
|
|
|
|
// --- Check: type_consistency ---
// Verifies that type-specialized operators only read slots whose type is
// known to be compatible.
//
// Slot types are learned from constant loaders only (`int`, `access` with a
// number or text literal, `true`, `false`). Tracking is linear: the table is
// emptied at every label (basic-block boundary), and a slot is forgotten as
// soon as any other dest-writing op (see writes_dest) overwrites it. `invoke`
// additionally forgets the slot at operand position 1.
//
// func: record whose `instructions` array is scanned (may be null).
// Returns an array of error texts; empty means no mismatch was found.
var check_type_consistency = function(func) {
  var instructions = func.instructions
  var errors = []
  var slot_types = {}
  var i = 0
  var instr = null
  var op = null
  var expected = null
  var known = null
  var s2 = null
  var s3 = null
  var t2 = null
  var t3 = null

  if (instructions == null) return errors

  // Type constants
  var T_INT = "int"
  var T_FLOAT = "float"
  var T_TEXT = "text"
  var T_BOOL = "bool"

  var int_ops = {
    add_int: true, sub_int: true, mul_int: true, div_int: true, mod_int: true,
    eq_int: true, ne_int: true, lt_int: true, gt_int: true, le_int: true, ge_int: true,
    neg_int: true
  }
  var float_ops = {
    add_float: true, sub_float: true, mul_float: true, div_float: true, mod_float: true,
    eq_float: true, ne_float: true, lt_float: true, gt_float: true, le_float: true, ge_float: true,
    neg_float: true
  }
  var text_ops = {
    eq_text: true, ne_text: true, lt_text: true, gt_text: true, le_text: true, ge_text: true,
    concat: true
  }
  var bool_ops = {
    eq_bool: true, ne_bool: true, not: true, and: true, or: true
  }

  while (i < length(instructions)) {
    instr = instructions[i]

    // Reset type info at labels (basic block boundaries)
    if (is_text(instr) && !starts_with(instr, "_nop_")) {
      slot_types = {}
      i = i + 1
      continue
    }

    if (!is_array(instr)) {
      i = i + 1
      continue
    }

    op = instr[0]

    // Operand type this opcode requires, if it is a typed op at all
    expected = null
    if (int_ops[op] == true) {
      expected = T_INT
    } else if (float_ops[op] == true) {
      expected = T_FLOAT
    } else if (text_ops[op] == true) {
      expected = T_TEXT
    } else if (bool_ops[op] == true) {
      expected = T_BOOL
    }

    // Check the sources BEFORE touching the dest: an op may read the same
    // slot it writes, and the read sees the old value.
    if (expected != null && length(instr) >= 5) {
      s2 = text(instr[2])
      t2 = slot_types[s2]
      if (t2 != null && t2 != expected && t2 != "unknown") {
        push(errors, `type_consistency: instr ${text(i)} op=${op} src1 slot ${s2} has type ${t2}, expected ${expected}`)
      }
      // Only src1 is checked for bool ops; the other typed ops also check
      // src2 when the instruction is long enough to carry one.
      if (expected != T_BOOL && length(instr) >= 6) {
        s3 = text(instr[3])
        t3 = slot_types[s3]
        if (t3 != null && t3 != expected && t3 != "unknown") {
          push(errors, `type_consistency: instr ${text(i)} op=${op} src2 slot ${s3} has type ${t3}, expected ${expected}`)
        }
      }
    }

    // Result type of constant-producing ops (null when not statically known)
    known = null
    if (op == "int") {
      known = T_INT
    } else if (op == "access") {
      if (is_number(instr[2])) {
        known = is_integer(instr[2]) ? T_INT : T_FLOAT
      } else if (is_text(instr[2])) {
        known = T_TEXT
      }
    } else if (op == "true" || op == "false") {
      known = T_BOOL
    }

    // Update the dest last. Recording the constant type here, instead of
    // before an unconditional clear, is what keeps it alive for later
    // instructions; any other write simply forgets the slot.
    if (known != null) {
      slot_types[text(instr[1])] = known
    } else if (writes_dest[op] == true) {
      slot_types[text(instr[1])] = null
    }
    if (op == "invoke") {
      slot_types[text(instr[2])] = null
    }

    i = i + 1
  }

  return errors
}
|
|
|
|
// --- Check: nop_consistency ---
// No jump may name a "_nop_" marker as its target.
// Markers are the text entries of the instruction stream that start with
// "_nop_"; jump operands that are not text are ignored.
// Returns an array of error texts (empty when no jump hits a marker).
var check_nop_consistency = function(func) {
  var instructions = func.instructions
  var errors = []
  var nop_markers = {}
  var at = 0
  var item = null
  var opcode = null
  var label_at = null
  var dest = null

  if (instructions == null) return errors

  // Pass 1: remember every nop marker present in the stream
  while (at < length(instructions)) {
    item = instructions[at]
    at = at + 1
    if (!is_text(item)) continue
    if (starts_with(item, "_nop_")) nop_markers[item] = true
  }

  // Pass 2: report each jump whose label operand is one of those markers
  at = 0
  while (at < length(instructions)) {
    item = instructions[at]
    if (!is_array(item)) {
      at = at + 1
      continue
    }

    opcode = item[0]
    label_at = jump_label_pos[opcode]
    if (label_at == null) {
      at = at + 1
      continue
    }

    // +1 skips the opcode stored at index 0
    dest = item[label_at + 1]
    if (is_text(dest) && nop_markers[dest] == true) {
      push(errors, `nop_consistency: instr ${text(at)} op=${opcode} jumps to nop marker "${dest}"`)
    }
    at = at + 1
  }

  return errors
}
|
|
|
|
// --- verify_all ---
// Runs check_slot_bounds, check_jump_targets, check_type_consistency and
// check_nop_consistency, in that order, on one function record.
// Every message is tagged "<pass_name>: <function name>: "; the pass part is
// left out when pass_name is null, and the name falls back to "<unknown>"
// when func.name is null.
// Returns the combined array of error texts (empty = pass).
var verify_all = function(func, pass_name) {
  var checks = [check_slot_bounds, check_jump_targets, check_type_consistency, check_nop_consistency]
  var collected = []
  var prefix = ""
  var fn_name = "<unknown>"
  var run_check = null
  var found = null
  var c = 0
  var e = 0

  if (pass_name != null) prefix = pass_name + ": "
  if (func.name != null) fn_name = func.name

  while (c < length(checks)) {
    run_check = checks[c]
    found = run_check(func)
    e = 0
    while (e < length(found)) {
      push(collected, `${prefix}${fn_name}: ${found[e]}`)
      e = e + 1
    }
    c = c + 1
  }

  return collected
}
|
|
|
|
return {
|
|
verify_all: verify_all,
|
|
check_slot_bounds: check_slot_bounds,
|
|
check_jump_targets: check_jump_targets,
|
|
check_type_consistency: check_type_consistency,
|
|
check_nop_consistency: check_nop_consistency
|
|
}
|