cell/verify_ir.cm

// verify_ir.cm — validates mcode IR structure after optimizer passes
// Used to catch structural bugs introduced by optimization.

// slot_positions: opcode name -> list of operand positions that hold slot numbers.
// Positions are 0-indexed from the first operand (the element right after the
// opcode), excluding the trailing line/col pair.
// check_slot_bounds looks each opcode up here and range-checks only the listed
// positions; an opcode that is absent from this table is not checked at all.
var slot_positions = {
  // Constant loaders — only dest
  access: [0],
  int: [0],
  true: [0],
  false: [0],
  null: [0],
  function: [0],
  array: [0],
  record: [0],

  // Unary — dest, src
  move: [0, 1],
  not: [0, 1],
  neg_int: [0, 1],
  neg_float: [0, 1],
  bitnot: [0, 1],
  length: [0, 1],
  typeof: [0, 1],
  is_int: [0, 1],
  is_text: [0, 1],
  is_num: [0, 1],
  is_bool: [0, 1],
  is_null: [0, 1],
  is_array: [0, 1],
  is_func: [0, 1],
  is_record: [0, 1],
  is_stone: [0, 1],
  // is_identical sits with the is_* predicates but lists three slots
  // (dest, src1, src2), so it is binary rather than unary.
  is_identical: [0, 1, 2],

  // Binary arithmetic/comparison — dest, src1, src2
  add: [0, 1, 2],
  subtract: [0, 1, 2],
  multiply: [0, 1, 2],
  divide: [0, 1, 2],
  modulo: [0, 1, 2],
  pow: [0, 1, 2],
  add_int: [0, 1, 2],
  sub_int: [0, 1, 2],
  mul_int: [0, 1, 2],
  div_int: [0, 1, 2],
  mod_int: [0, 1, 2],
  add_float: [0, 1, 2],
  sub_float: [0, 1, 2],
  mul_float: [0, 1, 2],
  div_float: [0, 1, 2],
  mod_float: [0, 1, 2],
  eq: [0, 1, 2],
  ne: [0, 1, 2],
  lt: [0, 1, 2],
  le: [0, 1, 2],
  gt: [0, 1, 2],
  ge: [0, 1, 2],
  eq_int: [0, 1, 2],
  ne_int: [0, 1, 2],
  lt_int: [0, 1, 2],
  gt_int: [0, 1, 2],
  le_int: [0, 1, 2],
  ge_int: [0, 1, 2],
  eq_float: [0, 1, 2],
  ne_float: [0, 1, 2],
  lt_float: [0, 1, 2],
  gt_float: [0, 1, 2],
  le_float: [0, 1, 2],
  ge_float: [0, 1, 2],
  eq_text: [0, 1, 2],
  ne_text: [0, 1, 2],
  lt_text: [0, 1, 2],
  gt_text: [0, 1, 2],
  le_text: [0, 1, 2],
  ge_text: [0, 1, 2],
  eq_bool: [0, 1, 2],
  ne_bool: [0, 1, 2],
  eq_tol: [0, 1, 2],
  ne_tol: [0, 1, 2],
  concat: [0, 1, 2],
  and: [0, 1, 2],
  or: [0, 1, 2],
  bitand: [0, 1, 2],
  bitor: [0, 1, 2],
  bitxor: [0, 1, 2],
  shl: [0, 1, 2],
  shr: [0, 1, 2],
  ushr: [0, 1, 2],
  in: [0, 1, 2],

  // Element access — the index/dynamic forms list three slots; the field forms
  // list only the first two operands (the remaining operand is presumably the
  // field name rather than a slot — confirm against the emitter).
  load_index: [0, 1, 2],
  load_dynamic: [0, 1, 2],
  load_field: [0, 1],
  store_index: [0, 1, 2],
  store_dynamic: [0, 1, 2],
  store_field: [0, 1],

  // Push/pop/get — both listed operands are slots
  push: [0, 1],
  pop: [0, 1],
  get: [0, 1],

  // Control flow — only slot operands are listed here; the label operand of
  // each jump is described separately by jump_label_pos.
  return: [0],
  jump: [],
  jump_true: [0],
  jump_false: [0],
  jump_not_null: [0],
  disrupt: [],

  // Invoke — setarg lists positions 0 and 2 only; position 1 is deliberately
  // not treated as a slot (presumably an argument index — confirm).
  invoke: [0, 1],
  tail_invoke: [0, 1],
  goinvoke: [0],
  frame: [0, 1],
  setarg: [0, 2]
}

// Opcodes that write to their first operand (position 0).
// Used as a set: membership is tested with `writes_dest[op] == true`.
// check_type_consistency consults it to forget the tracked type of a slot that
// an instruction overwrites.
// NOTE(review): invoke and tail_invoke are listed here even though the comment
// on invoke_result_pos says invoke writes its result to position 1, not
// position 0 — confirm whether position 0 is really written by those opcodes.
var writes_dest = {
  access: true, int: true, true: true, false: true, null: true,
  function: true, array: true, record: true,
  move: true, not: true, neg_int: true, neg_float: true, bitnot: true,
  length: true, typeof: true,
  is_int: true, is_text: true, is_num: true,
  is_bool: true, is_null: true, is_array: true,
  is_func: true, is_record: true, is_stone: true, is_identical: true,
  add: true, subtract: true, multiply: true, divide: true,
  modulo: true, pow: true,
  add_int: true, sub_int: true, mul_int: true, div_int: true, mod_int: true,
  add_float: true, sub_float: true, mul_float: true, div_float: true, mod_float: true,
  eq: true, ne: true, lt: true, le: true, gt: true, ge: true,
  eq_int: true, ne_int: true, lt_int: true, gt_int: true, le_int: true, ge_int: true,
  eq_float: true, ne_float: true, lt_float: true, gt_float: true, le_float: true, ge_float: true,
  eq_text: true, ne_text: true, lt_text: true, gt_text: true, le_text: true, ge_text: true,
  eq_bool: true, ne_bool: true, eq_tol: true, ne_tol: true,
  concat: true, and: true, or: true,
  bitand: true, bitor: true, bitxor: true, shl: true, shr: true, ushr: true,
  in: true,
  load_index: true, load_dynamic: true, load_field: true,
  pop: true, get: true,
  invoke: true,
  tail_invoke: true
}

// Operand position (0-indexed from the first operand) of invoke's result slot:
// invoke writes its result to position 1, not position 0.
// NOTE(review): nothing in this file as shown reads this constant;
// check_type_consistency hard-codes the equivalent index instr[2] instead.
var invoke_result_pos = 1

// Jump opcodes and the position of their label operand (0-indexed from first operand),
// so the label itself is found at instr[position + 1].
// An unconditional jump has the label as its first operand; the conditional
// forms have it at position 1, after the slot operand that slot_positions
// lists for them at position 0.
// Both check_jump_targets and check_nop_consistency use this table to locate
// the label; an opcode missing from it is not treated as a jump.
var jump_label_pos = {
  jump: 0,
  jump_true: 1,
  jump_false: 1,
  jump_not_null: 1
}

// --- Check: slot_bounds ---
// Every operand that slot_positions marks as a slot must lie in 0..nr_slots-1.
// Only array entries of func.instructions are inspected; opcodes unknown to
// slot_positions and non-numeric operands are skipped.
// Returns an array of error strings (empty when nothing is out of range, or
// when func.instructions is null).
var check_slot_bounds = function(func) {
  var code = func.instructions
  var slot_count = func.nr_slots
  var problems = []
  var idx = -1
  var entry = null
  var opcode = null
  var slot_ops = null
  var operand_end = 0
  var k = 0
  var at = 0
  var operand = null

  if (code == null) return problems

  // idx is advanced at the top of the loop so the guards below can `continue`.
  while (idx + 1 < length(code)) {
    idx = idx + 1
    entry = code[idx]
    if (!is_array(entry)) continue

    opcode = entry[0]
    slot_ops = slot_positions[opcode]
    if (slot_ops == null) continue

    // Operands sit strictly between the opcode and the trailing line/col pair.
    operand_end = length(entry) - 2
    k = 0
    while (k < length(slot_ops)) {
      // Table positions count from the first operand; +1 skips the opcode.
      at = slot_ops[k] + 1
      if (at < operand_end) {
        operand = entry[at]
        if (is_number(operand)) {
          if (operand < 0 || operand >= slot_count) {
            push(problems, `slot_bounds: instr ${text(idx)} op=${opcode} slot[${text(slot_ops[k])}]=${text(operand)} out of range 0..${text(slot_count - 1)}`)
          }
        }
      }
      k = k + 1
    }
  }
  return problems
}

// --- Check: jump_targets ---
// Every jump must name a label that appears somewhere in the instruction
// stream. Labels are the text entries of func.instructions; text entries that
// start with "_nop_" are nop markers and are not counted as labels.
// Returns an array of error strings (empty when every target resolves, or when
// func.instructions is null).
var check_jump_targets = function(func) {
  var code = func.instructions
  var problems = []
  var known_labels = {}
  var idx = 0
  var entry = null
  var opcode = null
  var operand_pos = null
  var dest_label = null

  if (code == null) return problems

  // Pass 1: remember every real label, wherever it occurs, so that forward
  // jumps resolve as well as backward ones.
  idx = -1
  while (idx + 1 < length(code)) {
    idx = idx + 1
    entry = code[idx]
    if (!is_text(entry)) continue
    if (starts_with(entry, "_nop_")) continue
    known_labels[entry] = true
  }

  // Pass 2: look up the label operand of each jump instruction.
  idx = -1
  while (idx + 1 < length(code)) {
    idx = idx + 1
    entry = code[idx]
    if (!is_array(entry)) continue

    opcode = entry[0]
    operand_pos = jump_label_pos[opcode]
    if (operand_pos == null) continue

    // +1 converts an operand position into an index into the instruction array.
    dest_label = entry[operand_pos + 1]
    if (!is_text(dest_label)) continue
    if (known_labels[dest_label] == true) continue

    push(problems, `jump_targets: instr ${text(idx)} op=${opcode} target label "${dest_label}" not found`)
  }
  return problems
}

// --- Check: type_consistency ---
// Verifies typed operators receive compatible known types.
//
// Walks func.instructions once, remembering for each slot the type of the
// constant most recently loaded into it ("int", "float", "text" or "bool").
// The knowledge is local to a straight-line run of instructions: every real
// label (a text entry not starting with "_nop_") discards everything known,
// and any opcode listed in writes_dest forgets its destination slot. A typed
// operator whose source slot has a known, different type is reported.
//
// Returns an array of error strings (empty = pass); the array is returned
// empty when func.instructions is null.
var check_type_consistency = function(func) {
  var instructions = func.instructions
  var errors = []
  var slot_types = {}
  var i = 0
  var instr = null
  var op = null
  var expected = null
  var src_count = 0
  var k = 0
  var slot = null
  var known = null

  if (instructions == null) return errors

  // Type constants
  var T_INT = "int"
  var T_FLOAT = "float"
  var T_TEXT = "text"
  var T_BOOL = "bool"

  var int_ops = {
    add_int: true, sub_int: true, mul_int: true, div_int: true, mod_int: true,
    eq_int: true, ne_int: true, lt_int: true, gt_int: true, le_int: true, ge_int: true,
    neg_int: true
  }
  var float_ops = {
    add_float: true, sub_float: true, mul_float: true, div_float: true, mod_float: true,
    eq_float: true, ne_float: true, lt_float: true, gt_float: true, le_float: true, ge_float: true,
    neg_float: true
  }
  var text_ops = {
    eq_text: true, ne_text: true, lt_text: true, gt_text: true, le_text: true, ge_text: true,
    concat: true
  }
  var bool_ops = {
    eq_bool: true, ne_bool: true, not: true, and: true, or: true
  }

  while (i < length(instructions)) {
    instr = instructions[i]

    // Reset type info at labels (basic block boundaries)
    if (is_text(instr) && !starts_with(instr, "_nop_")) {
      slot_types = {}
      i = i + 1
      continue
    }

    if (!is_array(instr)) {
      i = i + 1
      continue
    }

    op = instr[0]

    // Which operand type, if any, does this opcode demand?
    expected = null
    if (int_ops[op] == true) {
      expected = T_INT
    } else if (float_ops[op] == true) {
      expected = T_FLOAT
    } else if (text_ops[op] == true) {
      expected = T_TEXT
    } else if (bool_ops[op] == true) {
      expected = T_BOOL
    }

    // Step 1 — check sources against what was known BEFORE this instruction
    // takes effect, so an instruction that reads and writes the same slot is
    // judged on the old contents.
    // Length 5 is [op, dest, src, line, col]; length >= 6 adds a second source.
    if (expected != null && length(instr) >= 5) {
      src_count = length(instr) >= 6 ? 2 : 1
      k = 0
      while (k < src_count) {
        slot = text(instr[2 + k])
        known = slot_types[slot]
        if (known != null && known != expected && known != "unknown") {
          push(errors, `type_consistency: instr ${text(i)} op=${op} src${text(k + 1)} slot ${slot} has type ${known}, expected ${expected}`)
        }
        k = k + 1
      }
    }

    // Step 2 — forget whatever was known about the slots this instruction
    // overwrites. invoke additionally clobbers its result slot at instr[2].
    if (writes_dest[op] == true) {
      slot_types[text(instr[1])] = null
    }
    if (op == "invoke") {
      slot_types[text(instr[2])] = null
    }

    // Step 3 — record the result type of constant loaders. This must come
    // after step 2: the loaders are themselves in writes_dest, so recording
    // first would have the fresh type erased immediately.
    if (op == "int") {
      slot_types[text(instr[1])] = T_INT
    } else if (op == "access") {
      if (is_number(instr[2])) {
        if (is_integer(instr[2])) {
          slot_types[text(instr[1])] = T_INT
        } else {
          slot_types[text(instr[1])] = T_FLOAT
        }
      } else if (is_text(instr[2])) {
        slot_types[text(instr[1])] = T_TEXT
      }
    } else if (op == "true" || op == "false") {
      slot_types[text(instr[1])] = T_BOOL
    }

    i = i + 1
  }
  return errors
}

// --- Check: nop_consistency ---
// No jump may name a nop marker as its target. Nop markers are the text
// entries of func.instructions that start with "_nop_".
// Returns an array of error strings (empty when no jump points at a marker, or
// when func.instructions is null).
var check_nop_consistency = function(func) {
  var stream = func.instructions
  var reports = []
  var markers = {}
  var at = 0
  var item = null
  var name = null
  var label_index = null
  var goal = null

  if (stream == null) return reports

  // First sweep: gather the markers up front, because a jump may appear
  // earlier in the stream than the marker it names.
  at = -1
  while (at + 1 < length(stream)) {
    at = at + 1
    item = stream[at]
    if (!is_text(item)) continue
    if (!starts_with(item, "_nop_")) continue
    markers[item] = true
  }

  // Second sweep: flag each jump whose label operand is one of those markers.
  at = -1
  while (at + 1 < length(stream)) {
    at = at + 1
    item = stream[at]
    if (!is_array(item)) continue

    name = item[0]
    label_index = jump_label_pos[name]
    if (label_index == null) continue

    // The table counts from the first operand; +1 skips the opcode itself.
    goal = item[label_index + 1]
    if (!is_text(goal)) continue
    if (markers[goal] != true) continue

    push(reports, `nop_consistency: instr ${text(at)} op=${name} jumps to nop marker "${goal}"`)
  }
  return reports
}

// --- verify_all ---
// Runs every check on one function and gathers the findings into a single
// array of error strings (empty = pass). The checks run in a fixed order:
// slot bounds, jump targets, type consistency, nop consistency.
// Each finding is prefixed with "<pass_name>: " when pass_name is given,
// followed by the function's name, or "<unknown>" when func.name is null.
var verify_all = function(func, pass_name) {
  var checks = [check_slot_bounds, check_jump_targets, check_type_consistency, check_nop_consistency]
  var all_errors = []
  var prefix = ""
  var fn_name = "<unknown>"
  var run = null
  var found = null
  var c = 0
  var e = 0

  if (pass_name != null) prefix = pass_name + ": "
  if (func.name != null) fn_name = func.name

  while (c < length(checks)) {
    // Bind the check to a local before calling it.
    run = checks[c]
    found = run(func)
    e = 0
    while (e < length(found)) {
      push(all_errors, `${prefix}${fn_name}: ${found[e]}`)
      e = e + 1
    }
    c = c + 1
  }

  return all_errors
}

// Module result: exposes verify_all together with each individual check
// function under its own name.
return {
  verify_all: verify_all,
  check_slot_bounds: check_slot_bounds,
  check_jump_targets: check_jump_targets,
  check_type_consistency: check_type_consistency,
  check_nop_consistency: check_nop_consistency
}