comprehensive testing for regression analysis

2026-02-12 18:15:03 -06:00
parent 4aedb8b0c5
commit 89e34ba71d
13 changed files with 25599 additions and 19153 deletions
--- a/test.ce
+++ b/test.ce
@@ -14,6 +14,13 @@ var target_pkg = null // null = current package
 var target_test = null // null = all tests, otherwise specific test file
 var all_pkgs = false
 var gc_after_each_test = false
+var verify_ir = false // set to true by the --verify flag in parse_args
+var diff_mode = false // set to true by the --diff flag in parse_args
+
+var os_ref = use('os')
+var analyze = os_ref.analyze // produces the `ast` value handed to run_ast_noopt_fn when loading a noopt module
+var run_ast_fn = os_ref.run_ast_fn
+var run_ast_noopt_fn = os_ref.run_ast_noopt_fn // may be missing; --diff refuses to run without it

 // Actor test support
 def ACTOR_TEST_TIMEOUT = 30000 // 30 seconds timeout for actor tests
@@ -46,6 +53,11 @@ function get_current_package_name() {
 //   cell test package <name>           - run all tests for named package
 //   cell test package <name> <test>    - run specific test in named package
 //   cell test package all              - run all tests from all packages
+//
+// Flags:
+//   -g        - run GC after each test
+//   --verify  - enable IR verification (validates mcode IR after each optimizer pass)
+//   --diff    - enable differential testing (run each test optimized and unoptimized, compare results)

 function parse_args() {
    var cleaned_args = []
@@ -57,6 +69,10 @@ function parse_args() {
    for (i = 0; i < length(_args); i++) {
        if (_args[i] == '-g') {
            gc_after_each_test = true
+        } else if (_args[i] == '--verify') {
+            verify_ir = true
+        } else if (_args[i] == '--diff') {
+            diff_mode = true
        } else {
            push(cleaned_args, _args[i])
        }
@@ -162,6 +178,77 @@ if (!parse_args()) {
    return
 }

+// Enable IR verification if requested (--verify)
+if (verify_ir) {
+    os_ref._verify_ir = true // the request is passed on by setting a field on the os module object
+    log.console('IR verification enabled')
+}
+
+if (diff_mode && !run_ast_noopt_fn) { // --diff cannot work unless the os module provides run_ast_noopt_fn
+    log.console('error: --diff requires run_ast_noopt_fn (rebuild bootstrap)')
+    $stop()
+    return
+}
+
+// Diff mode: deep comparison helper. Arrays are compared element by element (recursively); all other values must satisfy ==.
+function values_equal(a, b) {
+    var i = 0
+    if (a == b) return true // already equal under ==, nothing more to check
+    if (is_null(a) && is_null(b)) return true
+    if (is_null(a) || is_null(b)) return false // exactly one side is null
+    if (is_array(a) && is_array(b)) {
+        if (length(a) != length(b)) return false
+        i = 0
+        while (i < length(a)) {
+            if (!values_equal(a[i], b[i])) return false // first differing element decides the result
+            i = i + 1
+        }
+        return true
+    }
+    return false // no structural comparison is attempted for non-array values that are not ==
+}
+
+function describe(val) { // returns a short printable form of val; used in the DIFF result-mismatch message
+    if (is_null(val)) return "null"
+    if (is_text(val)) return `"${val}"` // text is shown wrapped in double quotes
+    if (is_number(val)) return text(val)
+    if (is_logical(val)) return text(val)
+    if (is_function(val)) return "<function>"
+    return "<other>" // there is no dedicated branch for arrays or any other kind of value
+}
+
+// Diff mode: call opt_fn and noopt_fn, then compare whether each disrupted and what each returned; mismatches are logged and counted
+var diff_mismatches = 0 // running count of mismatches, printed with the test totals when diff mode is on
+function diff_check(test_name, file_path, opt_fn, noopt_fn) { // note: file_path is not used in the body
+    if (!diff_mode) return
+    var opt_result = null
+    var noopt_result = null
+    var opt_err = null // stays null unless the optimized call disrupts
+    var noopt_err = null // stays null unless the unoptimized call disrupts
+
+    var _opt = function() {
+        opt_result = opt_fn()
+    } disruption {
+        opt_err = "disrupted" // only the fact of the disruption is recorded, no detail
+    }
+    _opt()
+
+    var _noopt = function() {
+        noopt_result = noopt_fn()
+    } disruption {
+        noopt_err = "disrupted"
+    }
+    _noopt()
+
+    if (opt_err != noopt_err) { // one call disrupted and the other did not
+        log.console(`    DIFF ${test_name}: disruption mismatch opt=${opt_err != null ? opt_err : "ok"} noopt=${noopt_err != null ? noopt_err : "ok"}`)
+        diff_mismatches = diff_mismatches + 1
+    } else if (!values_equal(opt_result, noopt_result)) { // same disruption status, so compare the results (both still null if both disrupted)
+        log.console(`    DIFF ${test_name}: result mismatch opt=${describe(opt_result)} noopt=${describe(noopt_result)}`)
+        diff_mismatches = diff_mismatches + 1
+    }
+}
+
 function ensure_dir(path) {
    if (fd.is_dir(path)) return true

@@ -320,9 +407,26 @@ function run_tests(package_name, specific_test) {

        _load_file = function() {
            var test_mod = null
+            var test_mod_noopt = null
            var use_pkg = package_name ? package_name : fd.realpath('.')
+            var _load_noopt = null
            test_mod = shop.use(mod_path, use_pkg)

+            // Load noopt version for diff mode
+            if (diff_mode) {
+                _load_noopt = function() {
+                    var src_path = prefix + '/' + f
+                    var src = text(fd.slurp(src_path))
+                    var ast = analyze(src, src_path)
+                    test_mod_noopt = run_ast_noopt_fn(mod_path + '_noopt', ast, {
+                        use: function(path) { return shop.use(path, use_pkg) }
+                    })
+                } disruption {
+                    log.console(`    DIFF: failed to load noopt module for ${f}`)
+                }
+                _load_noopt()
+            }
+
            var tests = []
            var j = 0
            var t = null
@@ -406,6 +510,12 @@ function run_tests(package_name, specific_test) {
                        }
                    }
                    _run_one()
+
+                    // Differential check: compare opt vs noopt
+                    if (diff_mode && test_mod_noopt && is_object(test_mod_noopt) && is_function(test_mod_noopt[t.name])) {
+                        diff_check(t.name, f, t.fn, test_mod_noopt[t.name])
+                    }
+
                    end_time = time.number()
                    test_entry.duration_ns = round((end_time - start_time) * 1000000000)

@@ -635,6 +745,9 @@ function finalize_results() {

    log.console(`----------------------------------------`)
    log.console(`Tests: ${totals.passed} passed, ${totals.failed} failed, ${totals.total} total`)
+    if (diff_mode) {
+        log.console(`Diff mismatches: ${text(diff_mismatches)}`)
+    }

    generate_reports(totals)
    $stop()
@@ -652,6 +765,9 @@ if (length(all_actor_tests) == 0) {

    log.console(`----------------------------------------`)
    log.console(`Tests: ${totals.passed} passed, ${totals.failed} failed, ${totals.total} total`)
+    if (diff_mode) {
+        log.console(`Diff mismatches: ${text(diff_mismatches)}`)
+    }
 } else {
    $delay(check_timeouts, 1000)
 }