From 747227de40e656ef22f65f745490c4c0d4aac5b8 Mon Sep 17 00:00:00 2001 From: John Alanbrook Date: Tue, 10 Feb 2026 06:51:26 -0600 Subject: [PATCH] better parse errors --- docs/language.md | 2 +- parse.cm | 89 ++++++++++++++++++++ tests/decl_restrictions.ce | 165 +++++++++++++++++++++++++++++++++++++ 3 files changed, 255 insertions(+), 1 deletion(-) create mode 100644 tests/decl_restrictions.ce diff --git a/docs/language.md b/docs/language.md index 62224d14..962450de 100644 --- a/docs/language.md +++ b/docs/language.md @@ -11,7 +11,7 @@ type: "docs" ### Variables and Constants -Variables are declared with `var`, constants with `def`. All declarations must be initialized and must appear at the function body level — not inside `if`, `while`, `for`, or bare `{}` blocks. +Variables are declared with `var`, constants with `def`. All declarations must be initialized and must appear at the function body level — not inside `if`, `while`, `for`, or `do` blocks. ```javascript var x = 10 diff --git a/parse.cm b/parse.cm index da590c27..40028e04 100644 --- a/parse.cm +++ b/parse.cm @@ -9,6 +9,9 @@ var parse = function(tokens, src, filename, tokenizer) { var tok = null var got_lf = false var prev_tok = null + var _control_depth = 0 + var _control_type = null + var _expecting_body = false var advance = function() { var t = null @@ -176,6 +179,9 @@ var parse = function(tokens, src, filename, tokenizer) { var sub_ast = null var sub_stmt = null var sub_expr = null + var meth_old_cd = 0 + var meth_old_ct = null + var meth_old_eb = false if (k == "number") { node = ast_node("number", start) @@ -399,6 +405,12 @@ var parse = function(tokens, src, filename, tokenizer) { else if (tok.kind == "eof") parse_error(tok, "unterminated method parameter list") if (length(params) > 4) parse_error(tok, "functions cannot have more than 4 parameters") fn.arity = length(params) + meth_old_cd = _control_depth + meth_old_ct = _control_type + meth_old_eb = _expecting_body + _control_depth = 0 + _control_type = null + _expecting_body = false if (tok.kind == "{") { advance() fn.statements = parse_block_statements() @@ -407,6 +419,9 @@ var parse = function(tokens, src, filename, tokenizer) { } else { parse_error(tok, "expected '{' for method body") } + _control_depth = meth_old_cd + _control_type = meth_old_ct + _expecting_body = meth_old_eb fn.function_nr = function_nr function_nr = function_nr + 1 ast_node_end(fn) @@ -788,6 +803,9 @@ var parse = function(tokens, src, filename, tokenizer) { var prev_names = null var pname = null var old_dis = 0 + var old_cd = _control_depth + var old_ct = _control_type + var old_eb = _expecting_body if (in_disruption) { parse_error(tok, "cannot define function inside disruption clause") @@ -834,6 +852,9 @@ var parse = function(tokens, src, filename, tokenizer) { if (length(params) > 4) parse_error(tok, "functions cannot have more than 4 parameters") node.arity = length(params) + _control_depth = 0 + _control_type = null + _expecting_body = false if (tok.kind == "{") { advance() stmts = parse_block_statements() @@ -859,6 +880,9 @@ var parse = function(tokens, src, filename, tokenizer) { } } + _control_depth = old_cd + _control_type = old_ct + _expecting_body = old_eb node.function_nr = function_nr function_nr = function_nr + 1 ast_node_end(node) @@ -875,6 +899,9 @@ var parse = function(tokens, src, filename, tokenizer) { var expr = null var prev_names = null var pname = null + var old_cd = _control_depth + var old_ct = _control_type + var old_eb = _expecting_body node.arrow = true if (in_disruption) { @@ -925,6 +952,9 @@ var parse = function(tokens, src, filename, tokenizer) { advance() } + _control_depth = 0 + _control_type = null + _expecting_body = false if (tok.kind == "{") { advance() stmts = parse_block_statements() @@ -940,6 +970,9 @@ var parse = function(tokens, src, filename, tokenizer) { node.statements = stmts } + _control_depth = old_cd + _control_type = old_ct + _expecting_body = old_eb node.function_nr = function_nr function_nr = function_nr + 1 ast_node_end(node) @@ -971,8 +1004,25 @@ var parse = function(tokens, src, filename, tokenizer) { var elif = null var p1_tok = null var labeled_stmt = null + var depth = 0 + var saved_ct = null + var saved_cd = 0 + var saved_eb = false if (k == "{") { + if (!_expecting_body) { + parse_error(start, "bare block '{ ... }' is not a valid statement; use a function, if, while, or for instead") + advance() + depth = 1 + while (tok.kind != "eof" && depth > 0) { + if (tok.kind == "{") depth = depth + 1 + else if (tok.kind == "}") depth = depth - 1 + if (depth > 0) advance() + } + if (tok.kind == "}") advance() + return null + } + _expecting_body = false node = ast_node("block", start) advance() stmts = parse_block_statements() @@ -983,6 +1033,9 @@ var parse = function(tokens, src, filename, tokenizer) { } if (k == "var" || k == "def") { + if (_control_depth > 0) { + parse_error(start, "'" + k + "' declarations must appear at function body level, not inside '" + _control_type + "'; move this declaration before the '" + _control_type + "' statement") + } kind_name = k is_def = (k == "def") advance() @@ -1009,6 +1062,8 @@ var parse = function(tokens, src, filename, tokenizer) { } } else if (is_def) { parse_error(start, "missing initializer for constant '" + var_name + "'") + } else { + parse_error(start, "'var' declarations must be initialized; use 'var " + var_name + " = null' if no value is needed") } ast_node_end(node) push(decls, node) @@ -1037,6 +1092,11 @@ var parse = function(tokens, src, filename, tokenizer) { else parse_error(tok, "expected ')' after if condition") then_stmts = [] node.then = then_stmts + saved_ct = _control_type + saved_cd = _control_depth + _control_type = "if" + _control_depth = _control_depth + 1 + _expecting_body = true body = parse_statement() if (body != null) push(then_stmts, body) else_ifs = [] @@ -1044,15 +1104,22 @@ var parse = function(tokens, src, filename, tokenizer) { if (tok.kind == "else") { advance() if (tok.kind == "if") { + _control_depth = saved_cd + _control_type = saved_ct elif = parse_statement() if (elif != null) push(else_ifs, elif) + ast_node_end(node) + return node } else { else_stmts = [] node.else = else_stmts + _expecting_body = true body = parse_statement() if (body != null) push(else_stmts, body) } } + _control_depth = saved_cd + _control_type = saved_ct ast_node_end(node) return node } @@ -1068,8 +1135,15 @@ var parse = function(tokens, src, filename, tokenizer) { else parse_error(tok, "expected ')' after while condition") stmts = [] node.statements = stmts + saved_ct = _control_type + saved_cd = _control_depth + _control_type = "while" + _control_depth = _control_depth + 1 + _expecting_body = true body = parse_statement() if (body != null) push(stmts, body) + _control_depth = saved_cd + _control_type = saved_ct ast_node_end(node) return node } @@ -1079,8 +1153,15 @@ var parse = function(tokens, src, filename, tokenizer) { advance() stmts = [] node.statements = stmts + saved_ct = _control_type + saved_cd = _control_depth + _control_type = "do" + _control_depth = _control_depth + 1 + _expecting_body = true body = parse_statement() if (body != null) push(stmts, body) + _control_depth = saved_cd + _control_type = saved_ct if (tok.kind == "while") advance() else parse_error(tok, "expected 'while' after do body") if (tok.kind == "(") advance() @@ -1101,6 +1182,7 @@ var parse = function(tokens, src, filename, tokenizer) { else parse_error(tok, "expected '(' after for") if (tok.kind != ";") { if (tok.kind == "var" || tok.kind == "def") { + parse_error(tok, "'" + tok.kind + "' declarations cannot appear in the for initializer; declare variables before the for loop") init = parse_statement() node.init = init } else { @@ -1124,8 +1206,15 @@ var parse = function(tokens, src, filename, tokenizer) { else parse_error(tok, "expected ')' after for clauses") stmts = [] node.statements = stmts + saved_ct = _control_type + saved_cd = _control_depth + _control_type = "for" + _control_depth = _control_depth + 1 + _expecting_body = true body = parse_statement() if (body != null) push(stmts, body) + _control_depth = saved_cd + _control_type = saved_ct ast_node_end(node) return node } diff --git a/tests/decl_restrictions.ce b/tests/decl_restrictions.ce new file mode 100644 index 00000000..c2ce2280 --- /dev/null +++ b/tests/decl_restrictions.ce @@ -0,0 +1,165 @@ +// Declaration restriction tests +// Run: ./cell tests/decl_restrictions.ce + +var tokenize = use("tokenize") +var parse = use("parse") + +var passed = 0 +var failed = 0 +var error_names = [] +var error_reasons = [] +var fail_msg = "" + +var _i = 0 +for (_i = 0; _i < 20; _i++) { + error_names[] = null + error_reasons[] = null +} + +var fail = function(msg) { + fail_msg = msg + disrupt +} + +var run = function(name, fn) { + fail_msg = "" + fn() + passed = passed + 1 +} disruption { + error_names[failed] = name + error_reasons[failed] = fail_msg == "" ? "disruption" : fail_msg + failed = failed + 1 +} + +var parse_snippet = function(src) { + var result = tokenize(src, "") + var ast = parse(result.tokens, src, "", tokenize) + return ast +} + +var has_error = function(ast, substring) { + if (ast.errors == null) return false + var i = 0 + while (i < length(ast.errors)) { + if (search(ast.errors[i].message, substring) != null) return true + i = i + 1 + } + return false +} + +var has_no_errors = function(ast) { + return ast.errors == null || length(ast.errors) == 0 +} + +// === BARE BLOCK === + +run("bare block rejected", function() { + var ast = parse_snippet("{ var x = 1 }") + if (!has_error(ast, "bare block")) fail("expected 'bare block' error, got: " + text(ast.errors)) +}) + +// === VAR IN IF (braces) === + +run("var in if braces", function() { + var ast = parse_snippet("if (true) { var x = 1 }") + if (!has_error(ast, "not inside 'if'")) fail("expected 'not inside if' error, got: " + text(ast.errors)) +}) + +// === VAR IN IF (no braces) === + +run("var in if no braces", function() { + var ast = parse_snippet("if (true) var x = 1") + if (!has_error(ast, "not inside 'if'")) fail("expected 'not inside if' error, got: " + text(ast.errors)) +}) + +// === VAR IN WHILE === + +run("var in while", function() { + var ast = parse_snippet("while (true) { var x = 1; break }") + if (!has_error(ast, "not inside 'while'")) fail("expected 'not inside while' error, got: " + text(ast.errors)) +}) + +// === VAR IN FOR INIT === + +run("var in for init", function() { + var ast = parse_snippet("for (var i = 0; i < 1; i++) {}") + if (!has_error(ast, "for initializer")) fail("expected 'for initializer' error, got: " + text(ast.errors)) +}) + +// === VAR IN FOR BODY === + +run("var in for body", function() { + var ast = parse_snippet("var i = 0; for (i = 0; i < 1; i++) { var x = 1 }") + if (!has_error(ast, "not inside 'for'")) fail("expected 'not inside for' error, got: " + text(ast.errors)) +}) + +// === VAR IN DO === + +run("var in do", function() { + var ast = parse_snippet("do { var x = 1; break } while (true)") + if (!has_error(ast, "not inside 'do'")) fail("expected 'not inside do' error, got: " + text(ast.errors)) +}) + +// === DEF IN IF === + +run("def in if", function() { + var ast = parse_snippet("if (true) { def x = 1 }") + if (!has_error(ast, "not inside 'if'")) fail("expected 'not inside if' error, got: " + text(ast.errors)) +}) + +// === UNINITIALIZED VAR === + +run("uninitialized var", function() { + var ast = parse_snippet("var x") + if (!has_error(ast, "must be initialized")) fail("expected 'must be initialized' error, got: " + text(ast.errors)) +}) + +// === NESTED: VAR IN IF INSIDE WHILE === + +run("nested var in if inside while", function() { + var ast = parse_snippet("while (true) { if (true) { var x = 1 }; break }") + if (!has_error(ast, "not inside 'if'")) fail("expected 'not inside if' error, got: " + text(ast.errors)) +}) + +// === VALID: NESTED FUNCTION === + +run("valid nested fn in control flow", function() { + var ast = parse_snippet("if (true) { var fn = function() { var x = 1; return x } }") + // The var inside the function is fine; only the var fn = ... inside if should error + if (!has_error(ast, "not inside 'if'")) fail("expected error for var fn inside if") + // But there should NOT be an error about var x inside the function body + var i = 0 + var bad = false + if (ast.errors != null) { + while (i < length(ast.errors)) { + if (search(ast.errors[i].message, "var x") != null) bad = true + i = i + 1 + } + } + if (bad) fail("should not error on var inside nested function") +}) + +// === VALID: NORMAL VAR === + +run("valid normal var", function() { + var ast = parse_snippet("var x = 1") + if (!has_no_errors(ast)) fail("expected no errors, got: " + text(ast.errors)) +}) + +// === VALID: VAR IN FUNCTION BODY === + +run("valid var in function body", function() { + var ast = parse_snippet("var fn = function() { var x = 1; return x }") + if (!has_no_errors(ast)) fail("expected no errors, got: " + text(ast.errors)) +}) + +// === SUMMARY === + +print(text(passed) + " passed, " + text(failed) + " failed out of " + text(passed + failed)) +var _j = 0 +if (failed > 0) { + print("") + for (_j = 0; _j < failed; _j++) { + print(" FAIL " + error_names[_j] + ": " + error_reasons[_j]) + } +}