287 Commits

Author SHA1 Message Date
John Alanbrook
4b7cde9400 progress on aot 2026-02-16 21:58:45 -06:00
John Alanbrook
a1ee7dd458 better json pretty print 2026-02-16 17:00:06 -06:00
John Alanbrook
9dbe699033 better make 2026-02-16 01:45:00 -06:00
John Alanbrook
f809cb05f0 Merge branch 'fix_core_scripts' into quicken_mcode 2026-02-16 01:43:08 -06:00
John Alanbrook
788ea98651 bootstrap init 2026-02-16 01:36:36 -06:00
John Alanbrook
433ce8a86e update actors 2026-02-16 01:35:07 -06:00
John Alanbrook
cd6e357b6e Merge branch 'quicken_mcode' into gen_dylib 2026-02-16 00:35:40 -06:00
John Alanbrook
f4f56ed470 run dylibs 2026-02-16 00:35:23 -06:00
John Alanbrook
ff61ab1f50 better streamline 2026-02-16 00:34:49 -06:00
John Alanbrook
46c345d34e cache invalidation 2026-02-16 00:04:30 -06:00
John Alanbrook
dc440587ff pretty json 2026-02-15 22:55:11 -06:00
John Alanbrook
8f92870141 correct syntax errors in core scripts 2026-02-15 22:23:04 -06:00
John Alanbrook
7fc4a205f6 go reuses frames 2026-02-15 19:45:17 -06:00
John Alanbrook
23b201bdd7 dynamic dispatch 2026-02-15 17:51:07 -06:00
John Alanbrook
913ec9afb1 Merge branch 'audit_gc' into fix_slots 2026-02-15 15:44:28 -06:00
John Alanbrook
56de0ce803 fix infinite loop in shop 2026-02-15 15:41:09 -06:00
John Alanbrook
96bbb9e4c8 idompent 2026-02-15 14:58:46 -06:00
John Alanbrook
ebd624b772 fixing gc bugs; nearly idempotent 2026-02-15 13:14:26 -06:00
John Alanbrook
7de20b39da more detail on broken pipeline and vm suit tests 2026-02-15 11:51:23 -06:00
John Alanbrook
ee646db394 failsafe boot mode 2026-02-15 11:44:33 -06:00
John Alanbrook
ff80e0d30d Merge branch 'fix_gc' into pitweb 2026-02-15 10:04:54 -06:00
John Alanbrook
d9f41db891 fix syntax errors in build 2026-02-15 09:29:07 -06:00
John Alanbrook
860632e0fa update cli docs and fix cli scripts with new syntax 2026-02-14 22:24:32 -06:00
John Alanbrook
dcc9659e6b Merge branch 'runtime_rework' into fix_gc 2026-02-14 22:11:31 -06:00
John Alanbrook
2f7f2233b8 compiling 2026-02-14 22:08:55 -06:00
John Alanbrook
eee06009b9 no more special case for core C 2026-02-14 22:00:12 -06:00
John Alanbrook
a765872017 remove if/else dispatch from compile chain 2026-02-14 17:57:48 -06:00
John Alanbrook
a93218e1ff faster streamline 2026-02-14 17:14:43 -06:00
John Alanbrook
f2c4fa2f2b remove redundant check 2026-02-14 16:49:16 -06:00
John Alanbrook
5fe05c60d3 faster gc 2026-02-14 16:46:11 -06:00
John Alanbrook
e75596ce30 respsect array and object length requests 2026-02-14 15:42:19 -06:00
John Alanbrook
86609c27f8 correct sections 2026-02-14 15:13:18 -06:00
John Alanbrook
356c51bde3 better array allocation 2026-02-14 14:44:00 -06:00
John Alanbrook
89421e11a4 pull out prettify mcode 2026-02-14 14:14:34 -06:00
John Alanbrook
e5fc04fecd faster mach compile 2026-02-14 14:02:15 -06:00
John Alanbrook
8ec56e85fa shop audit 2026-02-14 14:00:27 -06:00
John Alanbrook
f49ca530bb fix delete gc bug 2026-02-13 21:52:37 -06:00
John Alanbrook
83263379bd ocaml style rooting macros 2026-02-13 20:46:31 -06:00
John Alanbrook
e80e615634 fix array gc bug; new gc error chasing 2026-02-13 16:58:42 -06:00
John Alanbrook
c1430fd59b Merge branch 'fix_gc' into runtime_rework 2026-02-13 15:42:37 -06:00
John Alanbrook
db73eb4eeb Merge branch 'mcode_streamline' into runtime_rework 2026-02-13 15:42:20 -06:00
John Alanbrook
f2556c5622 proper shop caching 2026-02-13 09:04:25 -06:00
John Alanbrook
291304f75d new way to track actor bad memory access 2026-02-13 09:03:33 -06:00
John Alanbrook
3795533554 clean up bytecode 2026-02-13 09:03:00 -06:00
John Alanbrook
d26a96bc62 cached bootstrap 2026-02-13 08:11:35 -06:00
John Alanbrook
0acaabd5fa merge add 2026-02-13 08:09:12 -06:00
John Alanbrook
1ba060668e growable buddy memory runtime 2026-02-13 07:59:52 -06:00
John Alanbrook
77fa058135 mach loading 2026-02-13 07:26:49 -06:00
John Alanbrook
f7e2ff13b5 guard hoisting 2026-02-13 06:32:58 -06:00
John Alanbrook
36fd0a35f9 Merge branch 'fix_gc' into mcode_streamline 2026-02-13 05:59:11 -06:00
John Alanbrook
77c02bf9bf simplify text 2026-02-13 05:59:01 -06:00
John Alanbrook
f251691146 Merge branch 'mach_memory' into mcode_streamline 2026-02-13 05:58:21 -06:00
John Alanbrook
e9ea6ec299 Merge branch 'runtime_rework' into mach_memory 2026-02-13 05:54:28 -06:00
John Alanbrook
bf5fdbc688 backward inference 2026-02-13 05:39:25 -06:00
John Alanbrook
b960d03eeb immediate ascii for string path 2026-02-13 05:35:11 -06:00
John Alanbrook
b4d42fb83d stone pool renamed to constant pool - more appropriate 2026-02-13 05:17:22 -06:00
John Alanbrook
0a680a0cd3 gc print 2026-02-13 05:03:45 -06:00
John Alanbrook
9f0fd84f4f fix growing gc 2026-02-13 04:33:32 -06:00
John Alanbrook
cb9d6e0c0e mmap for poison heap 2026-02-13 04:03:36 -06:00
John Alanbrook
4f18a0b524 tco 2026-02-13 03:57:18 -06:00
John Alanbrook
f296a0c10d fix segv 2026-02-13 03:08:27 -06:00
John Alanbrook
1df6553577 Merge branch 'runtime_rework' into mcode_streamline 2026-02-13 02:52:54 -06:00
John Alanbrook
30a9cfee79 simplify gc model 2026-02-13 02:33:25 -06:00
John Alanbrook
6fff96d9d9 lower intrinsics in mcode 2026-02-13 02:31:16 -06:00
John Alanbrook
4a50d0587d guards in mcode 2026-02-13 02:30:41 -06:00
John Alanbrook
e346348eb5 Merge branch 'fix_gc' into mcode_streamline 2026-02-12 19:15:13 -06:00
John Alanbrook
ff560973f3 Merge branch 'fix_gc' into runtime_rework 2026-02-12 18:57:44 -06:00
John Alanbrook
de4b3079d4 organize 2026-02-12 18:53:06 -06:00
John Alanbrook
29227e655b Merge branch 'pretty_mcode' into mcode_streamline 2026-02-12 18:48:17 -06:00
John Alanbrook
588e88373e Merge branch 'fix_ternary' into pretty_mcode 2026-02-12 18:46:04 -06:00
John Alanbrook
9aca365771 Merge branch 'runtime_rework' into pretty_mcode 2026-02-12 18:44:56 -06:00
John Alanbrook
c56d4d5c3c some cleanup 2026-02-12 18:44:09 -06:00
John Alanbrook
c1e101b24f benchmarks 2026-02-12 18:41:15 -06:00
John Alanbrook
9f0dfbc6a2 fix ternary operator in object literals 2026-02-12 18:33:43 -06:00
John Alanbrook
5c9403a43b compiler optimization output 2026-02-12 18:27:19 -06:00
John Alanbrook
89e34ba71d comprehensive testing for regression analysis 2026-02-12 18:15:03 -06:00
John Alanbrook
73bfa8d7b1 rm some functions 2026-02-12 18:08:56 -06:00
John Alanbrook
4aedb8b0c5 Merge branch 'cli_audit' into ir_artifact 2026-02-12 17:20:45 -06:00
John Alanbrook
ec072f3b63 Merge branch 'runtime_rework' into ir_artifact 2026-02-12 17:18:23 -06:00
John Alanbrook
65755d9c0c fix using old mach 2026-02-12 17:17:12 -06:00
John Alanbrook
19524b3a53 faster json decode 2026-02-12 17:06:48 -06:00
John Alanbrook
f901332c5b clean up cli 2026-02-12 16:45:10 -06:00
John Alanbrook
add136c140 Merge branch 'pretty_mcode' into runtime_rework 2026-02-12 16:36:58 -06:00
John Alanbrook
c1a99dfd4c mcode looks better 2026-02-12 16:36:53 -06:00
John Alanbrook
7b46c6e947 update docs 2026-02-12 16:34:45 -06:00
John Alanbrook
1efb0b1bc9 run with mcode 2026-02-12 16:14:46 -06:00
John Alanbrook
0ba2783b48 Merge branch 'bytecode_cleanup' into mach 2026-02-12 14:08:45 -06:00
John Alanbrook
6de542f0d0 Merge branch 'mach_suite_fix' into bytecode_cleanup 2026-02-12 12:32:06 -06:00
John Alanbrook
6ba4727119 rm call 2026-02-12 11:58:29 -06:00
John Alanbrook
900db912a5 streamline mcode 2026-02-12 09:43:13 -06:00
John Alanbrook
b771b2b5d8 suite passes now with mcode->mach lowering 2026-02-12 09:40:24 -06:00
John Alanbrook
68fb440502 Merge branch 'mach' into bytecode_cleanup 2026-02-12 07:50:09 -06:00
John Alanbrook
e7a2f16004 mcode to mach 2026-02-12 05:23:33 -06:00
John Alanbrook
3a8a17ab60 mcode->mach 2026-02-12 04:28:14 -06:00
John Alanbrook
8a84be65e1 new path 2026-02-11 14:41:37 -06:00
John Alanbrook
c1910ee1db Merge branch 'mcode2' into mach 2026-02-11 13:16:07 -06:00
John Alanbrook
7036cdf2d1 Merge branch 'mach' into bytecode_cleanup 2026-02-11 13:15:20 -06:00
John Alanbrook
fbeec17ce5 simplifications 2026-02-11 13:15:04 -06:00
John Alanbrook
2c55ae8cb2 quiesence exit 2026-02-11 11:50:29 -06:00
John Alanbrook
259bc139fc rm stack usage 2026-02-11 10:17:55 -06:00
John Alanbrook
a252412eca removal of old code 2026-02-11 09:47:30 -06:00
John Alanbrook
b327e16463 rm unused functions 2026-02-11 09:09:40 -06:00
John Alanbrook
da6f096a56 qbe rt 2026-02-10 20:28:51 -06:00
John Alanbrook
1320ef9f47 Merge branch 'mcode2' into mach 2026-02-10 19:04:35 -06:00
John Alanbrook
ed4a5474d5 Merge branch 'mach' into mcode2 2026-02-10 19:04:22 -06:00
John Alanbrook
f52dd80d52 fix compile error 2026-02-10 19:02:42 -06:00
John Alanbrook
504e268b9d run native modules 2026-02-10 18:52:11 -06:00
John Alanbrook
0d47002167 add compile script 2026-02-10 18:35:18 -06:00
John Alanbrook
b65db63447 remove vm_test, update test harness 2026-02-10 17:52:57 -06:00
John Alanbrook
c1ccff5437 fix >256 object literal error 2026-02-10 17:42:58 -06:00
John Alanbrook
2f681fa366 output for parser stages and c runtime doc 2026-02-10 17:38:15 -06:00
John Alanbrook
682b1cf9cf Merge branch 'pitweb' into mcode2 2026-02-10 17:29:03 -06:00
John Alanbrook
ddf3fc1c77 add object literal test 2026-02-10 17:28:59 -06:00
John Alanbrook
f1a5072ff2 fix increment operators on objects 2026-02-10 17:17:36 -06:00
John Alanbrook
f44fb502be string literal error 2026-02-10 17:02:22 -06:00
John Alanbrook
d75ce916d7 compile optimization 2026-02-10 16:37:11 -06:00
John Alanbrook
fe5dc6ecc9 fix fd.c bugs 2026-02-10 14:21:49 -06:00
John Alanbrook
54673e4a04 better disrupt logging; actor exit on crash 2026-02-10 12:38:06 -06:00
John Alanbrook
0d8b5cfb04 bootstrap loads engine 2026-02-10 12:13:18 -06:00
John Alanbrook
3d71f4a363 Merge branch 'mach' into pitweb 2026-02-10 11:15:44 -06:00
John Alanbrook
4deb0e2577 new syntax for internals 2026-02-10 11:03:01 -06:00
John Alanbrook
67b96e1627 add test for multiple declaration 2026-02-10 10:39:23 -06:00
John Alanbrook
4e5f1d8faa fix labeled loops, do-while, shorthand property syntax, and added more tests 2026-02-10 10:32:54 -06:00
John Alanbrook
bd577712d9 fix function shorthand default params 2026-02-10 10:13:46 -06:00
John Alanbrook
6df3b741cf add runtime warnings for stale files 2026-02-10 10:05:27 -06:00
John Alanbrook
178837b88d bootstrap 2026-02-10 09:53:41 -06:00
John Alanbrook
120ce9d30c Merge branch 'mcode2' into pitweb 2026-02-10 09:23:30 -06:00
John Alanbrook
58f185b379 fix merge 2026-02-10 09:21:33 -06:00
John Alanbrook
f7b5252044 core flag 2026-02-10 09:21:21 -06:00
John Alanbrook
ded5f7d74b cell shop env var 2026-02-10 09:13:10 -06:00
John Alanbrook
fe6033d6cb deploy website script 2026-02-10 08:12:51 -06:00
John Alanbrook
60e61eef76 scheduler starts 2026-02-10 08:12:42 -06:00
John Alanbrook
ad863fb89b postfix/prefix operators handled correctly 2026-02-10 08:12:27 -06:00
John Alanbrook
96f8157039 Merge branch 'mach' into mcode2 2026-02-10 07:38:35 -06:00
John Alanbrook
c4ff0bc109 intrinsics rewritten without ++, --, etc 2026-02-10 07:19:45 -06:00
John Alanbrook
877250b1d8 decomposed mcode 2026-02-10 07:12:27 -06:00
John Alanbrook
747227de40 better parse errors 2026-02-10 06:51:26 -06:00
John Alanbrook
3f7e34cd7a more useful parse errors 2026-02-10 06:08:15 -06:00
John Alanbrook
cef5c50169 add is_letter intrinsic 2026-02-10 06:00:47 -06:00
John Alanbrook
0428424ec7 Merge branch 'mach' into mcode2 2026-02-10 05:53:51 -06:00
John Alanbrook
78e64c5067 optimize parse 2026-02-10 05:53:49 -06:00
John Alanbrook
ff11c49c39 optimize tokenize 2026-02-10 05:52:19 -06:00
John Alanbrook
b8b110b616 bootstrap with serialized mach 2026-02-09 22:54:42 -06:00
John Alanbrook
930dcfba36 Merge branch 'mach' into mqbe 2026-02-09 22:22:15 -06:00
John Alanbrook
eeccb3b34a bootstrap 2026-02-09 22:21:55 -06:00
John Alanbrook
407797881c bytecode serialization 2026-02-09 22:19:41 -06:00
John Alanbrook
7069475729 Merge branch 'pitweb' into mcode2 2026-02-09 20:33:56 -06:00
John Alanbrook
3e42c57479 rm tokenizer/parser/mcode generators from C 2026-02-09 20:05:50 -06:00
John Alanbrook
4b76728230 ast folding 2026-02-09 20:04:40 -06:00
John Alanbrook
4ff9332d38 lsp 2026-02-09 18:53:13 -06:00
John Alanbrook
27e852af5b Merge branch 'mach' into mqbe 2026-02-09 18:46:10 -06:00
John Alanbrook
66a44595c8 fix errors with mcode 2026-02-09 18:45:55 -06:00
John Alanbrook
fc0a1547dc Merge branch 'mach' into mqbe 2026-02-09 18:36:47 -06:00
John Alanbrook
c0b4e70eb2 fix two gc bugs 2026-02-09 18:32:41 -06:00
John Alanbrook
f4714b2b36 qbe macros 2026-02-09 18:17:31 -06:00
John Alanbrook
7f691fd52b fix mach vm suite errors 2026-02-09 18:12:44 -06:00
John Alanbrook
d5209e1d59 fix issues with parse.cm and tokenize.cm 2026-02-09 17:43:44 -06:00
John Alanbrook
68e2395b92 mcode generators 2026-02-09 17:01:39 -06:00
John Alanbrook
1b747720b7 fix regex parser error 2026-02-09 14:34:33 -06:00
John Alanbrook
849123d8fc streamlined cell running 2026-02-09 13:12:05 -06:00
John Alanbrook
6ad919624b Merge branch 'mcode2' into mach 2026-02-09 12:58:05 -06:00
John Alanbrook
a11f3e7d47 Merge branch 'pitweb' into mach 2026-02-09 12:57:01 -06:00
John Alanbrook
3d1fd37979 rm quickjs vm 2026-02-09 12:54:55 -06:00
John Alanbrook
8fc9bfe013 parse and tokenize modules 2026-02-09 12:19:05 -06:00
John Alanbrook
368511f666 parse.ce and tokenize.ce 2026-02-09 11:56:09 -06:00
John Alanbrook
3934cdb683 fix disrupts 2026-02-09 11:28:10 -06:00
John Alanbrook
45556c344d Merge branch 'pitweb' into mach 2026-02-09 11:17:45 -06:00
John Alanbrook
bc87fe5f70 string indexing 2026-02-09 11:17:42 -06:00
John Alanbrook
790293d915 Merge branch 'mach' into pitweb 2026-02-09 11:15:44 -06:00
John Alanbrook
872cd6ab51 more correct syntax and AI instructions 2026-02-09 11:00:23 -06:00
John Alanbrook
e04ab4c30c bootstrap 2026-02-09 10:56:15 -06:00
John Alanbrook
0503acb7e6 rm block scope 2026-02-09 10:11:22 -06:00
John Alanbrook
d0c68d7a7d Merge branch 'mcode2' into pitweb 2026-02-09 10:00:28 -06:00
John Alanbrook
7469383e66 refactor into multiple files 2026-02-08 16:32:14 -06:00
John Alanbrook
1fee8f9f8b condense jsruntime and jscontext 2026-02-08 10:10:42 -06:00
John Alanbrook
a4f3b025c5 update 2026-02-08 08:25:48 -06:00
John Alanbrook
d18ea1b330 update engine.cm 2026-02-08 08:24:49 -06:00
John Alanbrook
4de0659474 allow tokens as properties 2026-02-08 00:34:15 -06:00
John Alanbrook
27a9b72b07 functino tests; default args for mach and mcode 2026-02-08 00:31:18 -06:00
John Alanbrook
a3622bd5bd better parser error reporting 2026-02-08 00:23:47 -06:00
John Alanbrook
2f6700415e add functinos 2026-02-07 23:38:39 -06:00
John Alanbrook
243d92f7f3 rm ?? and .? 2026-02-07 22:09:40 -06:00
John Alanbrook
8f9d026b9b use casesensitive json 2026-02-07 17:01:11 -06:00
John Alanbrook
2c9ac8f7b6 no json roundtrip for mcode 2026-02-07 16:29:04 -06:00
John Alanbrook
80f24e131f all suite asan errors fixed for mcode 2026-02-07 16:15:58 -06:00
John Alanbrook
a8f8af7662 Merge branch 'mach' into mcode2 2026-02-07 15:49:38 -06:00
John Alanbrook
f5b3494762 memfree for mcode 2026-02-07 15:49:36 -06:00
John Alanbrook
13a6f6c79d faster mach bytecode generation 2026-02-07 15:49:09 -06:00
John Alanbrook
1a925371d3 faster parsing 2026-02-07 15:38:36 -06:00
John Alanbrook
08d2bacb1f improve ast parsing time 2026-02-07 15:22:18 -06:00
John Alanbrook
7322153e57 Merge branch 'mach' into mcode2 2026-02-07 14:53:41 -06:00
John Alanbrook
cc72c4cb0f fix mem errors for mcode 2026-02-07 14:53:35 -06:00
John Alanbrook
ae1f09a28f fix all memory errors 2026-02-07 14:53:14 -06:00
John Alanbrook
3c842912a1 gc fixing in mach vm 2026-02-07 14:25:04 -06:00
John Alanbrook
7cacf32078 Merge branch 'mach' into mcode2 2026-02-07 14:24:52 -06:00
John Alanbrook
b740612761 gc fixing in mach vm 2026-02-07 14:24:49 -06:00
John Alanbrook
6001c2b4bb Merge branch 'mach' into mcode2 2026-02-07 14:19:19 -06:00
John Alanbrook
98625fa15b mcode fix tests 2026-02-07 14:19:17 -06:00
John Alanbrook
87fafa44c8 fix last error 2026-02-07 13:43:13 -06:00
John Alanbrook
45ce76aef7 fixes 2026-02-07 12:50:46 -06:00
John Alanbrook
32fb44857c 1 test failing now 2026-02-07 12:50:26 -06:00
John Alanbrook
31d67f6710 fix vm suite tests 2026-02-07 12:34:18 -06:00
John Alanbrook
bae4e957e9 hugo website for pit 2026-02-07 12:01:58 -06:00
John Alanbrook
3621b1ef33 Merge branch 'mach' into mcode2 2026-02-07 11:53:44 -06:00
John Alanbrook
836227c8d3 fix mach proxy and templates 2026-02-07 11:53:39 -06:00
John Alanbrook
0ae59705d4 fix errors 2026-02-07 11:53:26 -06:00
John Alanbrook
8e2607b6ca Merge branch 'mcode2' into mach 2026-02-07 10:54:19 -06:00
John Alanbrook
dc73e86d8c handle mcode in callinternal 2026-02-07 10:51:45 -06:00
John Alanbrook
555cceb9d6 fixed text runner 2026-02-07 10:51:27 -06:00
John Alanbrook
fbb7933eb6 Merge branch 'mcode2' into mach 2026-02-07 10:40:20 -06:00
John Alanbrook
0287d6ada4 regex uses C strings now 2026-02-07 10:28:35 -06:00
John Alanbrook
73cd6a255d more test fixing 2026-02-07 07:59:52 -06:00
John Alanbrook
83ea67c01b Merge branch 'mach' into mcode2 2026-02-07 00:10:01 -06:00
John Alanbrook
16059cca4e fix tests 2026-02-07 00:09:58 -06:00
John Alanbrook
9ffe60ebef vm suite 2026-02-07 00:09:41 -06:00
John Alanbrook
2beafec5d9 fix tests 2026-02-07 00:09:21 -06:00
John Alanbrook
aba8eb66bd crash fixes 2026-02-06 23:38:56 -06:00
John Alanbrook
1abcaa92c7 Merge branch 'mach' into mcode2 2026-02-06 23:20:55 -06:00
John Alanbrook
168f7c71d5 fix text header chasing 2026-02-06 23:20:48 -06:00
John Alanbrook
56ed895b6e Merge branch 'mach' into mcode2 2026-02-06 23:15:38 -06:00
John Alanbrook
1e4646999d fix mach crashes 2026-02-06 23:15:33 -06:00
John Alanbrook
68d6c907fe fix mcode compilation 2026-02-06 23:13:13 -06:00
John Alanbrook
8150c64c7d pitcode 2026-02-06 22:58:21 -06:00
John Alanbrook
024d796ca4 add asan error vm stacktrace 2026-02-06 21:49:53 -06:00
John Alanbrook
ea185dbffd rm typeof 2026-02-06 21:26:45 -06:00
John Alanbrook
6571262af0 mach disrupt support 2026-02-06 21:09:18 -06:00
John Alanbrook
77ae133747 Merge branch 'mcode2' into mach 2026-02-06 20:45:57 -06:00
John Alanbrook
142a2d518b Merge branch 'stacktrace' into mach 2026-02-06 20:44:43 -06:00
John Alanbrook
5b65c64fe5 stack traces 2026-02-06 20:44:38 -06:00
John Alanbrook
e985fa5fe1 disrupt/disruption; remove try/catch 2026-02-06 18:40:56 -06:00
John Alanbrook
160ade2410 smarter gc malloc for large allocations 2026-02-06 18:38:23 -06:00
John Alanbrook
e2bc5948c1 fix functions and closures in mach 2026-02-06 18:30:26 -06:00
John Alanbrook
8cf98d8a9e Merge branch 'mcode2' into mach 2026-02-06 15:14:40 -06:00
John Alanbrook
3c38e828e5 context free tokenizing, parsing, compiling 2026-02-06 15:14:18 -06:00
John Alanbrook
af2d296f40 use new parser info 2026-02-06 12:45:25 -06:00
John Alanbrook
0a45394689 fix crash related to allocating in context heap 2026-02-06 12:43:19 -06:00
John Alanbrook
32885a422f bring in mcode 2026-02-06 04:24:14 -06:00
John Alanbrook
8959e53303 Merge branch 'newsyn' into mcode2 2026-02-06 03:55:56 -06:00
John Alanbrook
8a9a02b131 Merge branch 'newsyn' into mach 2026-02-06 03:54:38 -06:00
John Alanbrook
f9d68b2990 fix if/else, chained assignment 2026-02-06 03:54:25 -06:00
John Alanbrook
017a57b1eb use new parser information 2026-02-06 03:44:44 -06:00
John Alanbrook
ff8c68d01c mcode and mcode interpreter 2026-02-06 03:31:31 -06:00
John Alanbrook
9212003401 cannot set unbound 2026-02-06 03:24:01 -06:00
John Alanbrook
f9f8a4db42 Merge branch 'newsyn' into mach 2026-02-06 03:10:14 -06:00
John Alanbrook
8db95c654b more info in AST parser 2026-02-06 03:00:46 -06:00
John Alanbrook
63feabed5d mach vm 2026-02-06 02:50:48 -06:00
John Alanbrook
c814c0e1d8 rm new; rm void 2026-02-06 02:12:19 -06:00
John Alanbrook
bead0c48d4 Merge branch 'mcode' into newsyn 2026-02-06 02:02:46 -06:00
John Alanbrook
98dcab4ba7 comprehensive syntax test; fix multiple default args 2026-02-06 02:02:17 -06:00
John Alanbrook
ae44ce7b4b mcode and mach 2026-02-06 01:56:26 -06:00
John Alanbrook
1c38699b5a fix scope resolution 2026-02-06 01:41:03 -06:00
John Alanbrook
9a70a12d82 object literal 2026-02-05 21:41:34 -06:00
John Alanbrook
a8a271e014 Merge branch 'syntax' into ast 2026-02-05 20:39:56 -06:00
John Alanbrook
91761c03e6 push/pop syntax 2026-02-05 20:39:53 -06:00
John Alanbrook
5a479cc765 function literal in record literal 2026-02-05 20:32:57 -06:00
John Alanbrook
97a003e025 errors 2026-02-05 20:12:06 -06:00
John Alanbrook
20f14abd17 string templates 2026-02-05 19:34:06 -06:00
John Alanbrook
19ba184fec default params for functions 2026-02-05 18:44:40 -06:00
John Alanbrook
7909b11f6b better errors 2026-02-05 18:35:48 -06:00
John Alanbrook
27229c675c add parser and tokenizer errors 2026-02-05 18:14:49 -06:00
John Alanbrook
64d234ee35 Merge branch 'syntax' into ast 2026-02-05 17:45:15 -06:00
John Alanbrook
e861d73eec mkarecord 2026-02-05 17:45:13 -06:00
John Alanbrook
a24331aae5 tokenize 2026-02-05 11:21:34 -06:00
John Alanbrook
c1cb922b64 more comprehensive ast 2026-02-05 10:59:56 -06:00
John Alanbrook
aacb0b48bf more vm tests 2026-02-05 10:44:53 -06:00
John Alanbrook
b38aec95b6 Merge branch 'syntax' into ast 2026-02-05 10:29:29 -06:00
John Alanbrook
b29d3c2fe0 add vm tests 2026-02-05 10:29:09 -06:00
John Alanbrook
1cc3005b68 better jump labels 2026-02-05 10:28:13 -06:00
John Alanbrook
b86cd042fc vm unit tests 2026-02-05 10:21:16 -06:00
John Alanbrook
8b7af0c22a vm bytecode output 2026-02-05 10:14:14 -06:00
John Alanbrook
f71f6a296b register vm 2026-02-05 06:55:45 -06:00
John Alanbrook
9bd764b11b add go 2026-02-05 03:10:06 -06:00
John Alanbrook
058cdfd2e4 groundwork for vm 2026-02-05 02:59:16 -06:00
John Alanbrook
1ef837c6ff rm bound function stuff 2026-02-05 02:36:14 -06:00
John Alanbrook
cd21de3d70 rm realm concept on function 2026-02-05 02:33:50 -06:00
John Alanbrook
a98faa4dbb debugging 2026-02-05 02:27:26 -06:00
John Alanbrook
08559234c4 fix closures 2026-02-05 02:07:18 -06:00
John Alanbrook
c3dc27eac6 machine code 2026-02-04 23:45:51 -06:00
John Alanbrook
7170a9c7eb ast 2026-02-04 22:20:57 -06:00
John Alanbrook
a08ee50f84 serializable bytecode 2026-02-04 20:57:44 -06:00
John Alanbrook
ed7dd91c3f rm global 2026-02-04 18:57:45 -06:00
John Alanbrook
3abe20fee0 merge 2026-02-04 18:38:46 -06:00
John Alanbrook
a92a96118e remove eval parser; consolidate addintrinsic 2026-02-04 17:15:03 -06:00
John Alanbrook
4e407fe301 migrate nota, wota into quickjs.c 2026-02-04 17:03:48 -06:00
John Alanbrook
ab74cdc173 merge warningfix 2026-02-04 16:17:52 -06:00
John Alanbrook
2c9d039271 massive cleanup 2026-02-04 14:26:17 -06:00
John Alanbrook
d4635f2a75 remove unused vars, fix warnings 2026-02-04 13:49:43 -06:00
243 changed files with 149414 additions and 33229 deletions

1
.gitattributes vendored Normal file
View File

@@ -0,0 +1 @@
*.mach binary merge=ours

4
.gitignore vendored
View File

@@ -1,6 +1,7 @@
.git/
.obj/
website/
website/public/
website/.hugo_build.lock
bin/
build/
*.zip
@@ -15,6 +16,7 @@ build/
source/shaders/*.h
.DS_Store
*.html
!website/themes/**/*.html
.vscode
*.icns
icon.ico

152
CLAUDE.md
View File

@@ -1,25 +1,141 @@
# Code style
All code is done with 2 spaces for indentation.
# ƿit (pit) Language Project
For cell script and its integration files, objects are preferred over classes, preferably with limited use of prototypes; make objects sendable between actors (.ce files).
## Building
## cell script format
Cell script files end in .ce or .cm. Cell script is similar to Javascript but with some differences.
Recompile after changes: `make`
Bootstrap from scratch (first time): `make bootstrap`
Run `cell --help` to see all CLI flags.
Variables are declared with 'var'. Var behaves like let.
Constants are declared with 'def'.
!= and == are strict, there is no !== or ===.
There is no undefined, only null.
There are no classes, only objects and prototypes.
Prefer backticks for string interpolation. Otherwise, converting non-strings with the text() function is required.
Everything should be lowercase.
## Code Style
There are no arraybuffers, only blobs, which work with bits. They must be stoned like stone(blob) before being read from.
All code uses 2 spaces for indentation. K&R style for C and Javascript.
## c format
For cell script integration files, everything should be declared static that can be. Most don't have headers at all. Files in a package are not shared between packages.
## ƿit Script Quick Reference
There is no undefined, so JS_IsNull and JS_NULL should be used only.
ƿit script files: `.ce` (actors) and `.cm` (modules). The syntax is similar to JavaScript with important differences listed below.
## how module loading is done in cell script
Within a package, a c file, if using the correct macros (CELL_USE_FUNCS etc), will be loaded as a module with its name; so png.c inside a package is loaded as <package>/png, giving you access to its functions.
### Key Differences from JavaScript
- `var` (mutable) and `def` (constant) — no `let` or `const`
- `==` and `!=` are strict (no `===` or `!==`)
- No `undefined` — only `null`
- No classes — only objects and prototypes (`meme()`, `proto()`, `isa()`)
- No `for...in`, `for...of`, spread (`...`), rest params, or default params
- No named function declarations — use `var fn = function() {}` or arrow functions
- Variables must be declared at function body level only (not in if/while/for/blocks)
- All variables must be initialized at declaration (`var x` alone is an error; use `var x = null`)
- No `try`/`catch`/`throw` — use `disrupt`/`disruption`
- No arraybuffers — only `blob` (works with bits; must `stone(blob)` before reading)
- Identifiers can contain `?` and `!` (e.g., `nil?`, `set!`, `is?valid`)
- Prefer backticks for string interpolation; otherwise use `text()` to convert non-strings
- Everything should be lowercase
### Intrinsic Functions (always available, no `use()` needed)
The creator functions are **polymorphic** — behavior depends on argument types:
- `array(number)` — create array of size N filled with null
- `array(number, value_or_fn)` — create array with initial values
- `array(array)` — copy array
- `array(array, fn)` — map
- `array(array, array)` — concatenate
- `array(array, from, to)` — slice
- `array(record)` — get keys as array of text
- **`array(text)` — split text into individual characters** (e.g., `array("hello")` → `["h","e","l","l","o"]`)
- `array(text, separator)` — split by separator
- `array(text, length)` — split into chunks of length
- `text(array, separator)` — join array into text
- `text(number)` or `text(number, radix)` — number to text
- `text(text, from, to)` — substring
- `number(text)` or `number(text, radix)` — parse text to number
- `number(logical)` — boolean to number
- `record(record)` — copy
- `record(record, another)` — merge
- `record(array_of_keys)` — create record from keys
Other key intrinsics: `length()`, `stone()`, `is_stone()`, `print()`, `filter()`, `find()`, `reduce()`, `sort()`, `reverse()`, `some()`, `every()`, `starts_with()`, `ends_with()`, `meme()`, `proto()`, `isa()`, `splat()`, `apply()`, `extract()`, `replace()`, `search()`, `format()`, `lower()`, `upper()`, `trim()`
Sensory functions: `is_array()`, `is_text()`, `is_number()`, `is_object()`, `is_function()`, `is_null()`, `is_logical()`, `is_integer()`, `is_stone()`, etc.
### Standard Library (loaded with `use()`)
- `blob` — binary data (bits, not bytes)
- `time` — time constants and conversions
- `math` — trig, logarithms, roots (`math/radians`, `math/turns`)
- `json` — JSON encoding/decoding
- `random` — random number generation
### Actor Model
- `.ce` files are actors (independent execution units, don't return values)
- `.cm` files are modules (return a value, cached and frozen)
- Actors never share memory; communicate via `$send()` message passing
- Actor intrinsics start with `$`: `$me`, `$stop()`, `$send()`, `$start()`, `$delay()`, `$receiver()`, `$clock()`, `$portal()`, `$contact()`, `$couple()`, `$unneeded()`, `$connection()`, `$time_limit()`
### Requestors (async composition)
`sequence()`, `parallel()`, `race()`, `fallback()` — compose asynchronous operations. See docs/requestors.md.
### Error Handling
```javascript
var fn = function() {
disrupt // bare keyword, no value
} disruption {
// handle error; can re-raise with disrupt
}
```
### Push/Pop Syntax
```javascript
var a = [1, 2]
a[] = 3 // push: [1, 2, 3]
var v = a[] // pop: v is 3, a is [1, 2]
```
## C Integration
- Declare everything `static` that can be
- Most files don't have headers; files in a package are not shared between packages
- No undefined in C API: use `JS_IsNull` and `JS_NULL` only
- A C file with correct macros (`CELL_USE_FUNCS` etc) is loaded as a module by its name (e.g., `png.c` in a package → `use('<package>/png')`)
- C symbol naming: `js_<pkg>_<file>_use` (e.g., `js_core_math_radians_use` for `core/math/radians`)
- Core is the `core` package — its symbols follow the same `js_core_<name>_use` pattern as all other packages
- Package directories should contain only source files (no `.mach`/`.mcode` alongside source)
- Build cache files in `build/` are bare hashes (no extensions)
- Use `JS_FRAME`/`JS_ROOT`/`JS_RETURN` macros for any C function that allocates multiple heap objects. Any `JS_New*`/`JS_SetProperty*` call can trigger GC.
## Project Layout
- `source/` — C source for the cell runtime and CLI
- `docs/` — master documentation (Markdown), reflected on the website
- `website/` — Hugo site; theme at `website/themes/knr/`
- `internal/` — internal ƿit scripts (engine.cm etc.)
- `packages/` — core packages
- `Makefile` — build system (`make` to rebuild, `make bootstrap` for first build)
## Testing
After any C runtime changes, run all three test suites before considering the work done:
```
make # rebuild
./cell --dev vm_suite # VM-level tests (641 tests)
./cell --dev test suite # language-level tests (493 tests)
./cell --dev fuzz # fuzzer (100 iterations)
```
All three must pass with 0 failures.
## Documentation
The `docs/` folder is the single source of truth. The website at `website/` mounts it via Hugo. Key files:
- `docs/language.md` — language syntax reference
- `docs/functions.md` — all built-in intrinsic functions
- `docs/actors.md` — actor model and actor intrinsics
- `docs/requestors.md` — async requestor pattern
- `docs/library/*.md` — intrinsic type reference (text, number, array, object) and standard library modules

View File

@@ -9,6 +9,8 @@
CELL_SHOP = $(HOME)/.cell
CELL_CORE_PACKAGE = $(CELL_SHOP)/packages/core
doit: bootstrap
maker: install
makecell:
@@ -16,7 +18,7 @@ makecell:
cp cell /opt/homebrew/bin/
# Install core: symlink this directory to ~/.cell/packages/core
install: bootstrap $(CELL_SHOP)
install: cell $(CELL_SHOP)
@echo "Linking cell core to $(CELL_CORE_PACKAGE)"
rm -rf $(CELL_CORE_PACKAGE)
ln -s $(PWD) $(CELL_CORE_PACKAGE)
@@ -57,13 +59,12 @@ static:
# Bootstrap: build cell from scratch using meson (only needed once)
# Also installs core scripts to ~/.cell/core
bootstrap:
meson setup build_bootstrap -Dbuildtype=debug -Db_sanitize=address
bootstrap:
meson setup build_bootstrap -Dbuildtype=debugoptimized
meson compile -C build_bootstrap
cp build_bootstrap/cell .
cp build_bootstrap/libcell_runtime.dylib .
@echo "Bootstrap complete. Cell shop initialized at $(CELL_SHOP)"
@echo "Now run 'make' to rebuild with cell itself."
@echo "Bootstrap complete. Run cell like ./cell --dev to use a local shop at .cell."
# Clean build artifacts
clean:

33
add.ce
View File

@@ -13,6 +13,10 @@ var fd = use('fd')
var locator = null
var alias = null
var resolved = null
var parts = null
var cwd = null
var build_target = null
array(args, function(arg) {
if (arg == '--help' || arg == '-h') {
@@ -41,7 +45,7 @@ if (!locator) {
// Resolve relative paths to absolute paths
if (locator == '.' || starts_with(locator, './') || starts_with(locator, '../') || fd.is_dir(locator)) {
var resolved = fd.realpath(locator)
resolved = fd.realpath(locator)
if (resolved) {
locator = resolved
}
@@ -50,7 +54,7 @@ if (locator == '.' || starts_with(locator, './') || starts_with(locator, '../')
// Generate default alias from locator
if (!alias) {
// Use the last component of the locator as alias
var parts = array(locator, '/')
parts = array(locator, '/')
alias = parts[length(parts) - 1]
// Remove any version suffix
if (search(alias, '@') != null) {
@@ -59,7 +63,7 @@ if (!alias) {
}
// Check we're in a package directory
var cwd = fd.realpath('.')
cwd = fd.realpath('.')
if (!fd.is_file(cwd + '/cell.toml')) {
log.error("Not in a package directory (no cell.toml found)")
$stop()
@@ -68,16 +72,17 @@ if (!fd.is_file(cwd + '/cell.toml')) {
log.console("Adding " + locator + " as '" + alias + "'...")
// Add to local project's cell.toml
try {
var _add_dep = function() {
pkg.add_dependency(null, locator, alias)
log.console(" Added to cell.toml")
} catch (e) {
log.error("Failed to update cell.toml: " + e)
} disruption {
log.error("Failed to update cell.toml")
$stop()
}
_add_dep()
// Install to shop
try {
var _install = function() {
shop.get(locator)
shop.extract(locator)
@@ -85,18 +90,20 @@ try {
shop.build_package_scripts(locator)
// Build C code if any
try {
var target = build.detect_host_target()
build.build_dynamic(locator, target, 'release')
} catch (e) {
var _build_c = function() {
build_target = build.detect_host_target()
build.build_dynamic(locator, build_target, 'release')
} disruption {
// Not all packages have C code
}
_build_c()
log.console(" Installed to shop")
} catch (e) {
log.error("Failed to install: " + e)
} disruption {
log.error("Failed to install")
$stop()
}
_install()
log.console("Added " + alias + " (" + locator + ")")

View File

@@ -319,7 +319,7 @@ JSValue js_reader_list(JSContext *js, JSValue self, int argc, JSValue *argv)
JS_FreeValue(js, arr);
return filename;
}
JS_SetPropertyUint32(js, arr, arr_index++, filename);
JS_SetPropertyNumber(js, arr, arr_index++, filename);
}
return arr;
@@ -379,21 +379,23 @@ static const JSCFunctionListEntry js_reader_funcs[] = {
JS_CFUNC_DEF("count", 0, js_reader_count),
};
JSValue js_miniz_use(JSContext *js)
JSValue js_core_miniz_use(JSContext *js)
{
JS_FRAME(js);
JS_NewClassID(&js_reader_class_id);
JS_NewClass(JS_GetRuntime(js), js_reader_class_id, &js_reader_class);
JSValue reader_proto = JS_NewObject(js);
JS_SetPropertyFunctionList(js, reader_proto, js_reader_funcs, sizeof(js_reader_funcs) / sizeof(JSCFunctionListEntry));
JS_SetClassProto(js, js_reader_class_id, reader_proto);
JS_NewClass(js, js_reader_class_id, &js_reader_class);
JS_ROOT(reader_proto, JS_NewObject(js));
JS_SetPropertyFunctionList(js, reader_proto.val, js_reader_funcs, sizeof(js_reader_funcs) / sizeof(JSCFunctionListEntry));
JS_SetClassProto(js, js_reader_class_id, reader_proto.val);
JS_NewClassID(&js_writer_class_id);
JS_NewClass(JS_GetRuntime(js), js_writer_class_id, &js_writer_class);
JSValue writer_proto = JS_NewObject(js);
JS_SetPropertyFunctionList(js, writer_proto, js_writer_funcs, sizeof(js_writer_funcs) / sizeof(JSCFunctionListEntry));
JS_SetClassProto(js, js_writer_class_id, writer_proto);
JSValue export = JS_NewObject(js);
JS_SetPropertyFunctionList(js, export, js_miniz_funcs, sizeof(js_miniz_funcs)/sizeof(JSCFunctionListEntry));
return export;
JS_NewClass(js, js_writer_class_id, &js_writer_class);
JS_ROOT(writer_proto, JS_NewObject(js));
JS_SetPropertyFunctionList(js, writer_proto.val, js_writer_funcs, sizeof(js_writer_funcs) / sizeof(JSCFunctionListEntry));
JS_SetClassProto(js, js_writer_class_id, writer_proto.val);
JS_ROOT(export, JS_NewObject(js));
JS_SetPropertyFunctionList(js, export.val, js_miniz_funcs, sizeof(js_miniz_funcs)/sizeof(JSCFunctionListEntry));
JS_RETURN(export.val);
}

167
bench.ce
View File

@@ -8,7 +8,7 @@ var os = use('os')
var testlib = use('internal/testlib')
var math = use('math/radians')
if (!args) args = []
var _args = args == null ? [] : args
var target_pkg = null // null = current package
var target_bench = null // null = all benchmarks, otherwise specific bench file
@@ -55,14 +55,19 @@ function stddev(arr, mean_val) {
function percentile(arr, p) {
if (length(arr) == 0) return 0
var sorted = sort(arr)
var idx = floor(arr) * p / 100
var idx = floor(length(arr) * p / 100)
if (idx >= length(arr)) idx = length(arr) - 1
return sorted[idx]
}
// Parse arguments similar to test.ce
function parse_args() {
if (length(args) == 0) {
var name = null
var lock = null
var resolved = null
var bench_path = null
if (length(_args) == 0) {
if (!testlib.is_valid_package('.')) {
log.console('No cell.toml found in current directory')
return false
@@ -71,7 +76,7 @@ function parse_args() {
return true
}
if (args[0] == 'all') {
if (_args[0] == 'all') {
if (!testlib.is_valid_package('.')) {
log.console('No cell.toml found in current directory')
return false
@@ -80,28 +85,28 @@ function parse_args() {
return true
}
if (args[0] == 'package') {
if (length(args) < 2) {
if (_args[0] == 'package') {
if (length(_args) < 2) {
log.console('Usage: cell bench package <name> [bench]')
log.console(' cell bench package all')
return false
}
if (args[1] == 'all') {
if (_args[1] == 'all') {
all_pkgs = true
log.console('Benchmarking all packages...')
return true
}
var name = args[1]
var lock = shop.load_lock()
name = _args[1]
lock = shop.load_lock()
if (lock[name]) {
target_pkg = name
} else if (starts_with(name, '/') && testlib.is_valid_package(name)) {
target_pkg = name
} else {
if (testlib.is_valid_package('.')) {
var resolved = pkg.alias_to_package(null, name)
resolved = pkg.alias_to_package(null, name)
if (resolved) {
target_pkg = resolved
} else {
@@ -114,8 +119,8 @@ function parse_args() {
}
}
if (length(args) >= 3) {
target_bench = args[2]
if (length(_args) >= 3) {
target_bench = _args[2]
}
log.console(`Benchmarking package: ${target_pkg}`)
@@ -123,7 +128,7 @@ function parse_args() {
}
// cell bench benches/suite or cell bench <path>
var bench_path = args[0]
bench_path = _args[0]
// Normalize path - add benches/ prefix if not present
if (!starts_with(bench_path, 'benches/') && !starts_with(bench_path, '/')) {
@@ -160,12 +165,15 @@ function collect_benches(package_name, specific_bench) {
var files = pkg.list_files(package_name)
var bench_files = []
arrfor(files, function(f) {
var bench_name = null
var match_name = null
var match_base = null
if (starts_with(f, "benches/") && ends_with(f, ".cm")) {
if (specific_bench) {
var bench_name = text(f, 0, -3)
var match_name = specific_bench
bench_name = text(f, 0, -3)
match_name = specific_bench
if (!starts_with(match_name, 'benches/')) match_name = 'benches/' + match_name
var match_base = ends_with(match_name, '.cm') ? text(match_name, 0, -3) : match_name
match_base = ends_with(match_name, '.cm') ? text(match_name, 0, -3) : match_name
if (bench_name != match_base) return
}
push(bench_files, f)
@@ -180,24 +188,25 @@ function calibrate_batch_size(bench_fn, is_batch) {
var n = MIN_BATCH_SIZE
var dt = 0
var start = 0
var new_n = 0
var calc = 0
var target_n = 0
// Find a batch size that takes at least MIN_SAMPLE_NS
while (n < MAX_BATCH_SIZE) {
// Ensure n is a valid number before calling
if (!is_number(n) || n < 1) {
n = 1
break
}
var start = os.now()
start = os.now()
bench_fn(n)
dt = os.now() - start
if (dt >= MIN_SAMPLE_NS) break
// Double the batch size
var new_n = n * 2
// Check if multiplication produced a valid number
new_n = n * 2
if (!is_number(new_n) || new_n > MAX_BATCH_SIZE) {
n = MAX_BATCH_SIZE
break
@@ -207,10 +216,9 @@ function calibrate_batch_size(bench_fn, is_batch) {
// Adjust to target sample duration
if (dt > 0 && dt < TARGET_SAMPLE_NS && is_number(n) && is_number(dt)) {
var calc = n * TARGET_SAMPLE_NS / dt
calc = n * TARGET_SAMPLE_NS / dt
if (is_number(calc) && calc > 0) {
var target_n = floor(calc)
// Check if floor returned a valid number
target_n = floor(calc)
if (is_number(target_n) && target_n > 0) {
if (target_n > MAX_BATCH_SIZE) target_n = MAX_BATCH_SIZE
if (target_n < MIN_BATCH_SIZE) target_n = MIN_BATCH_SIZE
@@ -219,7 +227,6 @@ function calibrate_batch_size(bench_fn, is_batch) {
}
}
// Safety check - ensure we always return a valid batch size
if (!is_number(n) || n < 1) {
n = 1
}
@@ -230,72 +237,70 @@ function calibrate_batch_size(bench_fn, is_batch) {
// Run a single benchmark function
function run_single_bench(bench_fn, bench_name) {
var timings_per_op = []
// Detect benchmark format:
// 1. Object with { setup, run, teardown } - structured format
// 2. Function that accepts (n) - batch format
// 3. Function that accepts () - legacy format
var is_structured = is_object(bench_fn) && bench_fn.run
var is_batch = false
var batch_size = 1
var setup_fn = null
var run_fn = null
var teardown_fn = null
var calibrate_fn = null
var _detect = null
var i = 0
var state = null
var start = 0
var duration = 0
var ns_per_op = 0
if (is_structured) {
setup_fn = bench_fn.setup || function() { return null }
run_fn = bench_fn.run
teardown_fn = bench_fn.teardown || function(state) {}
teardown_fn = bench_fn.teardown || function(s) {}
// Check if run function accepts batch size
try {
_detect = function() {
var test_state = setup_fn()
run_fn(1, test_state)
is_batch = true
if (teardown_fn) teardown_fn(test_state)
} catch (e) {
} disruption {
is_batch = false
}
_detect()
// Create wrapper for calibration
var calibrate_fn = function(n) {
var state = setup_fn()
run_fn(n, state)
if (teardown_fn) teardown_fn(state)
calibrate_fn = function(n) {
var s = setup_fn()
run_fn(n, s)
if (teardown_fn) teardown_fn(s)
}
batch_size = calibrate_batch_size(calibrate_fn, is_batch)
// Safety check for structured benchmarks
if (!is_number(batch_size) || batch_size < 1) {
batch_size = 1
}
} else {
// Simple function format
try {
_detect = function() {
bench_fn(1)
is_batch = true
} catch (e) {
} disruption {
is_batch = false
}
_detect()
batch_size = calibrate_batch_size(bench_fn, is_batch)
}
// Safety check - ensure batch_size is valid
if (!batch_size || batch_size < 1) {
batch_size = 1
}
// Warmup phase
for (var i = 0; i < WARMUP_BATCHES; i++) {
// Ensure batch_size is valid before warmup
for (i = 0; i < WARMUP_BATCHES; i++) {
if (!is_number(batch_size) || batch_size < 1) {
var type_str = is_null(batch_size) ? 'null' : is_number(batch_size) ? 'number' : is_text(batch_size) ? 'text' : is_object(batch_size) ? 'object' : is_array(batch_size) ? 'array' : is_function(batch_size) ? 'function' : is_logical(batch_size) ? 'logical' : 'unknown'
log.console(`WARNING: batch_size became ${type_str} = ${batch_size}, resetting to 1`)
batch_size = 1
}
if (is_structured) {
var state = setup_fn()
state = setup_fn()
if (is_batch) {
run_fn(batch_size, state)
} else {
@@ -312,35 +317,34 @@ function run_single_bench(bench_fn, bench_name) {
}
// Measurement phase - collect SAMPLES timing samples
for (var i = 0; i < SAMPLES; i++) {
// Double-check batch_size is valid (should never happen, but defensive)
for (i = 0; i < SAMPLES; i++) {
if (!is_number(batch_size) || batch_size < 1) {
batch_size = 1
}
if (is_structured) {
var state = setup_fn()
var start = os.now()
state = setup_fn()
start = os.now()
if (is_batch) {
run_fn(batch_size, state)
} else {
run_fn(state)
}
var duration = os.now() - start
duration = os.now() - start
if (teardown_fn) teardown_fn(state)
var ns_per_op = is_batch ? duration / batch_size : duration
ns_per_op = is_batch ? duration / batch_size : duration
push(timings_per_op, ns_per_op)
} else {
var start = os.now()
start = os.now()
if (is_batch) {
bench_fn(batch_size)
} else {
bench_fn()
}
var duration = os.now() - start
duration = os.now() - start
var ns_per_op = is_batch ? duration / batch_size : duration
ns_per_op = is_batch ? duration / batch_size : duration
push(timings_per_op, ns_per_op)
}
}
@@ -354,7 +358,6 @@ function run_single_bench(bench_fn, bench_name) {
var p95_ns = percentile(timings_per_op, 95)
var p99_ns = percentile(timings_per_op, 99)
// Calculate ops/s from median
var ops_per_sec = 0
if (median_ns > 0) {
ops_per_sec = floor(1000000000 / median_ns)
@@ -408,18 +411,21 @@ function run_benchmarks(package_name, specific_bench) {
arrfor(bench_files, function(f) {
var mod_path = text(f, 0, -3)
var load_error = false
var bench_mod = null
var use_pkg = null
var benches = []
var error_result = null
var file_result = {
name: f,
benchmarks: []
}
try {
var bench_mod
var use_pkg = package_name ? package_name : fd.realpath('.')
var _load_file = function() {
use_pkg = package_name ? package_name : fd.realpath('.')
bench_mod = shop.use(mod_path, use_pkg)
var benches = []
if (is_function(bench_mod)) {
push(benches, {name: 'main', fn: bench_mod})
} else if (is_object(bench_mod)) {
@@ -432,8 +438,11 @@ function run_benchmarks(package_name, specific_bench) {
if (length(benches) > 0) {
log.console(` ${f}`)
arrfor(benches, function(b) {
try {
var result = run_single_bench(b.fn, b.name)
var bench_error = false
var result = null
var _run_bench = function() {
result = run_single_bench(b.fn, b.name)
result.package = pkg_result.package
push(file_result.benchmarks, result)
pkg_result.total++
@@ -444,25 +453,32 @@ function run_benchmarks(package_name, specific_bench) {
if (result.batch_size > 1) {
log.console(` batch: ${result.batch_size} samples: ${result.samples}`)
}
} catch (e) {
log.console(` ERROR ${b.name}: ${e}`)
log.error(e)
var error_result = {
} disruption {
bench_error = true
}
_run_bench()
if (bench_error) {
log.console(` ERROR ${b.name}`)
error_result = {
package: pkg_result.package,
name: b.name,
error: e.toString()
error: "benchmark disrupted"
}
push(file_result.benchmarks, error_result)
pkg_result.total++
}
})
}
} catch (e) {
log.console(` Error loading ${f}: ${e}`)
var error_result = {
} disruption {
load_error = true
}
_load_file()
if (load_error) {
log.console(` Error loading ${f}`)
error_result = {
package: pkg_result.package,
name: "load_module",
error: `Error loading module: ${e}`
error: "error loading module"
}
push(file_result.benchmarks, error_result)
pkg_result.total++
@@ -478,15 +494,16 @@ function run_benchmarks(package_name, specific_bench) {
// Run all benchmarks
var all_results = []
var packages = null
if (all_pkgs) {
if (testlib.is_valid_package('.')) {
push(all_results, run_benchmarks(null, null))
}
var packages = shop.list_packages()
arrfor(packages, function(pkg) {
push(all_results, run_benchmarks(pkg, null))
packages = shop.list_packages()
arrfor(packages, function(p) {
push(all_results, run_benchmarks(p, null))
})
} else {
push(all_results, run_benchmarks(target_pkg, target_bench))

194
bench_native.ce Normal file
View File

@@ -0,0 +1,194 @@
// bench_native.ce — compare VM vs native execution speed
//
// Usage:
// cell --dev bench_native.ce <module.cm> [iterations]
//
// Compiles (if needed) and benchmarks a module via both VM and native dylib.
// Reports median/mean timing per benchmark + speedup ratio.
var os = use('os')
var fd = use('fd')
// The module path is mandatory; without it print the usage line and stop.
if (length(args) < 1) {
print('usage: cell --dev bench_native.ce <module.cm> [iterations]')
return
}
var file = args[0]
// `name` is the module name handed to use(): the path with a trailing '.cm' removed.
var name = file
if (ends_with(name, '.cm')) {
name = text(name, 0, length(name) - 3)
}
// Number of timed samples per benchmark; the optional second argument overrides the default of 11.
var iterations = 11
if (length(args) > 1) {
iterations = number(args[1])
}
// Number of untimed warmup calls made before sampling.
def WARMUP = 3
// Native entry point name: 'js_' + module name with '/' and '-' mapped to '_' + '_use'.
var safe = replace(replace(name, '/', '_'), '-', '_')
var symbol = 'js_' + safe + '_use'
// The dylib path is built from the original argument, so it keeps any '.cm' suffix (e.g. ./foo.cm.dylib).
var dylib_path = './' + file + '.dylib'
// --- Statistics ---
// Ordering helper for the statistics functions: returns whatever the
// `sort` intrinsic yields for arr.
var stat_sort = function(arr) {
  var ordered = sort(arr)
  return ordered
}
// Median of arr: 0 for an empty array, the middle element of the sorted
// values for an odd count, the mean of the two middle elements otherwise.
var stat_median = function(arr) {
  var count = length(arr)
  var ordered = null
  var half = 0
  if (count == 0) return 0
  ordered = stat_sort(arr)
  half = floor(count / 2)
  return count % 2 == 0 ? (ordered[half - 1] + ordered[half]) / 2 : ordered[half]
}
// Arithmetic mean of arr; 0 for an empty array.
var stat_mean = function(arr) {
  var count = length(arr)
  var total = 0
  var idx = 0
  if (count == 0) return 0
  // Start from the first element and add left to right, the same order a
  // seedless reduce would use.
  total = arr[0]
  for (idx = 1; idx < count; idx++) {
    total = total + arr[idx]
  }
  return total / count
}
// Render a nanosecond duration with a unit suffix: whole 'ns' below 1000,
// otherwise 'us', 'ms' or 's' rounded to two decimals.
var format_ns = function(ns) {
  var scaled = 0
  var unit = 's'
  if (ns < 1000) return text(round(ns)) + 'ns'
  if (ns < 1000000) {
    scaled = ns / 1000
    unit = 'us'
  } else if (ns < 1000000000) {
    scaled = ns / 1000000
    unit = 'ms'
  } else {
    scaled = ns / 1000000000
  }
  return text(round(scaled * 100) / 100) + unit
}
// --- Collect benchmarks from module ---
// Turns a loaded module into a list of {name, fn} entries:
//   - a function module becomes a single entry named 'main'
//   - an object module contributes one entry per function-valued key
//   - anything else yields an empty list
var collect_benches = function(mod) {
  var found = []
  var names = null
  var idx = 0
  var candidate = null
  if (is_function(mod)) {
    push(found, {name: 'main', fn: mod})
    return found
  }
  if (!is_object(mod)) return found
  names = array(mod)
  for (idx = 0; idx < length(names); idx++) {
    candidate = mod[names[idx]]
    if (is_function(candidate)) {
      push(found, {name: names[idx], fn: candidate})
    }
  }
  return found
}
// --- Run one benchmark function ---
// Calls fn(1) WARMUP times untimed, then `iterations` more times, timing
// each call with os.now(). Returns {label, median, mean} over the
// collected per-call durations.
var run_bench = function(fn, label) {
  var timings = []
  var round_no = 0
  var began = 0
  var ended = 0

  // warmup: results discarded
  for (round_no = 0; round_no < WARMUP; round_no++) {
    fn(1)
  }

  // measured samples
  for (round_no = 0; round_no < iterations; round_no++) {
    began = os.now()
    fn(1)
    ended = os.now()
    push(timings, ended - began)
  }

  return {
    label: label,
    median: stat_median(timings),
    mean: stat_mean(timings)
  }
}
// --- Load VM module ---
// Load the module through the regular use() path and gather its benchmark functions.
print('loading VM module: ' + file)
var vm_mod = use(name)
var vm_benches = collect_benches(vm_mod)
// Nothing to measure: report and stop the script.
if (length(vm_benches) == 0) {
print('no benchmarkable functions found in ' + file)
return
}
// --- Load native module ---
// The native side is optional: it is only loaded when the dylib file exists.
var native_mod = null
var native_benches = []
var has_native = fd.is_file(dylib_path)
var lib = null
if (has_native) {
print('loading native module: ' + dylib_path)
lib = os.dylib_open(dylib_path)
// The value obtained for `symbol` is treated as the module itself and scanned for benchmarks.
native_mod = os.dylib_symbol(lib, symbol)
native_benches = collect_benches(native_mod)
} else {
// No dylib: native_benches stays empty, so only VM timings are reported.
print('no ' + dylib_path + ' found -- VM-only benchmarking')
print(' hint: cell --dev compile.ce ' + file)
}
// --- Run benchmarks ---
print('')
print('samples: ' + text(iterations) + ' (warmup: ' + text(WARMUP) + ')')
print('')
// Right-pad s with spaces until it is at least n characters long.
// Strings already n or longer are returned unchanged.
var pad = function(s, n) {
  var padded = s
  var width = 0
  for (width = length(s); width < n; width++) {
    padded = padded + ' '
  }
  return padded
}
// Main report loop: time every VM benchmark, then time the native benchmark
// with the same name (if any) and print the VM/native speedup.
var i = 0
var b = null
var vm_result = null
var j = 0
var found = false
var nat_result = null
var speedup = 0
while (i < length(vm_benches)) {
b = vm_benches[i]
vm_result = run_bench(b.fn, 'vm')
print(pad(b.name, 20) + ' VM: ' + pad(format_ns(vm_result.median), 12) + ' (median) ' + format_ns(vm_result.mean) + ' (mean)')
// find matching native bench
j = 0
found = false
// The scan does not stop at the first match; every native entry with this name is run.
while (j < length(native_benches)) {
if (native_benches[j].name == b.name) {
nat_result = run_bench(native_benches[j].fn, 'native')
print(pad('', 20) + ' NT: ' + pad(format_ns(nat_result.median), 12) + ' (median) ' + format_ns(nat_result.mean) + ' (mean)')
// Skip the ratio when the native median is 0 to avoid dividing by zero.
if (nat_result.median > 0) {
speedup = vm_result.median / nat_result.median
print(pad('', 20) + ' speedup: ' + text(round(speedup * 100) / 100) + 'x')
}
found = true
}
j = j + 1
}
// Only complain about a missing counterpart when a dylib was actually loaded.
if (has_native && !found) {
print(pad('', 20) + ' NT: (no matching function)')
}
print('')
i = i + 1
}

232
benches/actor_patterns.cm Normal file
View File

@@ -0,0 +1,232 @@
// actor_patterns.cm — Actor concurrency benchmarks
// Message passing, fan-out/fan-in, mailbox throughput.
// These use structured benchmarks with setup/run/teardown.
// Note: actor benchmarks are measured differently from pure compute.
// Each iteration sends messages and waits for results, so they're
// inherently slower but test real concurrency costs.
// Simple ping-pong: two actors sending messages back and forth
// Since we can't create real actors from a module, we simulate
// the message-passing patterns with function call overhead that
// mirrors what the actor dispatch does.
// Simulate message dispatch overhead
// Create an empty simulated mailbox: a message list plus a counter of
// messages handed out by receive().
function make_mailbox() {
  var box = {queue: [], delivered: 0}
  return box
}
// Append msg to the mailbox's queue. Always returns null.
function send(mailbox, msg) {
  var pending = mailbox.queue
  push(pending, msg)
  return null
}
// Take one message out of the mailbox via the `pop` intrinsic and bump the
// delivered counter. Returns null (and counts nothing) when the queue is empty.
// NOTE(review): pop presumably removes the most recently pushed message, so
// delivery order would be LIFO; confirm against the intrinsic's documentation.
function receive(mailbox) {
  var pending = mailbox.queue
  if (length(pending) > 0) {
    mailbox.delivered = mailbox.delivered + 1
    return pop(pending)
  }
  return null
}
// Discard every queued message and return how many were removed.
// Does not touch the mailbox's delivered counter.
function drain(mailbox) {
  var removed = 0
  for (removed = 0; length(mailbox.queue) > 0; removed++) {
    pop(mailbox.queue)
  }
  return removed
}
// Ping-pong: two simulated actors bounce one message back and forth.
// Each round A forwards to B and B forwards back to A, incrementing the
// carried value each hop. Returns the total number of deliveries.
function ping_pong(rounds) {
  var side_a = make_mailbox()
  var side_b = make_mailbox()
  var turn = 0
  var incoming = null

  // Seed the exchange with the first ping.
  send(side_a, {type: "ping", val: 0})

  while (turn < rounds) {
    // A -> B
    incoming = receive(side_a)
    if (incoming) {
      send(side_b, {type: "pong", val: incoming.val + 1})
    }
    // B -> A
    incoming = receive(side_b)
    if (incoming) {
      send(side_a, {type: "ping", val: incoming.val + 1})
    }
    turn = turn + 1
  }
  return side_a.delivered + side_b.delivered
}
// Fan-out: one sender, N receivers.
// Every receiver gets messages_per messages, then each mailbox is drained.
// Returns the total number of drained messages.
function fan_out(n_receivers, messages_per) {
  var boxes = []
  var r = 0
  var seq = 0
  var drained = 0

  while (r < n_receivers) {
    push(boxes, make_mailbox())
    r = r + 1
  }

  // Broadcast: message seq goes to every receiver before seq + 1 is sent.
  for (seq = 0; seq < messages_per; seq++) {
    for (r = 0; r < n_receivers; r++) {
      send(boxes[r], {seq: seq, data: seq * 17})
    }
  }

  for (r = 0; r < n_receivers; r++) {
    drained = drained + drain(boxes[r])
  }
  return drained
}
// Fan-in: N senders, one receiver.
// Each sender enqueues messages_per messages into a shared inbox; the
// receiver then consumes them all. Returns the sum of the `data` fields.
function fan_in(n_senders, messages_per) {
  var inbox = make_mailbox()
  var sender = 0
  var seq = 0
  var sum = 0
  var msg = null

  while (sender < n_senders) {
    for (seq = 0; seq < messages_per; seq++) {
      send(inbox, {sender: sender, seq: seq, data: sender * 100 + seq})
    }
    sender = sender + 1
  }

  // Consume until receive() reports an empty inbox.
  msg = receive(inbox)
  while (msg) {
    sum = sum + msg.data
    msg = receive(inbox)
  }
  return sum
}
// Pipeline: chain of processors.
// Builds stages + 1 mailboxes, feeds `items` values into the first, and
// lets each stage move every message to the next mailbox as val * 2 + 1.
// Returns the sum of the values that reach the last mailbox.
function pipeline(stages, items) {
  var hops = []
  var n = 0
  var stage = 0
  var item = null
  var sum = 0

  while (n <= stages) {
    push(hops, make_mailbox())
    n = n + 1
  }

  // Feed input
  for (n = 0; n < items; n++) {
    send(hops[0], {val: n})
  }

  // Each stage fully empties its inbox before the next stage runs.
  for (stage = 0; stage < stages; stage++) {
    item = receive(hops[stage])
    while (item) {
      send(hops[stage + 1], {val: item.val * 2 + 1})
      item = receive(hops[stage])
    }
  }

  // Drain output
  item = receive(hops[stages])
  while (item) {
    sum = sum + item.val
    item = receive(hops[stages])
  }
  return sum
}
// Request-response pattern (simulate RPC).
// For each request the client posts {id, payload, reply_to}, the server
// answers on reply_to with payload * 2 + 1, and the client adds the
// result to a running total, which is returned.
function request_response(n_requests) {
  var client = make_mailbox()
  var server = make_mailbox()
  var id = 0
  var call = null
  var reply = null
  var sum = 0

  while (id < n_requests) {
    // Client sends request
    send(server, {id: id, payload: id * 3, reply_to: client})

    // Server processes
    call = receive(server)
    if (call) {
      send(call.reply_to, {id: call.id, result: call.payload * 2 + 1})
    }

    // Client receives response
    reply = receive(client)
    if (reply) {
      sum = sum + reply.result
    }
    id = id + 1
  }
  return sum
}
// Module exports: one benchmark per key. Each function takes a repetition
// count n, runs its workload n times and returns the accumulated result.
return {
// Ping-pong: 10K rounds
ping_pong_10k: function(n) {
var i = 0
var x = 0
for (i = 0; i < n; i++) {
x += ping_pong(10000)
}
return x
},
// Fan-out: 100 receivers, 100 messages each
fan_out_100x100: function(n) {
var i = 0
var x = 0
for (i = 0; i < n; i++) {
x += fan_out(100, 100)
}
return x
},
// Fan-in: 100 senders, 100 messages each
fan_in_100x100: function(n) {
var i = 0
var x = 0
for (i = 0; i < n; i++) {
x += fan_in(100, 100)
}
return x
},
// Pipeline: 10 stages, 1000 items
pipeline_10x1k: function(n) {
var i = 0
var x = 0
for (i = 0; i < n; i++) {
x += pipeline(10, 1000)
}
return x
},
// Request-response: 5K requests
rpc_5k: function(n) {
var i = 0
var x = 0
for (i = 0; i < n; i++) {
x += request_response(5000)
}
return x
}
}

141
benches/cli_tool.cm Normal file
View File

@@ -0,0 +1,141 @@
// cli_tool.cm — CLI tool simulation (macro benchmark)
// Parse args + process data + transform + format output.
// Simulates a realistic small utility program.
var json = use('json')
// Generate n fake user records.
// Scores come from a fixed-seed (42) linear congruential sequence, so the
// output is deterministic; status and department cycle through fixed lists.
function generate_records(n) {
  var statuses = ["active", "inactive", "pending", "archived"]
  var departments = ["eng", "sales", "ops", "hr", "marketing"]
  var out = []
  var seed = 42
  var row = 0
  while (row < n) {
    // Advance the generator and keep the low 31 bits.
    seed = ((seed * 1103515245 + 12345) & 0x7FFFFFFF) | 0
    push(out, {
      id: row + 1,
      name: `user_${row}`,
      score: (seed % 1000) / 10,
      status: statuses[row % 4],
      department: departments[row % 5]
    })
    row = row + 1
  }
  return out
}
// Return the records whose `field` equals `value`, in their original order.
function filter_records(records, field, value) {
  var kept = []
  var idx = 0
  var rec = null
  while (idx < length(records)) {
    rec = records[idx]
    if (rec[field] == value) push(kept, rec)
    idx = idx + 1
  }
  return kept
}
// Bucket records by the value of `field`. Records whose field value is
// falsy are collected under the key "unknown".
function group_by(records, field) {
  var buckets = {}
  var idx = 0
  var rec = null
  var label = null
  while (idx < length(records)) {
    rec = records[idx]
    label = rec[field] ? rec[field] : "unknown"
    if (!buckets[label]) buckets[label] = []
    push(buckets[label], rec)
    idx = idx + 1
  }
  return buckets
}
// Aggregate: compute stats per group.
// `groups` maps a group key to an array of records with a numeric `score`.
// Returns one {group, count, average, low, high} entry per key, in key order.
//
// low/high are seeded from the data itself rather than from fixed sentinels,
// so they are correct for any score range (negative scores, scores above
// 999999). For an empty group low and high are null.
function aggregate(groups) {
  var keys = array(groups)
  var result = []
  var i = 0
  var j = 0
  var grp = null
  var score = 0
  var total = 0
  var mn = null
  var mx = null
  for (i = 0; i < length(keys); i++) {
    grp = groups[keys[i]]
    total = 0
    mn = null
    mx = null
    for (j = 0; j < length(grp); j++) {
      score = grp[j].score
      total += score
      // The first score seen initialises both bounds.
      if (mn == null || score < mn) mn = score
      if (mx == null || score > mx) mx = score
    }
    push(result, {
      group: keys[i],
      count: length(grp),
      average: total / length(grp),
      low: mn,
      high: mx
    })
  }
  return result
}
// Full pipeline: generate → filter → sort → group → aggregate → encode.
// Returns the length of the JSON text produced for the per-department stats.
function run_pipeline(n_records) {
  // Keep only the active records, ordered by score.
  var active = filter_records(generate_records(n_records), "status", "active")
  var top = sort(active, "score")
  var summary = null

  // Limit to first 50
  if (length(top) > 50) {
    top = array(top, 0, 50)
  }

  // Per-department stats, ordered by average score.
  summary = sort(aggregate(group_by(top, "department")), "average")

  return length(json.encode(summary))
}
// Module exports: one benchmark per dataset size. Each function takes a
// repetition count n, runs the full pipeline n times and returns the summed
// lengths of the encoded output.
return {
// Small dataset (100 records)
cli_pipeline_100: function(n) {
var i = 0
var x = 0
for (i = 0; i < n; i++) {
x += run_pipeline(100)
}
return x
},
// Medium dataset (1000 records)
cli_pipeline_1k: function(n) {
var i = 0
var x = 0
for (i = 0; i < n; i++) {
x += run_pipeline(1000)
}
return x
},
// Large dataset (10K records)
cli_pipeline_10k: function(n) {
var i = 0
var x = 0
for (i = 0; i < n; i++) {
x += run_pipeline(10000)
}
return x
}
}

162
benches/deltablue.cm Normal file
View File

@@ -0,0 +1,162 @@
// deltablue.cm — Constraint solver kernel (DeltaBlue-inspired)
// Dynamic dispatch, pointer chasing, object-heavy workload.
def REQUIRED = 0
def STRONG = 1
def NORMAL = 2
def WEAK = 3
def WEAKEST = 4
// Create a solver variable record with the given name and initial value.
// It starts with no attached constraints, no determining constraint,
// stay = true and mark = 0.
function make_variable(name, value) {
  var attached = []
  var variable = {
    name: name,
    value: value,
    constraints: attached,
    determined_by: null,
    stay: true,
    mark: 0
  }
  return variable
}
// Create a constraint record. satisfy_fn is stored under `satisfy`;
// `output` starts as null until a satisfy function assigns it.
function make_constraint(strength, variables, satisfy_fn) {
  var constraint = {
    strength: strength,
    variables: variables,
    satisfy: satisfy_fn,
    output: null
  }
  return constraint
}
// Constraint propagation: simple forward solver.
// Re-runs every constraint's satisfy function until a full pass leaves all
// output values unchanged, or until 3 * (number of constraints) passes have
// run. Returns the number of passes performed. `vars` is accepted but not
// read here.
function propagate(vars, constraints) {
  var limit = length(constraints) * 3
  var rounds = 0
  var dirty = true
  var idx = 0
  var con = null
  var before = null
  while (dirty && rounds < limit) {
    dirty = false
    rounds = rounds + 1
    for (idx = 0; idx < length(constraints); idx++) {
      con = constraints[idx]
      // Remember the output value before re-solving (null if never solved).
      before = con.output ? con.output.value : null
      con.satisfy(con)
      if (con.output && con.output.value != before) dirty = true
    }
  }
  return rounds
}
// Build a chain of n variables linked by constraints v[i] = v[i-1] + 1.
// The first variable is set to 1; each constraint is also registered on
// the variable it writes. Returns {vars, constraints}.
function build_chain(n) {
  var vars = []
  var constraints = []
  var pos = 0
  var link = null

  while (pos < n) {
    push(vars, make_variable(`v${pos}`, 0))
    pos = pos + 1
  }

  // Set first variable
  vars[0].value = 1

  for (pos = 1; pos < n; pos++) {
    link = make_constraint(NORMAL, [vars[pos - 1], vars[pos]], function(self) {
      var source = self.variables[0]
      var target = self.variables[1]
      target.value = source.value + 1
      self.output = target
    })
    push(constraints, link)
    push(vars[pos].constraints, link)
  }
  return {vars: vars, constraints: constraints}
}
// Build a projection: n (src, dst) variable pairs, each tied by a STRONG
// constraint dst = src * 2 + 1. src[i] starts at i * 10, dst[i] at 0.
// Returns {src, dst, constraints}.
function build_projection(n) {
  var src = []
  var dst = []
  var constraints = []
  var pos = 0
  var link = null

  while (pos < n) {
    push(src, make_variable(`src${pos}`, pos * 10))
    push(dst, make_variable(`dst${pos}`, 0))
    pos = pos + 1
  }

  for (pos = 0; pos < n; pos++) {
    link = make_constraint(STRONG, [src[pos], dst[pos]], function(self) {
      var source = self.variables[0]
      var target = self.variables[1]
      target.value = source.value * 2 + 1
      self.output = target
    })
    push(constraints, link)
    // Only the written (dst) variable records the constraint.
    push(dst[pos].constraints, link)
  }
  return {src: src, dst: dst, constraints: constraints}
}
// Apply `edits` successive edits to the first variable of `system`
// (setting it to 0, 1, 2, ...) and re-propagate after each one.
// Returns the total number of propagation passes.
function run_edits(system, edits) {
  var passes = 0
  var step = 0
  while (step < edits) {
    system.vars[0].value = step
    passes = passes + propagate(system.vars, system.constraints)
    step = step + 1
  }
  return passes
}
// Module exports: one benchmark per key. Each function takes a repetition
// count n and returns the summed number of propagation passes.
return {
// Chain of 100 variables, propagate
chain_100: function(n) {
var i = 0
var chain = null
var x = 0
for (i = 0; i < n; i++) {
chain = build_chain(100)
x += propagate(chain.vars, chain.constraints)
}
return x
},
// Chain of 500 variables, propagate
chain_500: function(n) {
var i = 0
var chain = null
var x = 0
for (i = 0; i < n; i++) {
chain = build_chain(500)
x += propagate(chain.vars, chain.constraints)
}
return x
},
// Projection of 100 pairs
projection_100: function(n) {
var i = 0
var proj = null
var x = 0
for (i = 0; i < n; i++) {
proj = build_projection(100)
x += propagate(proj.src, proj.constraints)
}
return x
},
// Edit and re-propagate (incremental update)
// Unlike the other benchmarks, the chain is built once, outside the timed loop.
chain_edit_100: function(n) {
var chain = build_chain(100)
var i = 0
var x = 0
for (i = 0; i < n; i++) {
chain.vars[0].value = i
x += propagate(chain.vars, chain.constraints)
}
return x
}
}

126
benches/fibonacci.cm Normal file
View File

@@ -0,0 +1,126 @@
// fibonacci.cm — Fibonacci variants kernel
// Tests recursion overhead, memoization patterns, iteration vs recursion.
// Naive recursive (exponential) — measures call overhead.
// Returns n itself for n <= 1.
function fib_naive(n) {
  return n <= 1 ? n : fib_naive(n - 1) + fib_naive(n - 2)
}
// Iterative (linear): slides a (current, next) pair forward n times and
// returns the current value.
function fib_iter(n) {
  var current = 0
  var next = 1
  var step = 0
  var sum = 0
  while (step < n) {
    sum = current + next
    current = next
    next = sum
    step = step + 1
  }
  return current
}
// Memoized recursive (tests object property lookup + recursion).
// Returns a Fibonacci function backed by its own private cache, keyed by
// the text form of n.
function make_memo_fib() {
  var memo = {}
  var lookup = function(n) {
    var slot = text(n)
    var value = memo[slot]
    // Truthiness check: a cached 0 is not treated as a hit and is simply
    // recomputed by the base case below.
    if (value) return value
    value = n <= 1 ? n : lookup(n - 1) + lookup(n - 2)
    memo[slot] = value
    return value
  }
  return lookup
}
// CPS (continuation passing style) — tests closure creation.
// The result is delivered by calling cont; two fresh closures are created
// per non-base call.
function fib_cps(n, cont) {
  var after_first = null
  if (n <= 1) return cont(n)
  after_first = function(left) {
    var after_second = function(right) {
      return cont(left + right)
    }
    return fib_cps(n - 2, after_second)
  }
  return fib_cps(n - 1, after_first)
}
// Matrix exponentiation style (accumulator).
// Square-and-multiply shaped loop over the bits of n. The combining step
// is intentionally not a correct Fibonacci recurrence; it exists to
// stress numeric operations. Returns the second accumulator.
function fib_matrix(n) {
  var acc_a = 1
  var acc_b = 0
  var sq_c = 0
  var sq_d = 1
  var next_a = 0
  var next_b = 0
  var remaining = n
  while (remaining > 0) {
    // Low bit set: fold the current square into the accumulator.
    if (remaining % 2 == 1) {
      next_a = acc_a * sq_d + acc_b * sq_c // wrong but stresses numeric ops
      next_b = acc_b * sq_d + acc_a * sq_c
      acc_a = next_a
      acc_b = next_b
    }
    // Square step, then move to the next bit.
    next_a = sq_c * sq_c + sq_d * sq_d
    next_b = sq_d * (2 * sq_c + sq_d)
    sq_c = next_a
    sq_d = next_b
    remaining = floor(remaining / 2)
  }
  return acc_b
}
// Module exports: one benchmark per key. Each function takes a repetition
// count n, evaluates its Fibonacci variant n times and returns the sum.
return {
fib_naive_25: function(n) {
var i = 0
var x = 0
for (i = 0; i < n; i++) x += fib_naive(25)
return x
},
fib_naive_30: function(n) {
var i = 0
var x = 0
for (i = 0; i < n; i++) x += fib_naive(30)
return x
},
fib_iter_80: function(n) {
var i = 0
var x = 0
for (i = 0; i < n; i++) x += fib_iter(80)
return x
},
// A fresh memoized function is built each iteration, so every run starts with an empty cache.
fib_memo_100: function(n) {
var i = 0
var x = 0
var fib = null
for (i = 0; i < n; i++) {
fib = make_memo_fib()
x += fib(100)
}
return x
},
// The identity continuation is created once and reused across iterations.
fib_cps_20: function(n) {
var i = 0
var x = 0
var identity = function(v) { return v }
for (i = 0; i < n; i++) {
x += fib_cps(20, identity)
}
return x
},
fib_matrix_80: function(n) {
var i = 0
var x = 0
for (i = 0; i < n; i++) x += fib_matrix(80)
return x
}
}

159
benches/hash_workload.cm Normal file
View File

@@ -0,0 +1,159 @@
// hash_workload.cm — Hash-heavy / word-count / map-reduce kernel
// Stresses record (object) creation, property access, and string handling.
// Build a list of `count` words by cycling through a fixed 32-word
// vocabulary, simulating text to process.
function make_words(count) {
  var vocabulary = [
    "the", "quick", "brown", "fox", "jumps", "over", "lazy", "dog",
    "and", "cat", "sat", "on", "mat", "with", "hat", "bat",
    "alpha", "beta", "gamma", "delta", "epsilon", "zeta", "eta", "theta",
    "hello", "world", "foo", "bar", "baz", "qux", "quux", "corge"
  ]
  var out = []
  var pos = 0
  while (pos < count) {
    push(out, vocabulary[pos % length(vocabulary)])
    pos = pos + 1
  }
  return out
}
// Word frequency count: returns a record mapping each distinct word to
// the number of times it occurs in `words`.
function word_count(words) {
  var tally = {}
  var pos = 0
  var word = null
  while (pos < length(words)) {
    word = words[pos]
    tally[word] = tally[word] ? tally[word] + 1 : 1
    pos = pos + 1
  }
  return tally
}
// Find top-N words by frequency.
// Converts the frequency record into {word, count} pairs, sorts them by
// "count", and returns the last n entries of the sorted list (all of
// them when there are fewer than n).
function top_n(freq, n) {
  var words = array(freq)
  var entries = []
  var ranked = null
  var picked = []
  var pos = 0
  var first = 0

  while (pos < length(words)) {
    push(entries, {word: words[pos], count: freq[words[pos]]})
    pos = pos + 1
  }

  ranked = sort(entries, "count")

  // Index of the first entry to keep, clamped so it never goes negative.
  first = length(ranked) - n
  if (first < 0) first = 0

  for (pos = first; pos < length(ranked); pos++) {
    push(picked, ranked[pos])
  }
  return picked
}
// Histogram: group words by length.
// Keys are the text form of each word's length; values list the words of
// that length in input order (duplicates kept).
function group_by_length(words) {
  var buckets = {}
  var pos = 0
  var word = null
  var size_key = null
  while (pos < length(words)) {
    word = words[pos]
    size_key = text(length(word))
    if (!buckets[size_key]) buckets[size_key] = []
    push(buckets[size_key], word)
    pos = pos + 1
  }
  return buckets
}
// Record used as a hash table (stress property access patterns).
// Runs an insert, a lookup and a delete phase of n operations each over a
// cycle of 512 distinct keys, and returns found - deleted + collisions as a
// checksum of the work done.
function hash_table_ops(n) {
  var table = {}
  var i = 0
  var k = null
  var collisions = 0
  // Insert phase. The stored value is i + 1 rather than i: every presence
  // test in this function is a truthiness check, and a stored 0 (the very
  // first insert, key_0) would read as "absent" and silently drop one
  // collision, lookup or delete.
  for (i = 0; i < n; i++) {
    k = `key_${i % 512}`
    if (table[k]) collisions++
    table[k] = i + 1
  }
  // Lookup phase
  var found = 0
  for (i = 0; i < n; i++) {
    k = `key_${i % 512}`
    if (table[k]) found++
  }
  // Delete phase: every third operation index; a key is only counted the
  // first time it is actually removed.
  var deleted = 0
  for (i = 0; i < n; i += 3) {
    k = `key_${i % 512}`
    if (table[k]) {
      delete table[k]
      deleted++
    }
  }
  return found - deleted + collisions
}
var words_1k = make_words(1000)
var words_10k = make_words(10000)
return {
// Word count on 1K words
wordcount_1k: function(n) {
var i = 0
var freq = null
for (i = 0; i < n; i++) {
freq = word_count(words_1k)
}
return freq
},
// Word count on 10K words
wordcount_10k: function(n) {
var i = 0
var freq = null
for (i = 0; i < n; i++) {
freq = word_count(words_10k)
}
return freq
},
// Word count + top-10 extraction
wordcount_top10: function(n) {
var i = 0
var freq = null
var top = null
for (i = 0; i < n; i++) {
freq = word_count(words_10k)
top = top_n(freq, 10)
}
return top
},
// Group words by length
group_by_len: function(n) {
var i = 0
var groups = null
for (i = 0; i < n; i++) {
groups = group_by_length(words_10k)
}
return groups
},
// Hash table insert/lookup/delete
hash_table: function(n) {
var i = 0
var x = 0
for (i = 0; i < n; i++) {
x += hash_table_ops(2048)
}
return x
}
}

167
benches/json_walk.cm Normal file
View File

@@ -0,0 +1,167 @@
// json_walk.cm — JSON parse + walk + serialize kernel
// Stresses strings, records, arrays, and recursive traversal.
var json = use('json')
function make_nested_object(depth, breadth) {
  // Build a record tree. While depth > 0 a node has `breadth` children
  // (node_0 .. node_{breadth-1}) plus `value` and `name` fields; at depth <= 0
  // it is a leaf holding `breadth` numeric fields key_i = i * 3.14.
  var node = {}
  var idx = 0
  if (depth > 0) {
    for (idx = 0; idx < breadth; idx++) {
      node[`node_${idx}`] = make_nested_object(depth - 1, breadth)
    }
    node.value = depth
    node.name = `level_${depth}`
    return node
  }
  for (idx = 0; idx < breadth; idx++) {
    node[`key_${idx}`] = idx * 3.14
  }
  return node
}
function make_array_data(size) {
  // Build `size` flat records, each with a number id, a text name, a logical
  // flag, a fractional score and a two-element tag array.
  var items = []
  var id = 0
  var item = null
  while (id < size) {
    item = {
      id: id,
      name: `item_${id}`,
      active: id % 2 == 0,
      score: id * 1.5,
      tags: [`tag_${id % 5}`, `tag_${(id + 1) % 5}`]
    }
    push(items, item)
    id++
  }
  return items
}
// Count container nodes reachable from obj. The value passed in always
// counts as one; only children that are records or arrays are descended into.
function walk_count(obj) {
  var total = 1
  var names = null
  var idx = 0
  var child = null
  if (is_object(obj)) {
    names = array(obj)
    for (idx = 0; idx < length(names); idx++) {
      child = obj[names[idx]]
      if (is_object(child) || is_array(child)) {
        total += walk_count(child)
      }
    }
    return total
  }
  if (is_array(obj)) {
    for (idx = 0; idx < length(obj); idx++) {
      child = obj[idx]
      if (is_object(child) || is_array(child)) {
        total += walk_count(child)
      }
    }
  }
  return total
}
// Add up every number reachable from obj through nested records and arrays.
// Non-numeric leaves are ignored; a non-container argument sums to 0.
function walk_sum(obj) {
  var total = 0
  var names = null
  var idx = 0
  var child = null
  if (is_object(obj)) {
    names = array(obj)
    for (idx = 0; idx < length(names); idx++) {
      child = obj[names[idx]]
      if (is_number(child)) {
        total += child
      } else if (is_object(child) || is_array(child)) {
        total += walk_sum(child)
      }
    }
    return total
  }
  if (is_array(obj)) {
    for (idx = 0; idx < length(obj); idx++) {
      child = obj[idx]
      if (is_number(child)) {
        total += child
      } else if (is_object(child) || is_array(child)) {
        total += walk_sum(child)
      }
    }
  }
  return total
}
// Pre-build test data strings
var nested_obj = make_nested_object(3, 4)
var nested_json = json.encode(nested_obj)
var array_data = make_array_data(200)
var array_json = json.encode(array_data)
return {
// Parse nested JSON
json_parse_nested: function(n) {
var i = 0
var obj = null
for (i = 0; i < n; i++) {
obj = json.decode(nested_json)
}
return obj
},
// Parse array-of-records JSON
json_parse_array: function(n) {
var i = 0
var arr = null
for (i = 0; i < n; i++) {
arr = json.decode(array_json)
}
return arr
},
// Encode nested object to JSON
json_encode_nested: function(n) {
var i = 0
var s = null
for (i = 0; i < n; i++) {
s = json.encode(nested_obj)
}
return s
},
// Encode array to JSON
json_encode_array: function(n) {
var i = 0
var s = null
for (i = 0; i < n; i++) {
s = json.encode(array_data)
}
return s
},
// Parse + walk + count
json_roundtrip_walk: function(n) {
var i = 0
var obj = null
var count = 0
for (i = 0; i < n; i++) {
obj = json.decode(nested_json)
count += walk_count(obj)
}
return count
},
// Parse + sum all numbers + re-encode
json_roundtrip_full: function(n) {
var i = 0
var obj = null
var sum = 0
var out = null
for (i = 0; i < n; i++) {
obj = json.decode(array_json)
sum += walk_sum(obj)
out = json.encode(obj)
}
return sum
}
}

View File

@@ -1,24 +1,24 @@
// micro_ops.bench.ce (or .cm depending on your convention)
// micro_ops.cm — microbenchmarks for core operations
// Note: We use a function-local sink in each benchmark to avoid cross-contamination
function blackhole(sink, x) {
  // Fold x into sink, truncating both the operand and the result with | 0,
  // so the benchmark's computed value is consumed (guards against dead-code
  // elimination).
  var folded = sink + (x | 0)
  return folded | 0
}
function make_obj_xy(x, y) {
return { x, y }
return {x: x, y: y}
}
function make_obj_yx(x, y) {
// Different insertion order to force a different shape in many engines
return { y, x }
// Different insertion order to force a different shape
return {y: y, x: x}
}
function make_shapes(n) {
var out = []
for (var i = 0; i < n; i++) {
var o = { a: i }
var i = 0
var o = null
for (i = 0; i < n; i++) {
o = {a: i}
o[`p${i}`] = i
push(out, o)
}
@@ -27,13 +27,15 @@ function make_shapes(n) {
function make_packed_array(n) {
var a = []
for (var i = 0; i < n; i++) push(a, i)
var i = 0
for (i = 0; i < n; i++) push(a, i)
return a
}
function make_holey_array(n) {
var a = []
for (var i = 0; i < n; i += 2) a[i] = i
var i = 0
for (i = 0; i < n; i += 2) a[i] = i
return a
}
@@ -41,7 +43,8 @@ return {
// 0) Baseline loop cost
loop_empty: function(n) {
var sink = 0
for (var i = 0; i < n; i++) {}
var i = 0
for (i = 0; i < n; i++) {}
return blackhole(sink, n)
},
@@ -49,35 +52,40 @@ return {
i32_add: function(n) {
var sink = 0
var x = 1
for (var i = 0; i < n; i++) x = (x + 3) | 0
var i = 0
for (i = 0; i < n; i++) x = (x + 3) | 0
return blackhole(sink, x)
},
f64_add: function(n) {
var sink = 0
var x = 1.0
for (var i = 0; i < n; i++) x = x + 3.14159
var i = 0
for (i = 0; i < n; i++) x = x + 3.14159
return blackhole(sink, x | 0)
},
mixed_add: function(n) {
var sink = 0
var x = 1
for (var i = 0; i < n; i++) x = x + 0.25
var i = 0
for (i = 0; i < n; i++) x = x + 0.25
return blackhole(sink, x | 0)
},
bit_ops: function(n) {
var sink = 0
var x = 0x12345678
for (var i = 0; i < n; i++) x = ((x << 5) ^ (x >>> 3)) | 0
var i = 0
for (i = 0; i < n; i++) x = ((x << 5) ^ (x >>> 3)) | 0
return blackhole(sink, x)
},
overflow_path: function(n) {
var sink = 0
var x = 0x70000000
for (var i = 0; i < n; i++) x = (x + 0x10000000) | 0
var i = 0
for (i = 0; i < n; i++) x = (x + 0x10000000) | 0
return blackhole(sink, x)
},
@@ -85,7 +93,8 @@ return {
branch_predictable: function(n) {
var sink = 0
var x = 0
for (var i = 0; i < n; i++) {
var i = 0
for (i = 0; i < n; i++) {
if ((i & 7) != 0) x++
else x += 2
}
@@ -95,7 +104,8 @@ return {
branch_alternating: function(n) {
var sink = 0
var x = 0
for (var i = 0; i < n; i++) {
var i = 0
for (i = 0; i < n; i++) {
if ((i & 1) == 0) x++
else x += 2
}
@@ -105,29 +115,47 @@ return {
// 3) Calls
call_direct: function(n) {
var sink = 0
function f(a) { return (a + 1) | 0 }
var f = function(a) { return (a + 1) | 0 }
var x = 0
for (var i = 0; i < n; i++) x = f(x)
var i = 0
for (i = 0; i < n; i++) x = f(x)
return blackhole(sink, x)
},
call_indirect: function(n) {
var sink = 0
function f(a) { return (a + 1) | 0 }
var f = function(a) { return (a + 1) | 0 }
var g = f
var x = 0
for (var i = 0; i < n; i++) x = g(x)
var i = 0
for (i = 0; i < n; i++) x = g(x)
return blackhole(sink, x)
},
call_closure: function(n) {
var sink = 0
function make_adder(k) {
var make_adder = function(k) {
return function(a) { return (a + k) | 0 }
}
var add3 = make_adder(3)
var x = 0
for (var i = 0; i < n; i++) x = add3(x)
var i = 0
for (i = 0; i < n; i++) x = add3(x)
return blackhole(sink, x)
},
call_multi_arity: function(n) {
// Each iteration calls five local functions taking 0, 1, 2, 3 and 4
// arguments and folds all five results into x with | 0.
var sink = 0
var f0 = function() { return 1 }
var f1 = function(a) { return a + 1 }
var f2 = function(a, b) { return a + b }
var f3 = function(a, b, c) { return a + b + c }
var f4 = function(a, b, c, d) { return a + b + c + d }
var x = 0
var i = 0
for (i = 0; i < n; i++) {
x = (x + f0() + f1(i) + f2(i, 1) + f3(i, 1, 2) + f4(i, 1, 2, 3)) | 0
}
return blackhole(sink, x)
},
@@ -136,7 +164,8 @@ return {
var sink = 0
var o = make_obj_xy(1, 2)
var x = 0
for (var i = 0; i < n; i++) x = (x + o.x) | 0
var i = 0
for (i = 0; i < n; i++) x = (x + o.x) | 0
return blackhole(sink, x)
},
@@ -145,20 +174,38 @@ return {
var a = make_obj_xy(1, 2)
var b = make_obj_yx(1, 2)
var x = 0
for (var i = 0; i < n; i++) {
var o = (i & 1) == 0 ? a : b
var i = 0
var o = null
for (i = 0; i < n; i++) {
o = (i & 1) == 0 ? a : b
x = (x + o.x) | 0
}
return blackhole(sink, x)
},
prop_read_poly_4: function(n) {
// Reads .x from four records that all carry x but differ in key order or
// key set; i & 3 cycles through them so each iteration sees a different one.
var sink = 0
var shapes = [
{x: 1, y: 2},
{y: 2, x: 1},
{x: 1, z: 3, y: 2},
{w: 0, x: 1, y: 2}
]
var x = 0
var i = 0
for (i = 0; i < n; i++) {
x = (x + shapes[i & 3].x) | 0
}
return blackhole(sink, x)
},
prop_read_mega: function(n) {
var sink = 0
var objs = make_shapes(32)
var x = 0
for (var i = 0; i < n; i++) {
var o = objs[i & 31]
x = (x + o.a) | 0
var i = 0
for (i = 0; i < n; i++) {
x = (x + objs[i & 31].a) | 0
}
return blackhole(sink, x)
},
@@ -166,7 +213,8 @@ return {
prop_write_mono: function(n) {
var sink = 0
var o = make_obj_xy(1, 2)
for (var i = 0; i < n; i++) o.x = (o.x + 1) | 0
var i = 0
for (i = 0; i < n; i++) o.x = (o.x + 1) | 0
return blackhole(sink, o.x)
},
@@ -175,14 +223,16 @@ return {
var sink = 0
var a = make_packed_array(1024)
var x = 0
for (var i = 0; i < n; i++) x = (x + a[i & 1023]) | 0
var i = 0
for (i = 0; i < n; i++) x = (x + a[i & 1023]) | 0
return blackhole(sink, x)
},
array_write_packed: function(n) {
var sink = 0
var a = make_packed_array(1024)
for (var i = 0; i < n; i++) a[i & 1023] = i
var i = 0
for (i = 0; i < n; i++) a[i & 1023] = i
return blackhole(sink, a[17] | 0)
},
@@ -190,9 +240,10 @@ return {
var sink = 0
var a = make_holey_array(2048)
var x = 0
for (var i = 0; i < n; i++) {
var v = a[(i & 2047)]
// If "missing" is a special value in your language, this stresses that path too
var i = 0
var v = null
for (i = 0; i < n; i++) {
v = a[(i & 2047)]
if (v) x = (x + v) | 0
}
return blackhole(sink, x)
@@ -201,21 +252,97 @@ return {
array_push_steady: function(n) {
var sink = 0
var x = 0
for (var j = 0; j < n; j++) {
var a = []
for (var i = 0; i < 256; i++) push(a, i)
var j = 0
var i = 0
var a = null
for (j = 0; j < n; j++) {
a = []
for (i = 0; i < 256; i++) push(a, i)
x = (x + length(a)) | 0
}
return blackhole(sink, x)
},
array_push_pop: function(n) {
// Pushes i on every iteration; once the array holds more than 64 elements
// the same iteration also pops one and adds the popped value to x.
var sink = 0
var a = []
var x = 0
var i = 0
var v = 0
for (i = 0; i < n; i++) {
push(a, i)
if (length(a) > 64) {
v = pop(a)
x = (x + v) | 0
}
}
return blackhole(sink, x)
},
array_indexed_sum: function(n) {
// n full passes over a 1024-element array from make_packed_array, summing
// by index. x is reset at the start of every pass, so only the final
// pass's sum reaches blackhole.
var sink = 0
var a = make_packed_array(1024)
var x = 0
var j = 0
var i = 0
for (j = 0; j < n; j++) {
x = 0
for (i = 0; i < 1024; i++) {
x = (x + a[i]) | 0
}
}
return blackhole(sink, x)
},
// 6) Strings
string_concat_small: function(n) {
var sink = 0
var x = 0
for (var j = 0; j < n; j++) {
var s = ""
for (var i = 0; i < 16; i++) s = s + "x"
var j = 0
var i = 0
var s = null
for (j = 0; j < n; j++) {
s = ""
for (i = 0; i < 16; i++) s = s + "x"
x = (x + length(s)) | 0
}
return blackhole(sink, x)
},
string_concat_medium: function(n) {
// n times: start from "" and append a 10-character chunk 100 times, then
// add the resulting length to x.
var sink = 0
var x = 0
var j = 0
var i = 0
var s = null
for (j = 0; j < n; j++) {
s = ""
for (i = 0; i < 100; i++) s = s + "abcdefghij"
x = (x + length(s)) | 0
}
return blackhole(sink, x)
},
string_interpolation: function(n) {
// Builds one template string with two interpolated numbers per iteration
// and accumulates its length.
var sink = 0
var x = 0
var i = 0
var s = null
for (i = 0; i < n; i++) {
s = `item_${i}_value_${i * 2}`
x = (x + length(s)) | 0
}
return blackhole(sink, x)
},
string_slice: function(n) {
var sink = 0
var base = "the quick brown fox jumps over the lazy dog"
var x = 0
var i = 0
var s = null
for (i = 0; i < n; i++) {
s = text(base, i % 10, i % 10 + 10)
x = (x + length(s)) | 0
}
return blackhole(sink, x)
@@ -225,8 +352,10 @@ return {
alloc_tiny_objects: function(n) {
var sink = 0
var x = 0
for (var i = 0; i < n; i++) {
var o = { a: i, b: i + 1, c: i + 2 }
var i = 0
var o = null
for (i = 0; i < n; i++) {
o = {a: i, b: i + 1, c: i + 2}
x = (x + o.b) | 0
}
return blackhole(sink, x)
@@ -235,9 +364,12 @@ return {
alloc_linked_list: function(n) {
var sink = 0
var head = null
for (var i = 0; i < n; i++) head = { v: i, next: head }
var i = 0
var x = 0
var p = head
var p = null
for (i = 0; i < n; i++) head = {v: i, next: head}
x = 0
p = head
while (p) {
x = (x + p.v) | 0
p = p.next
@@ -245,18 +377,118 @@ return {
return blackhole(sink, x)
},
// 8) meme-specific (adapt these to your exact semantics)
meme_clone_read: function(n) {
// If meme(obj) clones like Object.create / prototypal clone, this hits it hard.
// Replace with your exact meme call form.
alloc_arrays: function(n) {
var sink = 0
var base = { x: 1, y: 2 }
var x = 0
for (var i = 0; i < n; i++) {
var o = meme(base)
var i = 0
var a = null
for (i = 0; i < n; i++) {
a = [i, i + 1, i + 2, i + 3]
x = (x + a[2]) | 0
}
return blackhole(sink, x)
},
alloc_short_lived: function(n) {
// Creates a record holding a nested record on each iteration, reads one
// field back, and keeps no reference beyond the next assignment to o.
var sink = 0
var x = 0
var i = 0
var o = null
// Allocate objects that immediately become garbage
for (i = 0; i < n; i++) {
o = {val: i, data: {inner: i + 1}}
x = (x + o.data.inner) | 0
}
return blackhole(sink, x)
},
alloc_long_lived_pressure: function(n) {
// Allocates one record per iteration; the first 1024 are retained in
// `store`, every later one is dropped after its data field is read.
var sink = 0
var store = []
var x = 0
var i = 0
var o = null
// Keep first 1024 objects alive, churn the rest
for (i = 0; i < n; i++) {
o = {val: i, data: i * 2}
if (i < 1024) {
push(store, o)
}
x = (x + o.data) | 0
}
return blackhole(sink, x)
},
// 8) Meme (prototype clone)
meme_clone_read: function(n) {
// Calls meme(base) on every iteration and reads x from the result.
// NOTE(review): presumably x is found through the clone's link to base —
// confirm against meme's definition.
var sink = 0
var base = {x: 1, y: 2}
var x = 0
var i = 0
var o = null
for (i = 0; i < n; i++) {
o = meme(base)
x = (x + o.x) | 0
}
return blackhole(sink, x)
},
// 9) Guard / type check paths
guard_hot_number: function(n) {
// Monomorphic number path — guards should hoist
// x only ever holds a number: it starts at 1 and is incremented by 1.
var sink = 0
var x = 1
var i = 0
for (i = 0; i < n; i++) x = x + 1
return blackhole(sink, x | 0)
},
guard_mixed_types: function(n) {
// Alternating number/text — guards must stay
// vals interleaves numbers and one-character texts; i & 7 walks it in
// order, and only the elements that pass is_number are added to x.
var sink = 0
var vals = [1, "a", 2, "b", 3, "c", 4, "d"]
var x = 0
var i = 0
for (i = 0; i < n; i++) {
if (is_number(vals[i & 7])) x = (x + vals[i & 7]) | 0
}
return blackhole(sink, x)
},
// 10) Reduce / higher-order
reduce_sum: function(n) {
// Sums a 256-element array through reduce() with an adding callback and an
// initial value of 0; a new callback function is created every iteration.
var sink = 0
var a = make_packed_array(256)
var x = 0
var i = 0
for (i = 0; i < n; i++) {
x = (x + reduce(a, function(acc, v) { return acc + v }, 0)) | 0
}
return blackhole(sink, x)
},
filter_evens: function(n) {
// Runs filter() over a 256-element array with a v % 2 == 0 predicate and
// accumulates the length of each result.
var sink = 0
var a = make_packed_array(256)
var x = 0
var i = 0
for (i = 0; i < n; i++) {
x = (x + length(filter(a, function(v) { return v % 2 == 0 }))) | 0
}
return blackhole(sink, x)
},
arrfor_sum: function(n) {
// Sums a 256-element array with arrfor(); the callback accumulates into
// the enclosing `sum` variable, which is reset before every pass.
var sink = 0
var a = make_packed_array(256)
var x = 0
var i = 0
var sum = 0
for (i = 0; i < n; i++) {
sum = 0
arrfor(a, function(v) { sum += v })
x = (x + sum) | 0
}
return blackhole(sink, x)
}
}

249
benches/module_load.cm Normal file
View File

@@ -0,0 +1,249 @@
// module_load.cm — Module loading simulation (macro benchmark)
// Simulates parsing many small modules, linking, and running.
// Tests the "build scenario" pattern.
var json = use('json')
// Split source text into tokens. Space, newline and tab only separate
// tokens; ( ) + - * = ; , separate tokens and are also emitted as tokens of
// their own. Every other character (braces included) extends the current token.
function tokenize(src) {
  var out = []
  var chars = array(src)
  var pending = ""
  var pos = 0
  var c = null
  var is_space = false
  var is_punct = false
  for (pos = 0; pos < length(chars); pos++) {
    c = chars[pos]
    is_space = c == " " || c == "\n" || c == "\t"
    is_punct = c == "(" || c == ")" || c == "+" || c == "-"
      || c == "*" || c == "=" || c == ";" || c == ","
    if (!is_space && !is_punct) {
      pending = pending + c
    } else {
      // A separator ends whatever token was being accumulated.
      if (length(pending) > 0) {
        push(out, pending)
        pending = ""
      }
      if (is_punct) push(out, c)
    }
  }
  if (length(pending) > 0) push(out, pending)
  return out
}
// Build a simple AST from tokens
// Produces a flat array of nodes; there is no nesting. Tokens that do not
// start a declaration, return or function become {type: "expr"} nodes.
// The loop index i is advanced inside the branches as well as by the for
// header, so statement order here matters.
function parse_tokens(tokens) {
var ast = []
var i = 0
var tok = null
var node = null
for (i = 0; i < length(tokens); i++) {
tok = tokens[i]
if (tok == "var" || tok == "def") {
// Expected shape: <kind> <name> = <value>. Only the single token that
// follows "=" is recorded as the value; anything after it is parsed as
// ordinary tokens by later iterations.
node = {type: "decl", kind: tok, name: null, value: null}
i++
if (i < length(tokens)) node.name = tokens[i]
i++ // step onto the "=" token
i++ // step onto the value token
if (i < length(tokens)) node.value = tokens[i]
push(ast, node)
} else if (tok == "return") {
// The token after "return" is taken as the returned value.
node = {type: "return", value: null}
i++
if (i < length(tokens)) node.value = tokens[i]
push(ast, node)
} else if (tok == "function") {
// Record the name only; body stays an empty array.
node = {type: "func", name: null, body: []}
i++
if (i < length(tokens)) node.name = tokens[i]
// Skip to matching )
// (stops at the first ")" token; tokens after it are parsed normally)
while (i < length(tokens) && tokens[i] != ")") i++
push(ast, node)
} else {
push(ast, {type: "expr", value: tok})
}
}
return ast
}
// Run a flat AST against a symbol table. Declarations and functions are
// written into env (mutating the caller's record); a return node yields its
// value, replaced by env's binding for that name when one is truthy.
// Returns the value of the last return node seen, or null if there was none.
function evaluate(ast, env) {
  var out = null
  var idx = 0
  var stmt = null
  var kind = null
  for (idx = 0; idx < length(ast); idx++) {
    stmt = ast[idx]
    kind = stmt.type
    if (kind == "decl") {
      env[stmt.name] = stmt.value
    } else if (kind == "func") {
      env[stmt.name] = stmt
    } else if (kind == "return") {
      out = stmt.value
      if (env[out]) out = env[out]
    }
  }
  return out
}
// Produce the source text of a fake module: an id line, one use() line per
// dependency, two numeric declarations derived from id, a small function,
// a call to it and a return.
function generate_module(id, dep_count) {
  var header = "var _id = " + text(id) + ";\n"
  var deps = ""
  var d = 0
  while (d < dep_count) {
    deps = deps + "var dep" + text(d) + " = use(mod_" + text(d) + ");\n"
    d++
  }
  var body = "var x = " + text(id * 17) + ";\n"
  body = body + "var y = " + text(id * 31) + ";\n"
  body = body + "function compute(a, b) { return a + b; }\n"
  body = body + "var result = compute(x, y);\n"
  body = body + "return result;\n"
  return header + deps + body
}
// Simulate a build of n_modules modules. All sources are generated first;
// each is then tokenized, parsed and evaluated in order. Module i sees the
// results of modules 0 .. min(deps_per_module, i) - 1 as dep0, dep1, ...
// Returns the module count, token and node totals, and the last result.
function simulate_build(n_modules, deps_per_module) {
  var sources = []
  var results = {}
  var m = 0
  var d = 0
  var toks = null
  var tree = null
  var scope = null
  var value = null
  var token_total = 0
  var node_total = 0
  for (m = 0; m < n_modules; m++) {
    push(sources, generate_module(m, deps_per_module))
  }
  for (m = 0; m < n_modules; m++) {
    toks = tokenize(sources[m])
    token_total += length(toks)
    tree = parse_tokens(toks)
    node_total += length(tree)
    scope = {}
    // Only already-loaded modules (index below m) can be dependencies.
    for (d = 0; d < deps_per_module && d < m; d++) {
      scope["dep" + text(d)] = results["mod_" + text(d)]
    }
    value = evaluate(tree, scope)
    results["mod_" + text(m)] = value
  }
  return {
    modules: n_modules,
    total_tokens: token_total,
    total_nodes: node_total,
    last_result: value
  }
}
// Order the modules mod_0 .. mod_{n-1} so that every module comes after
// its dependencies. Module i depends on modules 0 .. min(deps_per_module, i) - 1.
// Uses Kahn's algorithm with an array as the work queue and a read cursor
// in place of dequeuing.
function topo_sort(n_modules, deps_per_module) {
  var dependents = {}
  var pending = {}
  var m = 0
  var d = 0
  var mod = null
  for (m = 0; m < n_modules; m++) {
    mod = "mod_" + text(m)
    dependents[mod] = []
    pending[mod] = 0
  }
  for (m = 0; m < n_modules; m++) {
    mod = "mod_" + text(m)
    for (d = 0; d < deps_per_module && d < m; d++) {
      push(dependents["mod_" + text(d)], mod)
      pending[mod] = pending[mod] + 1
    }
  }
  // Seed the queue with every module that has no unmet dependency.
  var ready = []
  var names = array(pending)
  for (m = 0; m < length(names); m++) {
    if (pending[names[m]] == 0) push(ready, names[m])
  }
  var order = []
  var head = 0
  var current = null
  var unlocked = null
  var succ = null
  while (head < length(ready)) {
    current = ready[head]
    head++
    push(order, current)
    unlocked = dependents[current]
    if (unlocked) {
      for (m = 0; m < length(unlocked); m++) {
        succ = unlocked[m]
        pending[succ] = pending[succ] - 1
        if (pending[succ] == 0) push(ready, succ)
      }
    }
  }
  return order
}
return {
// Small build: 50 modules, 3 deps each
build_50: function(n) {
var i = 0
var result = null
for (i = 0; i < n; i++) {
result = simulate_build(50, 3)
}
return result
},
// Medium build: 200 modules, 5 deps each
build_200: function(n) {
var i = 0
var result = null
for (i = 0; i < n; i++) {
result = simulate_build(200, 5)
}
return result
},
// Large build: 500 modules, 5 deps each
build_500: function(n) {
var i = 0
var result = null
for (i = 0; i < n; i++) {
result = simulate_build(500, 5)
}
return result
},
// Topo sort of 500 module dependency graph
topo_sort_500: function(n) {
var i = 0
var order = null
for (i = 0; i < n; i++) {
order = topo_sort(500, 5)
}
return order
}
}

160
benches/nbody.cm Normal file
View File

@@ -0,0 +1,160 @@
// nbody.cm — N-body gravitational simulation kernel
// Pure numeric + allocation workload. Classic VM benchmark.
var math = use('math/radians')
// Simulation constants.
// PI is written out as a literal rather than taken from the math module.
def PI = 3.141592653589793
// Mass of the sun, defined as 4 * PI^2; make_system expresses each planet's
// mass as a fraction of it.
def SOLAR_MASS = 4 * PI * PI
// Factor applied to the planets' initial velocity components in make_system.
def DAYS_PER_YEAR = 365.24
function make_system() {
  // Build the five-body system (sun, jupiter, saturn, uranus, neptune) as an
  // array of records with position (x, y, z), velocity (vx, vy, vz) and mass.
  // The sun's velocity is then set so that the summed momentum of the
  // planets is cancelled.
  var sun = {x: 0, y: 0, z: 0, vx: 0, vy: 0, vz: 0, mass: SOLAR_MASS}
  var jupiter = {
    x: 4.84143144246472090,
    y: -1.16032004402742839,
    z: -0.103622044471123109,
    vx: 0.00166007664274403694 * DAYS_PER_YEAR,
    vy: 0.00769901118419740425 * DAYS_PER_YEAR,
    vz: -0.0000690460016972063023 * DAYS_PER_YEAR,
    mass: 0.000954791938424326609 * SOLAR_MASS
  }
  var saturn = {
    x: 8.34336671824457987,
    y: 4.12479856412430479,
    z: -0.403523417114321381,
    vx: -0.00276742510726862411 * DAYS_PER_YEAR,
    vy: 0.00499852801234917238 * DAYS_PER_YEAR,
    vz: 0.0000230417297573763929 * DAYS_PER_YEAR,
    mass: 0.000285885980666130812 * SOLAR_MASS
  }
  var uranus = {
    x: 12.8943695621391310,
    y: -15.1111514016986312,
    z: -0.223307578892655734,
    vx: 0.00296460137564761618 * DAYS_PER_YEAR,
    vy: 0.00237847173959480950 * DAYS_PER_YEAR,
    vz: -0.0000296589568540237556 * DAYS_PER_YEAR,
    mass: 0.0000436624404335156298 * SOLAR_MASS
  }
  var neptune = {
    x: 15.3796971148509165,
    y: -25.9193146099879641,
    z: 0.179258772950371181,
    vx: 0.00268067772490389322 * DAYS_PER_YEAR,
    vy: 0.00162824170038242295 * DAYS_PER_YEAR,
    vz: -0.0000951592254519715870 * DAYS_PER_YEAR,
    mass: 0.0000515138902046611451 * SOLAR_MASS
  }
  var system = [sun, jupiter, saturn, uranus, neptune]
  // Total momentum of the system (the sun contributes 0 at this point).
  var mom_x = 0
  var mom_y = 0
  var mom_z = 0
  var idx = 0
  var body = null
  for (idx = 0; idx < length(system); idx++) {
    body = system[idx]
    mom_x += body.vx * body.mass
    mom_y += body.vy * body.mass
    mom_z += body.vz * body.mass
  }
  sun.vx = -mom_x / SOLAR_MASS
  sun.vy = -mom_y / SOLAR_MASS
  sun.vz = -mom_z / SOLAR_MASS
  return system
}
function advance(bodies, dt) {
  // Advance the system by one time step of size dt, mutating the body
  // records in place: first every pair exchanges a velocity change, then
  // every position moves by dt times the updated velocity.
  var count = length(bodies)
  var a = 0
  var b = 0
  var first = null
  var second = null
  var dx = 0
  var dy = 0
  var dz = 0
  var dist_sq = 0
  var dist = 0
  var mag = 0
  for (a = 0; a < count; a++) {
    first = bodies[a]
    // Each unordered pair is visited exactly once.
    for (b = a + 1; b < count; b++) {
      second = bodies[b]
      dx = first.x - second.x
      dy = first.y - second.y
      dz = first.z - second.z
      dist_sq = dx * dx + dy * dy + dz * dz
      dist = math.sqrt(dist_sq)
      mag = dt / (dist_sq * dist)
      first.vx -= dx * second.mass * mag
      first.vy -= dy * second.mass * mag
      first.vz -= dz * second.mass * mag
      second.vx += dx * first.mass * mag
      second.vy += dy * first.mass * mag
      second.vz += dz * first.mass * mag
    }
  }
  for (a = 0; a < count; a++) {
    first = bodies[a]
    first.x += dt * first.vx
    first.y += dt * first.vy
    first.z += dt * first.vz
  }
}
function energy(bodies) {
  // Total energy of the system: 0.5 * mass * speed^2 for every body, minus
  // mass_i * mass_j / distance for every unordered pair.
  var total = 0
  var count = length(bodies)
  var a = 0
  var b = 0
  var first = null
  var second = null
  var dx = 0
  var dy = 0
  var dz = 0
  for (a = 0; a < count; a++) {
    first = bodies[a]
    total += 0.5 * first.mass * (first.vx * first.vx + first.vy * first.vy + first.vz * first.vz)
    for (b = a + 1; b < count; b++) {
      second = bodies[b]
      dx = first.x - second.x
      dy = first.y - second.y
      dz = first.z - second.z
      total -= (first.mass * second.mass) / math.sqrt(dx * dx + dy * dy + dz * dz)
    }
  }
  return total
}
return {
nbody_1k: function(n) {
var i = 0
var j = 0
var bodies = null
for (i = 0; i < n; i++) {
bodies = make_system()
for (j = 0; j < 1000; j++) advance(bodies, 0.01)
energy(bodies)
}
},
nbody_10k: function(n) {
var i = 0
var j = 0
var bodies = null
for (i = 0; i < n; i++) {
bodies = make_system()
for (j = 0; j < 10000; j++) advance(bodies, 0.01)
energy(bodies)
}
}
}

154
benches/ray_tracer.cm Normal file
View File

@@ -0,0 +1,154 @@
// ray_tracer.cm — Simple ray tracer kernel
// Control flow + numeric + allocation. Classic VM benchmark.
var math = use('math/radians')
function vec(x, y, z) {
  // Construct a three-component vector record.
  var v = {x: x, y: y, z: z}
  return v
}
function vadd(a, b) {
  // Component-wise sum, returned as a new record.
  var sum = {x: a.x + b.x, y: a.y + b.y, z: a.z + b.z}
  return sum
}
function vsub(a, b) {
  // Component-wise difference a - b, returned as a new record.
  var diff = {x: a.x - b.x, y: a.y - b.y, z: a.z - b.z}
  return diff
}
function vmul(v, s) {
  // Scale every component of v by the number s; returns a new record.
  var scaled = {x: v.x * s, y: v.y * s, z: v.z * s}
  return scaled
}
function vdot(a, b) {
  // Dot product of two vector records.
  var dot = a.x * b.x + a.y * b.y + a.z * b.z
  return dot
}
function vnorm(v) {
  // Scale v to unit length. A vector whose length compares equal to 0 is
  // mapped to the zero vector rather than divided.
  var mag = math.sqrt(vdot(v, v))
  if (mag != 0) {
    return vmul(v, 1 / mag)
  }
  return vec(0, 0, 0)
}
function make_sphere(center, radius, color) {
  // Bundle a sphere's center vector, radius and color vector into a record.
  var sphere = {center: center, radius: radius, color: color}
  return sphere
}
function intersect_sphere(origin, dir, sphere) {
  // Distance along the ray origin + t * dir to the nearest hit on the sphere
  // with t > 0.001, or -1 when there is none. The quadratic is solved
  // without a leading coefficient, i.e. as if vdot(dir, dir) were 1.
  var offset = vsub(origin, sphere.center)
  var b = vdot(offset, dir)
  var c = vdot(offset, offset) - sphere.radius * sphere.radius
  var disc = b * b - c
  var root = 0
  var near = 0
  var far = 0
  if (disc < 0) return -1
  root = math.sqrt(disc)
  near = -b - root
  if (near > 0.001) return near
  far = -b + root
  if (far > 0.001) return far
  return -1
}
function make_scene() {
  // Fixed scene: five small coloured spheres, one radius-100 grey sphere
  // centred far below them, and a normalised light direction.
  var light = vnorm(vec(1, 1, -1))
  var spheres = [
    make_sphere(vec(0, -1, 5), 1, vec(1, 0, 0)),
    make_sphere(vec(2, 0, 6), 1, vec(0, 1, 0)),
    make_sphere(vec(-2, 0, 4), 1, vec(0, 0, 1)),
    make_sphere(vec(0, 1, 4.5), 0.5, vec(1, 1, 0)),
    make_sphere(vec(1, -0.5, 3), 0.3, vec(1, 0, 1)),
    make_sphere(vec(0, -101, 5), 100, vec(0.5, 0.5, 0.5))
  ]
  var scene = {spheres: spheres, light: light}
  return scene
}
function trace(origin, dir, scene) {
  // Shade one ray: find the closest sphere hit, compute a diffuse term from
  // the surface normal and the light direction, test for shadow with a
  // second ray toward the light, and return the sphere colour scaled by the
  // resulting intensity. Rays that hit nothing get a fixed sky colour.
  var spheres = scene.spheres
  var nearest_t = 999999
  var nearest = null
  var idx = 0
  var t = 0
  for (idx = 0; idx < length(spheres); idx++) {
    t = intersect_sphere(origin, dir, spheres[idx])
    if (t > 0 && t < nearest_t) {
      nearest_t = t
      nearest = spheres[idx]
    }
  }
  if (!nearest) return vec(0.2, 0.3, 0.5) // sky color
  var hit = vadd(origin, vmul(dir, nearest_t))
  var normal = vnorm(vsub(hit, nearest.center))
  var diffuse = vdot(normal, scene.light)
  if (diffuse < 0) diffuse = 0
  // Start the shadow ray slightly off the surface, along the normal.
  var probe = vadd(hit, vmul(normal, 0.001))
  var shadowed = false
  for (idx = 0; idx < length(spheres); idx++) {
    if (spheres[idx] != nearest) {
      t = intersect_sphere(probe, scene.light, spheres[idx])
      if (t > 0) {
        shadowed = true
        break
      }
    }
  }
  // Ambient term only when shadowed; ambient plus weighted diffuse otherwise.
  var intensity = 0.15
  if (!shadowed) intensity = 0.15 + diffuse * 0.85
  return vmul(nearest.color, intensity)
}
function render(width, height, scene) {
  // Trace one ray through the centre of every pixel of a width x height
  // image, from the origin toward z = 1, and return the per-channel sums of
  // the traced colours as {r, g, b}.
  var aspect = width / height
  var fov = 1.0
  var eye = vec(0, 0, 0)
  var sum_r = 0
  var sum_g = 0
  var sum_b = 0
  var row = 0
  var col = 0
  var u = 0
  var v = 0
  var ray = null
  var shade = null
  for (row = 0; row < height; row++) {
    for (col = 0; col < width; col++) {
      u = (2 * (col + 0.5) / width - 1) * aspect * fov
      v = (1 - 2 * (row + 0.5) / height) * fov
      ray = vnorm(vec(u, v, 1))
      shade = trace(eye, ray, scene)
      sum_r += shade.x
      sum_g += shade.y
      sum_b += shade.z
    }
  }
  return {r: sum_r, g: sum_g, b: sum_b}
}
var scene = make_scene()
return {
raytrace_32x32: function(n) {
var i = 0
var result = null
for (i = 0; i < n; i++) {
result = render(32, 32, scene)
}
return result
},
raytrace_64x64: function(n) {
var i = 0
var result = null
for (i = 0; i < n; i++) {
result = render(64, 64, scene)
}
return result
}
}

251
benches/richards.cm Normal file
View File

@@ -0,0 +1,251 @@
// richards.cm — Richards benchmark (scheduler simulation)
// Object-ish workload: dynamic dispatch, state machines, queuing.
// Task identifiers. Each one is also the task's slot in the scheduler's
// task table and the value stored in its `id` field.
def IDLE = 0
def WORKER = 1
def HANDLER_A = 2
def HANDLER_B = 3
def DEVICE_A = 4
def DEVICE_B = 5
// Number of task slots.
def NUM_TASKS = 6
// Values of a task record's `state` field. Only TASK_RUNNING tasks are
// picked by the scheduler's find_next.
def TASK_RUNNING = 0
def TASK_WAITING = 1
def TASK_HELD = 2
def TASK_SUSPENDED = 3
function make_packet(link, id, kind) {
  // Create a packet record: `link` chains packets into a queue, `id` names
  // the destination task, and the payload starts as datum 0 plus the array
  // produced by array(4, 0).
  var payload = array(4, 0)
  return {link: link, id: id, kind: kind, datum: 0, data: payload}
}
// Build one scheduler instance. All state lives in the closure variables
// declared below and is shared by the six task functions; the returned
// record exposes a single `run` function.
function scheduler() {
// Task table indexed by task id (presumably NUM_TASKS slots pre-filled
// with null — confirm array(n, v) semantics).
var tasks = array(NUM_TASKS, null)
// Task whose fn is being executed by run().
var current = null
// Counters reported by run().
var queue_count = 0
var hold_count = 0
// Idle task state: v1 counts down to the point where idle holds itself,
// v2 is a bit pattern steering which device idle releases.
var v1 = 0
var v2 = 0
// Worker state: next destination handler and a cycling data value.
var w_id = HANDLER_A
var w_datum = 0
// Handler state: most recent packet and number of packets received.
var h_a_queue = null
var h_a_count = 0
var h_b_queue = null
var h_b_count = 0
// Packet parked on each device, if any.
var dev_a_pkt = null
var dev_b_pkt = null
// Highest-priority task in state TASK_RUNNING, or null when there is none.
// On equal priority the lowest task index wins (strict > comparison).
var find_next = function() {
var best = null
var i = 0
for (i = 0; i < NUM_TASKS; i++) {
if (tasks[i] && tasks[i].state == TASK_RUNNING) {
if (!best || tasks[i].priority > best.priority) {
best = tasks[i]
}
}
}
return best
}
// Mark the current task held and pick the next runnable task.
var hold_self = function() {
hold_count++
if (current) current.state = TASK_HELD
return find_next()
}
// Make task `id` runnable again if it was held. Returns that task when it
// outranks the current one (even if its state was not TASK_HELD),
// otherwise the current task.
var release = function(id) {
var t = tasks[id]
if (!t) return find_next()
if (t.state == TASK_HELD) t.state = TASK_RUNNING
if (t.priority > (current ? current.priority : -1)) return t
return current
}
// Deliver pkt to the task named by pkt.id. The packet's id is then
// overwritten with the sender's id. An empty destination queue makes the
// destination runnable and may preempt; otherwise pkt is appended to the
// end of the destination's linked queue.
var queue_packet = function(pkt) {
var t = tasks[pkt.id]
var p = null
if (!t) return find_next()
queue_count++
pkt.link = null
pkt.id = current ? current.id : 0
if (!t.queue) {
t.queue = pkt
t.state = TASK_RUNNING
if (t.priority > (current ? current.priority : -1)) return t
} else {
p = t.queue
while (p.link) p = p.link
p.link = pkt
}
return current
}
// Every task fn receives the packet taken from its queue (or null) and
// returns the task to run next.
// Idle task
// Counts v1 down and holds itself when it reaches 0; until then the low
// bit of v2 selects which device to release, and v2 is shifted (and
// XOR-ed with 0xD008 on the DEVICE_B path).
tasks[IDLE] = {id: IDLE, priority: 0, queue: null, state: TASK_RUNNING,
hold_count: 0, queue_count: 0,
fn: function(pkt) {
v1--
if (v1 == 0) return hold_self()
if ((v2 & 1) == 0) {
v2 = v2 >> 1
return release(DEVICE_A)
}
v2 = (v2 >> 1) ^ 0xD008
return release(DEVICE_B)
}
}
// Worker task
// Alternates the destination between the two handlers, fills the packet's
// four data slots with values 66..91 (65 + w_datum, w_datum cycling
// 1..26), and queues the packet.
tasks[WORKER] = {id: WORKER, priority: 1000, queue: null, state: TASK_SUSPENDED,
hold_count: 0, queue_count: 0,
fn: function(pkt) {
var i = 0
if (!pkt) return hold_self()
w_id = (w_id == HANDLER_A) ? HANDLER_B : HANDLER_A
pkt.id = w_id
pkt.datum = 0
for (i = 0; i < 4; i++) {
w_datum++
if (w_datum > 26) w_datum = 1
pkt.data[i] = 65 + w_datum
}
return queue_packet(pkt)
}
}
// Handler A
// Remembers an incoming packet; while fewer than 3 packets have arrived it
// re-queues the remembered packet, afterwards it releases DEVICE_A. With
// nothing remembered it holds itself.
tasks[HANDLER_A] = {id: HANDLER_A, priority: 2000, queue: null, state: TASK_SUSPENDED,
hold_count: 0, queue_count: 0,
fn: function(pkt) {
var p = null
if (pkt) { h_a_queue = pkt; h_a_count++ }
if (h_a_queue) {
p = h_a_queue
h_a_queue = p.link
if (h_a_count < 3) return queue_packet(p)
return release(DEVICE_A)
}
return hold_self()
}
}
// Handler B
// Same logic as Handler A, using its own state and DEVICE_B.
tasks[HANDLER_B] = {id: HANDLER_B, priority: 3000, queue: null, state: TASK_SUSPENDED,
hold_count: 0, queue_count: 0,
fn: function(pkt) {
var p = null
if (pkt) { h_b_queue = pkt; h_b_count++ }
if (h_b_queue) {
p = h_b_queue
h_b_queue = p.link
if (h_b_count < 3) return queue_packet(p)
return release(DEVICE_B)
}
return hold_self()
}
}
// Device A
// Parks an incoming packet and holds; when run without a packet it sends
// the parked packet on via queue_packet, or holds if none is parked.
tasks[DEVICE_A] = {id: DEVICE_A, priority: 4000, queue: null, state: TASK_SUSPENDED,
hold_count: 0, queue_count: 0,
fn: function(pkt) {
var p = null
if (pkt) { dev_a_pkt = pkt; return hold_self() }
if (dev_a_pkt) {
p = dev_a_pkt
dev_a_pkt = null
return queue_packet(p)
}
return hold_self()
}
}
// Device B
// Same logic as Device A, using its own parked-packet slot.
tasks[DEVICE_B] = {id: DEVICE_B, priority: 5000, queue: null, state: TASK_SUSPENDED,
hold_count: 0, queue_count: 0,
fn: function(pkt) {
var p = null
if (pkt) { dev_b_pkt = pkt; return hold_self() }
if (dev_b_pkt) {
p = dev_b_pkt
dev_b_pkt = null
return queue_packet(p)
}
return hold_self()
}
}
// Reset all state, seed the worker with two linked packets, then run tasks
// until none is runnable or iterations * 10 steps have been taken.
// Returns {queue_count, hold_count, steps}.
var run = function(iterations) {
var i = 0
var pkt1 = null
var pkt2 = null
var steps = 0
var pkt = null
var next = null
v1 = iterations
v2 = 0xBEEF
queue_count = 0
hold_count = 0
w_id = HANDLER_A
w_datum = 0
h_a_queue = null
h_a_count = 0
h_b_queue = null
h_b_count = 0
dev_a_pkt = null
dev_b_pkt = null
// Only the idle task starts runnable; per-task hold_count/queue_count
// fields are not reset here.
for (i = 0; i < NUM_TASKS; i++) {
if (tasks[i]) {
tasks[i].state = (i == IDLE) ? TASK_RUNNING : TASK_SUSPENDED
tasks[i].queue = null
}
}
pkt1 = make_packet(null, WORKER, 1)
pkt2 = make_packet(pkt1, WORKER, 1)
tasks[WORKER].queue = pkt2
tasks[WORKER].state = TASK_RUNNING
current = find_next()
while (current && steps < iterations * 10) {
// Hand the task the head of its queue, if it has one.
pkt = current.queue
if (pkt) {
current.queue = pkt.link
current.queue_count++
}
next = current.fn(pkt)
// A task fn returning nothing falls back to a fresh priority scan.
if (next) current = next
else current = find_next()
steps++
}
return {queue_count: queue_count, hold_count: hold_count, steps: steps}
}
return {run: run}
}
return {
richards_100: function(n) {
var i = 0
var s = null
var result = null
for (i = 0; i < n; i++) {
s = scheduler()
result = s.run(100)
}
return result
},
richards_1k: function(n) {
var i = 0
var s = null
var result = null
for (i = 0; i < n; i++) {
s = scheduler()
result = s.run(1000)
}
return result
}
}

180
benches/sorting.cm Normal file
View File

@@ -0,0 +1,180 @@
// sorting.cm — Sorting and searching kernel
// Array manipulation, comparison-heavy, allocation patterns.
function make_random_array(n, seed) {
  // Fill an array with n pseudo-random integers in 0..9999, produced by a
  // linear congruential step from `seed`. The same seed always yields the
  // same array.
  var out = []
  var state = seed
  var made = 0
  while (made < n) {
    state = ((state * 1103515245 + 12345) & 0x7FFFFFFF) | 0
    push(out, state % 10000)
    made++
  }
  return out
}
// Returns [n-1, n-2, ..., 0]; an empty array when n <= 0.
function make_descending(n) {
  var out = []
  var next = n - 1
  while (next >= 0) {
    push(out, next)
    next--
  }
  return out
}
// In-place recursive quicksort of arr[lo..hi] (both bounds inclusive).
// The pivot is the element at the middle index of the range; two cursors
// walk inward and swap out-of-place pairs, then each side is sorted
// recursively. Always returns null; the result is the mutated array.
function qsort(arr, lo, hi) {
  var left = lo
  var right = hi
  var pivot = arr[floor((lo + hi) / 2)]
  var held = 0
  if (lo >= hi) return null
  while (left <= right) {
    while (arr[left] < pivot) left++
    while (arr[right] > pivot) right--
    if (left <= right) {
      held = arr[right]
      arr[right] = arr[left]
      arr[left] = held
      left++
      right--
    }
  }
  // After partitioning, `right` ends the lower part and `left` starts the upper part
  if (lo < right) qsort(arr, lo, right)
  if (left < hi) qsort(arr, left, hi)
  return null
}
// Top-down merge sort. Arrays of length 0 or 1 are returned as-is (same
// object); longer arrays are split at the midpoint into two freshly sliced
// halves, each sorted recursively and combined with merge().
function msort(arr) {
  var count = length(arr)
  var half = 0
  if (count > 1) {
    half = floor(count / 2)
    return merge(msort(array(arr, 0, half)), msort(array(arr, half, count)))
  }
  return arr
}
// Merges two ascending arrays into a new ascending array. On equal heads the
// element from `a` is taken first. Neither input is modified.
function merge(a, b) {
  var out = []
  var count_a = length(a)
  var count_b = length(b)
  var ia = 0
  var ib = 0
  while (ia < count_a && ib < count_b) {
    if (a[ia] <= b[ib]) {
      push(out, a[ia])
      ia++
    } else {
      push(out, b[ib])
      ib++
    }
  }
  // At most one of the two tails below is non-empty
  for (; ia < count_a; ia++) push(out, a[ia])
  for (; ib < count_b; ib++) push(out, b[ib])
  return out
}
// Binary search over an ascending array.
// Returns an index whose element equals `target`, or -1 when none does.
function bsearch(arr, target) {
  var low = 0
  var high = length(arr) - 1
  var probe = 0
  while (low <= high) {
    probe = floor((low + high) / 2)
    if (arr[probe] == target) return probe
    if (arr[probe] < target) {
      low = probe + 1
    } else {
      high = probe - 1
    }
  }
  return -1
}
// Builds n records {id, score, name} with pseudo-random scores (same
// generator step as make_random_array, fixed starting state 42) and returns
// them ordered by the "score" field using the built-in sort.
function sort_records(n) {
  var rows = []
  var state = 42
  var id = 0
  while (id < n) {
    state = ((state * 1103515245 + 12345) & 0x7FFFFFFF) | 0
    push(rows, {id: id, score: state % 10000, name: `item_${id}`})
    id++
  }
  return sort(rows, "score")
}
return {
// Quicksort 1K random integers
qsort_1k: function(n) {
var i = 0
var a = null
for (i = 0; i < n; i++) {
a = make_random_array(1000, i)
qsort(a, 0, length(a) - 1)
}
return a
},
// Quicksort 10K random integers
qsort_10k: function(n) {
var i = 0
var a = null
for (i = 0; i < n; i++) {
a = make_random_array(10000, i)
qsort(a, 0, length(a) - 1)
}
return a
},
// Merge sort 1K (allocation heavy)
msort_1k: function(n) {
var i = 0
var result = null
for (i = 0; i < n; i++) {
result = msort(make_random_array(1000, i))
}
return result
},
// Built-in sort 1K
builtin_sort_1k: function(n) {
var i = 0
var result = null
for (i = 0; i < n; i++) {
result = sort(make_random_array(1000, i))
}
return result
},
// Sort worst case (descending → ascending)
sort_worst_case: function(n) {
var i = 0
var a = null
for (i = 0; i < n; i++) {
a = make_descending(1000)
qsort(a, 0, length(a) - 1)
}
return a
},
// Binary search in sorted array
bsearch_1k: function(n) {
var sorted = make_random_array(1000, 42)
sorted = sort(sorted)
var found = 0
var i = 0
for (i = 0; i < n; i++) {
if (bsearch(sorted, sorted[i % 1000]) >= 0) found++
}
return found
},
// Sort records by field
sort_records_500: function(n) {
var i = 0
var result = null
for (i = 0; i < n; i++) {
result = sort_records(500)
}
return result
}
}

82
benches/spectral_norm.cm Normal file
View File

@@ -0,0 +1,82 @@
// spectral_norm.cm — Spectral norm kernel
// Pure numeric, dense array access, mathematical computation.
var math = use('math/radians')
// Element (i, j) of the matrix A used by the kernel:
// 1 / ((i + j) * (i + j + 1) / 2 + i + 1), with zero-based i and j.
function eval_a(i, j) {
  var diag = i + j
  return 1.0 / (diag * (diag + 1) / 2 + i + 1)
}
// Writes the matrix-vector product A * u into au, for the leading n x n part
// of A: au[i] = sum over j of eval_a(i, j) * u[j]. Returns nothing.
function eval_a_times_u(n, u, au) {
  var row = 0
  var col = 0
  var acc = 0
  while (row < n) {
    acc = 0
    col = 0
    while (col < n) {
      acc += eval_a(row, col) * u[col]
      col++
    }
    au[row] = acc
    row++
  }
}
// Writes the product of the transpose of A with u into atu:
// atu[i] = sum over j of eval_a(j, i) * u[j]. Returns nothing.
function eval_at_times_u(n, u, atu) {
  var col = 0
  var row = 0
  var acc = 0
  while (col < n) {
    acc = 0
    row = 0
    while (row < n) {
      acc += eval_a(row, col) * u[row]
      row++
    }
    atu[col] = acc
    col++
  }
}
// Writes (A-transpose * A) * u into atau in two passes, using a freshly
// allocated n-element scratch vector for the intermediate A * u.
function eval_ata_times_u(n, u, atau) {
  var scratch = array(n, 0)
  eval_a_times_u(n, u, scratch)
  eval_at_times_u(n, scratch, atau)
}
// Runs 10 rounds of power iteration with A-transpose * A on an n-element
// vector of ones (u -> v, then v -> u each round) and returns
// sqrt((u . v) / (v . v)) computed from the final pair of vectors.
function spectral_norm(n) {
  var u = array(n, 1)
  var v = array(n, 0)
  var round = 0
  var k = 0
  var dot_uv = 0
  var dot_vv = 0
  for (round = 0; round < 10; round++) {
    eval_ata_times_u(n, u, v)
    eval_ata_times_u(n, v, u)
  }
  for (k = 0; k < n; k++) {
    dot_uv += u[k] * v[k]
    dot_vv += v[k] * v[k]
  }
  return math.sqrt(dot_uv / dot_vv)
}
return {
spectral_100: function(n) {
var i = 0
var result = 0
for (i = 0; i < n; i++) {
result = spectral_norm(100)
}
return result
},
spectral_200: function(n) {
var i = 0
var result = 0
for (i = 0; i < n; i++) {
result = spectral_norm(200)
}
return result
}
}

View File

@@ -0,0 +1,188 @@
// string_processing.cm — String-heavy kernel
// Concat, split, search, replace, interning path stress.
// Returns `paragraphs` copies of a fixed lorem-ipsum sentence joined by single
// spaces; the empty string when paragraphs <= 0.
function make_lorem(paragraphs) {
  var base = "Lorem ipsum dolor sit amet consectetur adipiscing elit sed do eiusmod tempor incididunt ut labore et dolore magna aliqua Ut enim ad minim veniam quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat"
  var out = ""
  var k = 0
  while (k < paragraphs) {
    if (k > 0) out = out + " "
    out = out + base
    k++
  }
  return out
}
// Splits txt on single spaces and returns a record mapping each distinct word
// to the array of positions (word indices, ascending) at which it occurs.
function build_index(txt) {
  var tokens = array(txt, " ")
  var table = {}
  var pos = 0
  var word = null
  while (pos < length(tokens)) {
    word = tokens[pos]
    // First sighting of a word creates its position list
    if (!table[word]) table[word] = []
    push(table[word], pos)
    pos++
  }
  return table
}
// Edit distance between texts a and b where insert, delete and substitute
// each cost 1. Uses the two-row dynamic-programming formulation: `above` is
// the finished previous row, `row` is the one being filled, and the two are
// swapped after every character of a.
function edit_distance(a, b) {
  var la = length(a)
  var lb = length(b)
  if (la == 0) return lb
  if (lb == 0) return la
  var above = array(lb + 1, 0)
  var row = array(lb + 1, 0)
  var chars_a = array(a)
  var chars_b = array(b)
  var i = 0
  var j = 0
  var best = 0
  var candidate = 0
  var swap = null
  // Row 0: turning the empty prefix of a into b[0..j) takes j insertions
  for (j = 0; j <= lb; j++) above[j] = j
  for (i = 1; i <= la; i++) {
    row[0] = i
    for (j = 1; j <= lb; j++) {
      // deletion
      best = above[j] + 1
      // insertion
      candidate = row[j - 1] + 1
      if (candidate < best) best = candidate
      // substitution (free when the characters match)
      candidate = above[j - 1]
      if (chars_a[i - 1] != chars_b[j - 1]) candidate = candidate + 1
      if (candidate < best) best = candidate
      row[j] = best
    }
    swap = above
    above = row
    row = swap
  }
  // After the final swap the last completed row lives in `above`
  return above[lb]
}
// Shared benchmark inputs, built once when this module body runs:
// 5 and 20 space-joined copies of the lorem sentence from make_lorem.
var lorem_5 = make_lorem(5)
var lorem_20 = make_lorem(20)
return {
// Split text into words and count
string_split_count: function(n) {
var i = 0
var words = null
var count = 0
for (i = 0; i < n; i++) {
words = array(lorem_5, " ")
count += length(words)
}
return count
},
// Build word index (split + hash + array ops)
string_index_build: function(n) {
var i = 0
var idx = null
for (i = 0; i < n; i++) {
idx = build_index(lorem_5)
}
return idx
},
// Search for substrings
string_search: function(n) {
var targets = ["dolor", "minim", "quis", "magna", "ipsum"]
var i = 0
var j = 0
var count = 0
for (i = 0; i < n; i++) {
for (j = 0; j < length(targets); j++) {
if (search(lorem_20, targets[j])) count++
}
}
return count
},
// Replace operations
string_replace: function(n) {
var i = 0
var result = null
for (i = 0; i < n; i++) {
result = replace(lorem_5, "dolor", "DOLOR")
result = replace(result, "ipsum", "IPSUM")
result = replace(result, "amet", "AMET")
}
return result
},
// String concatenation builder
string_builder: function(n) {
var i = 0
var j = 0
var s = null
var total = 0
for (i = 0; i < n; i++) {
s = ""
for (j = 0; j < 50; j++) {
s = s + "key=" + text(j) + "&value=" + text(j * 17) + "&"
}
total += length(s)
}
return total
},
// Edit distance (DP + array + string ops)
edit_distance: function(n) {
var words = ["kitten", "sitting", "saturday", "sunday", "intention", "execution"]
var i = 0
var j = 0
var total = 0
for (i = 0; i < n; i++) {
for (j = 0; j < length(words) - 1; j++) {
total += edit_distance(words[j], words[j + 1])
}
}
return total
},
// Upper/lower/trim chain
string_transforms: function(n) {
var src = " Hello World "
var i = 0
var x = 0
var result = null
for (i = 0; i < n; i++) {
result = trim(src)
result = upper(result)
result = lower(result)
x += length(result)
}
return x
},
// Starts_with / ends_with (interning path)
string_prefix_suffix: function(n) {
var strs = [
"application/json",
"text/html",
"image/png",
"application/xml",
"text/plain"
]
var i = 0
var j = 0
var count = 0
for (i = 0; i < n; i++) {
for (j = 0; j < length(strs); j++) {
if (starts_with(strs[j], "application/")) count++
if (ends_with(strs[j], "/json")) count++
if (starts_with(strs[j], "text/")) count++
}
}
return count
}
}

137
benches/tree_ops.cm Normal file
View File

@@ -0,0 +1,137 @@
// tree_ops.cm — Tree data structure operations kernel
// Pointer chasing, recursion, allocation patterns.
// Binary tree: create, walk, transform, check
// Builds a complete binary tree. A node at depth <= 0 is a leaf with val 1 and
// null children; any other node stores its own depth in val and has two
// independently built subtrees of depth - 1.
function make_tree(depth) {
  var node = {val: 1, left: null, right: null}
  if (depth <= 0) return node
  node.val = depth
  node.left = make_tree(depth - 1)
  node.right = make_tree(depth - 1)
  return node
}
// Checksum of a tree: 0 for a missing node, val for a node without a left
// child, otherwise val + check(left) - check(right).
function tree_check(node) {
  var total = 0
  if (node) {
    total = node.val
    if (node.left) total = total + tree_check(node.left) - tree_check(node.right)
  }
  return total
}
// Sum of val over every node of the tree; 0 for a missing node.
function tree_sum(node) {
  var total = 0
  if (node) total = node.val + tree_sum(node.left) + tree_sum(node.right)
  return total
}
// Number of nodes on the longest root-to-leaf path; 0 for a missing node.
function tree_depth(node) {
  var below_left = 0
  var below_right = 0
  if (!node) return 0
  below_left = tree_depth(node.left)
  below_right = tree_depth(node.right)
  if (below_left > below_right) return 1 + below_left
  return 1 + below_right
}
// Number of nodes in the tree; 0 for a missing node.
function tree_count(node) {
  var total = 0
  if (node) total = 1 + tree_count(node.left) + tree_count(node.right)
  return total
}
// Returns a new tree with the same shape as `node` in which every val is
// replaced by fn(val). fn is applied to a node before its left subtree and
// the left subtree before the right one. The input tree is not modified.
function tree_map(node, fn) {
  var copy = null
  if (!node) return null
  copy = {val: fn(node.val), left: null, right: null}
  copy.left = tree_map(node.left, fn)
  copy.right = tree_map(node.right, fn)
  return copy
}
// Appends every val of the tree to `result` in in-order sequence
// (left subtree, node, right subtree). Always returns null.
function tree_flatten(node, result) {
  if (node) {
    tree_flatten(node.left, result)
    push(result, node.val)
    tree_flatten(node.right, result)
  }
  return null
}
// Builds a tree from arr[lo..hi] (inclusive): the middle element becomes the
// node, the elements before it form the left subtree and those after it the
// right subtree. Returns null for an empty range (lo > hi). When arr is
// ascending the result is a balanced binary search tree.
function build_balanced(arr, lo, hi) {
  var mid = 0
  var node = null
  if (lo > hi) return null
  mid = floor((lo + hi) / 2)
  node = {val: arr[mid], left: null, right: null}
  node.left = build_balanced(arr, lo, mid - 1)
  node.right = build_balanced(arr, mid + 1, hi)
  return node
}
// Recursive membership test in a binary search tree: descends left when val
// is smaller than the node's val, right otherwise. Returns true on an equal
// val, false when the walk falls off the tree.
function bst_find(node, val) {
  if (!node) return false
  if (val == node.val) return true
  return bst_find(val < node.val ? node.left : node.right, val)
}
return {
// Binary tree create + check (allocation heavy)
tree_create_check: function(n) {
var i = 0
var t = null
var x = 0
for (i = 0; i < n; i++) {
t = make_tree(10)
x += tree_check(t)
}
return x
},
// Deep tree traversals
tree_traversal: function(n) {
var t = make_tree(12)
var x = 0
var i = 0
for (i = 0; i < n; i++) {
x += tree_sum(t) + tree_depth(t) + tree_count(t)
}
return x
},
// Tree map (create new tree from old)
tree_transform: function(n) {
var t = make_tree(10)
var i = 0
var mapped = null
for (i = 0; i < n; i++) {
mapped = tree_map(t, function(v) { return v * 2 + 1 })
}
return mapped
},
// Flatten + rebuild (array <-> tree conversion)
tree_flatten_rebuild: function(n) {
var t = make_tree(10)
var i = 0
var flat = null
var rebuilt = null
for (i = 0; i < n; i++) {
flat = []
tree_flatten(t, flat)
rebuilt = build_balanced(flat, 0, length(flat) - 1)
}
return rebuilt
},
// BST search (pointer chasing)
bst_search: function(n) {
// Build a balanced BST of 1024 elements
var data = []
var i = 0
for (i = 0; i < 1024; i++) push(data, i)
var bst = build_balanced(data, 0, 1023)
var found = 0
for (i = 0; i < n; i++) {
if (bst_find(bst, i % 1024)) found++
}
return found
}
}

6200
boot/bootstrap.cm.mcode Normal file

File diff suppressed because it is too large Load Diff

21056
boot/fold.cm.mcode Normal file

File diff suppressed because it is too large Load Diff

24810
boot/mcode.cm.mcode Normal file

File diff suppressed because it is too large Load Diff

29874
boot/parse.cm.mcode Normal file

File diff suppressed because it is too large Load Diff

10436
boot/tokenize.cm.mcode Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -17,8 +17,16 @@ var target_package = null
var buildtype = 'release'
var force_rebuild = false
var dry_run = false
var i = 0
var targets = null
var t = 0
var resolved = null
var lib = null
var results = null
var success = 0
var failed = 0
for (var i = 0; i < length(args); i++) {
for (i = 0; i < length(args); i++) {
if (args[i] == '-t' || args[i] == '--target') {
if (i + 1 < length(args)) {
target = args[++i]
@@ -51,8 +59,8 @@ for (var i = 0; i < length(args); i++) {
dry_run = true
} else if (args[i] == '--list-targets') {
log.console('Available targets:')
var targets = build.list_targets()
for (var t = 0; t < length(targets); t++) {
targets = build.list_targets()
for (t = 0; t < length(targets); t++) {
log.console(' ' + targets[t])
}
$stop()
@@ -65,7 +73,7 @@ for (var i = 0; i < length(args); i++) {
// Resolve local paths to absolute paths
if (target_package) {
if (target_package == '.' || starts_with(target_package, './') || starts_with(target_package, '../') || fd.is_dir(target_package)) {
var resolved = fd.realpath(target_package)
resolved = fd.realpath(target_package)
if (resolved) {
target_package = resolved
}
@@ -91,33 +99,35 @@ arrfor(packages, function(package) {
shop.extract(package)
})
var _build = null
if (target_package) {
// Build single package
log.console('Building ' + target_package + '...')
try {
var lib = build.build_dynamic(target_package, target, buildtype)
_build = function() {
lib = build.build_dynamic(target_package, target, buildtype)
if (lib) {
log.console('Built: ' + lib)
}
} catch (e) {
log.error('Build failed: ' + e)
} disruption {
log.error('Build failed')
$stop()
}
_build()
} else {
// Build all packages
log.console('Building all packages...')
var results = build.build_all_dynamic(target, buildtype)
var success = 0
var failed = 0
for (var i = 0; i < length(results); i++) {
results = build.build_all_dynamic(target, buildtype)
success = 0
failed = 0
for (i = 0; i < length(results); i++) {
if (results[i].library) {
success++
} else if (results[i].error) {
failed++
}
}
log.console(`Build complete: ${success} libraries built${failed > 0 ? `, ${failed} failed` : ''}`)
}

480
build.cm
View File

@@ -85,7 +85,8 @@ function ensure_dir(path) {
if (fd.stat(path).isDirectory) return
var parts = array(path, '/')
var current = starts_with(path, '/') ? '/' : ''
for (var i = 0; i < length(parts); i++) {
var i = 0
for (i = 0; i < length(parts); i++) {
if (parts[i] == '') continue
current += parts[i] + '/'
if (!fd.stat(current).isDirectory) fd.mkdir(current)
@@ -100,12 +101,13 @@ Build.ensure_dir = ensure_dir
// Compile a single C file for a package
// Returns the object file path (content-addressed in .cell/build)
Build.compile_file = function(pkg, file, target, buildtype = 'release') {
Build.compile_file = function(pkg, file, target, buildtype) {
var _buildtype = buildtype || 'release'
var pkg_dir = shop.get_package_dir(pkg)
var src_path = pkg_dir + '/' + file
if (!fd.is_file(src_path)) {
throw Error('Source file not found: ' + src_path)
print('Source file not found: ' + src_path); disrupt
}
// Get flags (with sigil replacement)
@@ -120,11 +122,11 @@ Build.compile_file = function(pkg, file, target, buildtype = 'release') {
var cmd_parts = [cc, '-c', '-fPIC']
// Add buildtype-specific flags
if (buildtype == 'release') {
if (_buildtype == 'release') {
cmd_parts = array(cmd_parts, ['-O3', '-DNDEBUG'])
} else if (buildtype == 'debug') {
} else if (_buildtype == 'debug') {
cmd_parts = array(cmd_parts, ['-O2', '-g'])
} else if (buildtype == 'minsize') {
} else if (_buildtype == 'minsize') {
cmd_parts = array(cmd_parts, ['-Os', '-DNDEBUG'])
}
@@ -133,10 +135,11 @@ Build.compile_file = function(pkg, file, target, buildtype = 'release') {
// Add package CFLAGS (resolve relative -I paths)
arrfor(cflags, function(flag) {
if (starts_with(flag, '-I') && !starts_with(flag, '-I/')) {
flag = '-I"' + pkg_dir + '/' + text(flag, 2) + '"'
var f = flag
if (starts_with(f, '-I') && !starts_with(f, '-I/')) {
f = '-I"' + pkg_dir + '/' + text(f, 2) + '"'
}
push(cmd_parts, flag)
push(cmd_parts, f)
})
// Add target CFLAGS
@@ -167,7 +170,7 @@ Build.compile_file = function(pkg, file, target, buildtype = 'release') {
log.console('Compiling ' + file)
var ret = os.system(full_cmd)
if (ret != 0) {
throw Error('Compilation failed: ' + file)
print('Compilation failed: ' + file); disrupt
}
return obj_path
@@ -175,12 +178,14 @@ Build.compile_file = function(pkg, file, target, buildtype = 'release') {
// Build all C files for a package
// Returns array of object file paths
Build.build_package = function(pkg, target = Build.detect_host_target(), exclude_main, buildtype = 'release') {
var c_files = pkg_tools.get_c_files(pkg, target, exclude_main)
Build.build_package = function(pkg, target, exclude_main, buildtype) {
var _target = target || Build.detect_host_target()
var _buildtype = buildtype || 'release'
var c_files = pkg_tools.get_c_files(pkg, _target, exclude_main)
var objects = []
arrfor(c_files, function(file) {
var obj = Build.compile_file(pkg, file, target, buildtype)
var obj = Build.compile_file(pkg, file, _target, _buildtype)
push(objects, obj)
})
@@ -192,14 +197,14 @@ Build.build_package = function(pkg, target = Build.detect_host_target(), exclude
// ============================================================================
// Compute link key from all inputs that affect the dylib output
function compute_link_key(objects, ldflags, target_ldflags, target, cc) {
function compute_link_key(objects, ldflags, target_ldflags, opts) {
// Sort objects for deterministic hash
var sorted_objects = sort(objects)
// Build a string representing all link inputs
var parts = []
push(parts, 'target:' + target)
push(parts, 'cc:' + cc)
push(parts, 'target:' + opts.target)
push(parts, 'cc:' + opts.cc)
arrfor(sorted_objects, function(obj) {
// Object paths are content-addressed, so the path itself is the hash
push(parts, 'obj:' + obj)
@@ -214,74 +219,46 @@ function compute_link_key(objects, ldflags, target_ldflags, target, cc) {
return content_hash(text(parts, '\n'))
}
// Build a dynamic library for a package
// Output goes to .cell/lib/<package_name>.<ext>
// Dynamic libraries do NOT link against core; undefined symbols are resolved at dlopen time
// Uses content-addressed store + symlink for caching
Build.build_dynamic = function(pkg, target = Build.detect_host_target(), buildtype = 'release') {
var objects = Build.build_package(pkg, target, true, buildtype) // exclude main.c
// Build a per-module dynamic library for a single C file
// Returns the content-addressed dylib path in .cell/build/<hash>.<target>.dylib
Build.build_module_dylib = function(pkg, file, target, buildtype) {
var _target = target || Build.detect_host_target()
var _buildtype = buildtype || 'release'
var obj = Build.compile_file(pkg, file, _target, _buildtype)
if (length(objects) == 0) {
log.console('No C files in ' + pkg)
return null
}
var lib_dir = shop.get_lib_dir()
var store_dir = lib_dir + '/store'
ensure_dir(lib_dir)
ensure_dir(store_dir)
var lib_name = shop.lib_name_for_package(pkg)
var dylib_ext = toolchains[target].system == 'windows' ? '.dll' : (toolchains[target].system == 'darwin' ? '.dylib' : '.so')
var stable_path = lib_dir + '/' + lib_name + dylib_ext
// Get link flags (with sigil replacement)
var ldflags = replace_sigils_array(pkg_tools.get_flags(pkg, 'LDFLAGS', target))
var target_ldflags = toolchains[target].c_link_args || []
var cc = toolchains[target].cpp || toolchains[target].c
var pkg_dir = shop.get_package_dir(pkg)
var tc = toolchains[_target]
var dylib_ext = tc.system == 'windows' ? '.dll' : (tc.system == 'darwin' ? '.dylib' : '.so')
var cc = tc.cpp || tc.c
var local_dir = get_local_dir()
var tc = toolchains[target]
var pkg_dir = shop.get_package_dir(pkg)
// Resolve relative -L paths in ldflags for hash computation
// Get link flags
var ldflags = replace_sigils_array(pkg_tools.get_flags(pkg, 'LDFLAGS', _target))
var target_ldflags = tc.c_link_args || []
var resolved_ldflags = []
arrfor(ldflags, function(flag) {
if (starts_with(flag, '-L') && !starts_with(flag, '-L/')) {
flag = '-L"' + pkg_dir + '/' + text(flag, 2) + '"'
var f = flag
if (starts_with(f, '-L') && !starts_with(f, '-L/')) {
f = '-L"' + pkg_dir + '/' + text(f, 2) + '"'
}
push(resolved_ldflags, flag)
push(resolved_ldflags, f)
})
// Compute link key
var link_key = compute_link_key(objects, resolved_ldflags, target_ldflags, target, cc)
var store_path = store_dir + '/' + lib_name + '-' + link_key + dylib_ext
// Content-addressed output: hash of (object + link flags + target)
var link_key = compute_link_key([obj], resolved_ldflags, target_ldflags, {target: _target, cc: cc})
var build_dir = get_build_dir()
ensure_dir(build_dir)
var dylib_path = build_dir + '/' + link_key + '.' + _target + dylib_ext
// Check if already linked in store
if (fd.is_file(store_path)) {
// Ensure symlink points to the store file
if (fd.is_link(stable_path)) {
var current_target = fd.readlink(stable_path)
if (current_target == store_path) {
// Already up to date
return stable_path
}
fd.unlink(stable_path)
} else if (fd.is_file(stable_path)) {
fd.unlink(stable_path)
}
fd.symlink(store_path, stable_path)
return stable_path
}
if (fd.is_file(dylib_path))
return dylib_path
// Build link command
var cmd_parts = [cc, '-shared', '-fPIC']
// Platform-specific flags for undefined symbols (resolved at dlopen) and size optimization
if (tc.system == 'darwin') {
cmd_parts = array(cmd_parts, [
'-undefined', 'dynamic_lookup',
'-Wl,-dead_strip',
'-Wl,-install_name,' + stable_path,
'-Wl,-rpath,@loader_path/../local',
'-Wl,-rpath,' + local_dir
])
@@ -293,41 +270,53 @@ Build.build_dynamic = function(pkg, target = Build.detect_host_target(), buildty
'-Wl,-rpath,' + local_dir
])
} else if (tc.system == 'windows') {
// Windows DLLs: use --allow-shlib-undefined for mingw
push(cmd_parts, '-Wl,--allow-shlib-undefined')
}
// Add .cell/local to library search path
push(cmd_parts, '-L"' + local_dir + '"')
arrfor(objects, function(obj) {
push(cmd_parts, '"' + obj + '"')
})
// Do NOT link against core library - symbols resolved at dlopen time
push(cmd_parts, '"' + obj + '"')
cmd_parts = array(cmd_parts, resolved_ldflags)
cmd_parts = array(cmd_parts, target_ldflags)
push(cmd_parts, '-o')
push(cmd_parts, '"' + store_path + '"')
push(cmd_parts, '"' + dylib_path + '"')
var cmd_str = text(cmd_parts, ' ')
log.console('Linking ' + lib_name + dylib_ext)
log.console('Linking module ' + file + ' -> ' + fd.basename(dylib_path))
var ret = os.system(cmd_str)
if (ret != 0) {
throw Error('Linking failed: ' + pkg)
print('Linking failed: ' + file); disrupt
}
// Update symlink to point to the new store file
if (fd.is_link(stable_path)) {
fd.unlink(stable_path)
} else if (fd.is_file(stable_path)) {
fd.unlink(stable_path)
// Install to deterministic lib/<pkg>/<stem>.dylib
var file_stem = file
var install_dir = shop.get_lib_dir() + '/' + shop.lib_name_for_package(pkg)
var stem_dir = fd.dirname(file_stem)
if (stem_dir && stem_dir != '.') {
install_dir = install_dir + '/' + stem_dir
}
fd.symlink(store_path, stable_path)
ensure_dir(install_dir)
var install_path = shop.get_lib_dir() + '/' + shop.lib_name_for_package(pkg) + '/' + file_stem + dylib_ext
fd.slurpwrite(install_path, fd.slurp(dylib_path))
return stable_path
return dylib_path
}
// Build the dynamic libraries of a package, one per C file.
// The file list comes from pkg_tools.get_c_files with its third argument set
// to true (named exclude_main elsewhere in this file).
// target defaults to the detected host target, buildtype to 'release'.
// Returns an array with one {file, symbol, dylib} record per C file, where
// dylib is whatever Build.build_module_dylib returned for that file.
Build.build_dynamic = function(pkg, target, buildtype) {
  var tgt = target || Build.detect_host_target()
  var mode = buildtype || 'release'
  var modules = []

  arrfor(pkg_tools.get_c_files(pkg, tgt, true), function(c_file) {
    push(modules, {
      file: c_file,
      symbol: shop.c_symbol_for_file(pkg, c_file),
      dylib: Build.build_module_dylib(pkg, c_file, tgt, mode)
    })
  })

  return modules
}
// ============================================================================
@@ -337,7 +326,9 @@ Build.build_dynamic = function(pkg, target = Build.detect_host_target(), buildty
// Build a static binary from multiple packages
// packages: array of package names
// output: output binary path
Build.build_static = function(packages, target = Build.detect_host_target(), output, buildtype = 'release') {
Build.build_static = function(packages, target, output, buildtype) {
var _target = target || Build.detect_host_target()
var _buildtype = buildtype || 'release'
var all_objects = []
var all_ldflags = []
var seen_flags = {}
@@ -347,14 +338,14 @@ Build.build_static = function(packages, target = Build.detect_host_target(), out
var is_core = (pkg == 'core')
// For core, include main.c; for others, exclude it
var objects = Build.build_package(pkg, target, !is_core, buildtype)
var objects = Build.build_package(pkg, _target, !is_core, _buildtype)
arrfor(objects, function(obj) {
push(all_objects, obj)
})
// Collect LDFLAGS (with sigil replacement)
var ldflags = replace_sigils_array(pkg_tools.get_flags(pkg, 'LDFLAGS', target))
var ldflags = replace_sigils_array(pkg_tools.get_flags(pkg, 'LDFLAGS', _target))
var pkg_dir = shop.get_package_dir(pkg)
// Deduplicate based on the entire LDFLAGS string for this package
@@ -362,28 +353,29 @@ Build.build_static = function(packages, target = Build.detect_host_target(), out
if (!seen_flags[ldflags_key]) {
seen_flags[ldflags_key] = true
arrfor(ldflags, function(flag) {
// Resolve relative -L paths
if (starts_with(flag, '-L') && !starts_with(flag, '-L/')) {
flag = '-L"' + pkg_dir + '/' + text(flag, 2) + '"'
var f = flag
if (starts_with(f, '-L') && !starts_with(f, '-L/')) {
f = '-L"' + pkg_dir + '/' + text(f, 2) + '"'
}
push(all_ldflags, flag)
push(all_ldflags, f)
})
}
})
if (length(all_objects) == 0) {
throw Error('No object files to link')
print('No object files to link'); disrupt
}
// Link
var cc = toolchains[target].c
var target_ldflags = toolchains[target].c_link_args || []
var exe_ext = toolchains[target].system == 'windows' ? '.exe' : ''
var cc = toolchains[_target].c
var target_ldflags = toolchains[_target].c_link_args || []
var exe_ext = toolchains[_target].system == 'windows' ? '.exe' : ''
if (!ends_with(output, exe_ext) && exe_ext) {
output = output + exe_ext
var out_path = output
if (!ends_with(out_path, exe_ext) && exe_ext) {
out_path = out_path + exe_ext
}
var cmd_parts = [cc]
arrfor(all_objects, function(obj) {
@@ -398,17 +390,240 @@ Build.build_static = function(packages, target = Build.detect_host_target(), out
push(cmd_parts, flag)
})
push(cmd_parts, '-o', '"' + output + '"')
push(cmd_parts, '-o', '"' + out_path + '"')
var cmd_str = text(cmd_parts, ' ')
log.console('Linking ' + output)
log.console('Linking ' + out_path)
var ret = os.system(cmd_str)
if (ret != 0) {
throw Error('Linking failed with command: ' + cmd_str)
print('Linking failed: ' + cmd_str); disrupt
}
log.console('Built ' + output)
log.console('Built ' + out_path)
return out_path
}
// ============================================================================
// Native .cm compilation (source → mcode → QBE IL → .o → .dylib)
// ============================================================================
// Post-process QBE IL so that every instruction following a block terminator
// starts a new block: QBE requires each block to begin with a label, so code
// emitted after `ret`/`jmp`/`hlt` gets a synthetic "@_dead_<n>" label.
//
// il_text: the IL as one text, lines separated by "\n"
// Returns the IL with the extra label lines inserted; all original lines are
// kept unchanged and in order.
//
// Rules per line (after trimming):
//  - blank lines and `#` comment lines are passed through and neither consume
//    nor cancel a pending label, so a label is never placed in front of a
//    comment that is itself followed by `}`
//  - a line starting with `@` (label) or `}` (end of function) cancels a
//    pending label
//  - any other line gets a dead label in front of it when one is pending
//  - `ret` (with or without a value), `jmp` and `hlt` make a label pending
function qbe_insert_dead_labels(il_text) {
  var lines = array(il_text, "\n")
  var result = []
  var dead_id = 0
  var need_label = false
  var i = 0
  var line = null
  var trimmed = null
  var is_filler = false
  var starts_block = false
  while (i < length(lines)) {
    line = lines[i]
    trimmed = trim(line)
    is_filler = length(trimmed) == 0 || starts_with(trimmed, '#')
    if (!is_filler) {
      starts_block = starts_with(trimmed, '@') || starts_with(trimmed, '}')
      if (need_label && !starts_block) {
        push(result, "@_dead_" + text(dead_id))
        dead_id = dead_id + 1
      }
      // The pending state after a real line depends only on that line
      need_label = trimmed == 'ret' || trimmed == 'hlt' || starts_with(trimmed, 'ret ') || starts_with(trimmed, 'jmp ')
    }
    push(result, line)
    i = i + 1
  }
  return text(result, "\n")
}
// Compile a .cm source file to a native shared library via QBE.
//
// Pipeline: tokenize -> parse -> fold -> mcode -> streamline -> qbe_emit
// (QBE IL) -> qbe (assembly) -> cc -c (object) -> cc -shared (library).
//
// src_path:  path of the .cm file; disrupts when it is not a file
// target:    toolchain key; defaults to Build.detect_host_target()
// buildtype: accepted but currently unused by this function
// pkg:       optional package; when given, the exported symbol name comes from
//            shop.c_symbol_for_file and the built library is also copied to
//            <lib dir>/<package lib name>/<source basename><ext>
//
// Returns the content-addressed library path <build dir>/<hash>.<target><ext>.
// When that file already exists it is returned immediately, before any
// compilation work is done. Disrupts when qbe or the C compiler exits non-zero.
Build.compile_native = function(src_path, target, buildtype, pkg) {
  var _target = target || Build.detect_host_target()
  var qbe_rt_path = null
  var native_stem = null
  var native_install_dir = null
  var native_install_path = null

  if (!fd.is_file(src_path)) {
    print('Source file not found: ' + src_path); disrupt
  }

  var tc = toolchains[_target]
  var dylib_ext = tc.system == 'windows' ? '.dll' : (tc.system == 'darwin' ? '.dylib' : '.so')
  var cc = tc.c

  var src = text(fd.slurp(src_path))

  // Cache key: source text + target. Checked before running the compiler
  // pipeline so a cache hit costs one file read and one hash.
  // NOTE(review): pkg (and therefore the emitted symbol name) is not part of
  // the key, so the same source built for two packages shares one cached
  // library — confirm this is intended before changing the key, since other
  // code may compute the same hash to locate the file.
  var hash = content_hash(src + '\n' + _target + '\nnative')
  var build_dir = get_build_dir()
  ensure_dir(build_dir)
  var dylib_path = build_dir + '/' + hash + '.' + _target + dylib_ext
  if (fd.is_file(dylib_path))
    return dylib_path

  // Step 1: Compile the source through the front-end pipeline
  var tokenize = use('tokenize')
  var parse = use('parse')
  var fold = use('fold')
  var mcode_mod = use('mcode')
  var streamline_mod = use('streamline')
  var qbe_macros = use('qbe')
  var qbe_emit = use('qbe_emit')

  var tok_result = tokenize(src, src_path)
  var ast = parse(tok_result.tokens, src, src_path, tokenize)
  var folded = fold(ast)
  var compiled = mcode_mod(folded)
  var optimized = streamline_mod(compiled)

  // Step 2: Generate QBE IL (symbol name only known when a package is given)
  var sym_name = null
  if (pkg) {
    sym_name = shop.c_symbol_for_file(pkg, fd.basename(src_path))
  }
  var il = qbe_emit(optimized, qbe_macros, sym_name)

  // Step 3: Post-process (insert dead labels)
  il = qbe_insert_dead_labels(il)

  // Step 4: Write QBE IL to temp file
  var tmp = '/tmp/cell_native_' + hash
  var ssa_path = tmp + '.ssa'
  var s_path = tmp + '.s'
  var o_path = tmp + '.o'
  // The runtime stub object is compiled with the target's compiler, so it is
  // kept per target rather than shared between toolchains
  var rt_o_path = '/tmp/cell_qbe_rt.' + _target + '.o'
  fd.slurpwrite(ssa_path, stone(blob(il)))

  // Step 5: QBE compile to assembly (paths quoted like the other commands in this file)
  var rc = os.system('qbe -o "' + s_path + '" "' + ssa_path + '"')
  if (rc != 0) {
    print('QBE compilation failed for: ' + src_path); disrupt
  }

  // Step 6: Assemble
  rc = os.system(cc + ' -c "' + s_path + '" -o "' + o_path + '"')
  if (rc != 0) {
    print('Assembly failed for: ' + src_path); disrupt
  }

  // Step 7: Compile QBE runtime stubs if needed
  if (!fd.is_file(rt_o_path)) {
    qbe_rt_path = shop.get_package_dir('core') + '/qbe_rt.c'
    rc = os.system(cc + ' -c "' + qbe_rt_path + '" -o "' + rt_o_path + '" -fPIC')
    if (rc != 0) {
      print('QBE runtime stubs compilation failed'); disrupt
    }
  }

  // Step 8: Link; undefined symbols are allowed and left for load time
  var link_cmd = cc + ' -shared -fPIC'
  if (tc.system == 'darwin') {
    link_cmd = link_cmd + ' -undefined dynamic_lookup'
  } else if (tc.system == 'linux') {
    link_cmd = link_cmd + ' -Wl,--allow-shlib-undefined'
  }
  link_cmd = link_cmd + ' "' + o_path + '" "' + rt_o_path + '" -o "' + dylib_path + '"'

  rc = os.system(link_cmd)
  if (rc != 0) {
    print('Linking native dylib failed for: ' + src_path); disrupt
  }

  log.console('Built native: ' + fd.basename(dylib_path))

  // Install a copy at the deterministic per-package location
  if (pkg) {
    native_stem = fd.basename(src_path)
    native_install_dir = shop.get_lib_dir() + '/' + shop.lib_name_for_package(pkg)
    ensure_dir(native_install_dir)
    native_install_path = native_install_dir + '/' + native_stem + dylib_ext
    fd.slurpwrite(native_install_path, fd.slurp(dylib_path))
  }

  return dylib_path
}
// ============================================================================
// Module table generation (for static builds)
// ============================================================================
// Compile a .cm module to mach bytecode.
// Runs the source through tokenize -> parse -> fold -> mcode -> streamline,
// JSON-encodes the result and hands it to mach_compile_mcode_bin.
// Disrupts when src_path is not a file. Returns whatever
// mach_compile_mcode_bin returns (the raw mach bytes as a blob).
Build.compile_cm_to_mach = function(src_path) {
  if (!fd.is_file(src_path)) {
    print('Source file not found: ' + src_path); disrupt
  }
  var source = text(fd.slurp(src_path))
  var tokenize = use('tokenize')
  var parse = use('parse')
  var fold = use('fold')
  var mcode_mod = use('mcode')
  var streamline_mod = use('streamline')
  var json = use('json')
  var tokens = tokenize(source, src_path).tokens
  var tree = fold(parse(tokens, source, src_path, tokenize))
  var mcode = streamline_mod(mcode_mod(tree))
  return mach_compile_mcode_bin(src_path, json.encode(mcode))
}
// Generate a C source file that embeds mach bytecode for .cm modules.
//
// modules: array of {name, src_path} — name is the module name, src_path the .cm file
// output:  path the generated .c file is written to
// Returns `output`.
//
// The generated file contains one `static const unsigned char` array per
// module plus cell_embedded_module_lookup(name), which compares `name`
// against each module name in order and returns a pointer to a static
// {name, data, size} entry, or a null pointer when nothing matches.
//
// Each data array is named mod_<index>_<sanitised name>_data. The index keeps
// the identifiers distinct even when two module names sanitise to the same
// text (for example "a/b" and "a_b"), which would otherwise produce duplicate
// definitions in the generated file.
// NOTE(review): mod.name is written into a C string literal unescaped; names
// containing '"' or '\' would produce invalid C — confirm names never do.
Build.generate_module_table = function(modules, output) {
  var lines = []
  push(lines, '/* Generated module table — do not edit */')
  push(lines, '#include <stddef.h>')
  push(lines, '#include <string.h>')
  push(lines, '')
  push(lines, 'struct cell_embedded_entry {')
  push(lines, ' const char *name;')
  push(lines, ' const unsigned char *data;')
  push(lines, ' size_t size;')
  push(lines, '};')
  push(lines, '')

  // C identifier stem of each module, in the same order as `modules`
  var entries = []
  arrfor(modules, function(mod) {
    var safe = replace(replace(replace(mod.name, '/', '_'), '.', '_'), '-', '_')
    var ident = 'mod_' + text(length(entries)) + '_' + safe
    var mach = Build.compile_cm_to_mach(mod.src_path)
    var bytes = array(mach)
    var hex = []
    arrfor(bytes, function(b) {
      push(hex, '0x' + text(b, 'h2'))
    })
    push(lines, 'static const unsigned char ' + ident + '_data[] = {')
    push(lines, ' ' + text(hex, ', '))
    push(lines, '};')
    push(lines, '')
    push(entries, ident)
    log.console('Embedded: ' + mod.name + ' (' + text(length(bytes)) + ' bytes)')
  })

  // Lookup function
  push(lines, 'const struct cell_embedded_entry *cell_embedded_module_lookup(const char *name) {')
  arrfor(modules, function(mod, i) {
    var ident = entries[i]
    push(lines, ' if (strcmp(name, "' + mod.name + '") == 0) {')
    push(lines, ' static const struct cell_embedded_entry e = {"' + mod.name + '", ' + ident + '_data, sizeof(' + ident + '_data)};')
    push(lines, ' return &e;')
    push(lines, ' }')
  })
  push(lines, ' return (void *)0;')
  push(lines, '}')

  var c_text = text(lines, '\n')
  fd.slurpwrite(output, stone(blob(c_text)))
  log.console('Generated ' + output)
  return output
}
@@ -417,38 +632,27 @@ Build.build_static = function(packages, target = Build.detect_host_target(), out
// ============================================================================
// Build dynamic libraries for all installed packages
Build.build_all_dynamic = function(target, buildtype = 'release') {
target = target || Build.detect_host_target()
Build.build_all_dynamic = function(target, buildtype) {
var _target = target || Build.detect_host_target()
var _buildtype = buildtype || 'release'
var packages = shop.list_packages()
var results = []
var core_mods = null
// Build core first
if (find(packages, 'core') != null) {
try {
var lib = Build.build_dynamic('core', target, buildtype)
push(results, { package: 'core', library: lib })
} catch (e) {
log.error('Failed to build core: ' + text(e))
push(results, { package: 'core', error: e })
}
if (find(packages, function(p) { return p == 'core' }) != null) {
core_mods = Build.build_dynamic('core', _target, _buildtype)
push(results, {package: 'core', modules: core_mods})
}
// Build other packages
arrfor(packages, function(pkg) {
if (pkg == 'core') return
try {
var lib = Build.build_dynamic(pkg, target, buildtype)
push(results, { package: pkg, library: lib })
} catch (e) {
log.error('Failed to build ' + pkg + ': ')
log.console(e.message)
log.console(e.stack)
push(results, { package: pkg, error: e })
}
var pkg_mods = Build.build_dynamic(pkg, _target, _buildtype)
push(results, {package: pkg, modules: pkg_mods})
})
return results
}

View File

@@ -22,55 +22,55 @@ function normalize_path(path) {
// Check if a file exists in a specific mount
function mount_exists(mount, path) {
var result = false
var _check = null
if (mount.type == 'zip') {
try {
_check = function() {
mount.handle.mod(path)
return true
} catch (e) {
return false
}
result = true
} disruption {}
_check()
} else if (mount.type == 'qop') {
try {
return mount.handle.stat(path) != null
} catch (e) {
return false
}
} else { // fs
_check = function() {
result = mount.handle.stat(path) != null
} disruption {}
_check()
} else {
var full_path = fd.join_paths(mount.source, path)
try {
_check = function() {
var st = fd.stat(full_path)
return st.isFile || st.isDirectory
} catch (e) {
return false
}
result = st.isFile || st.isDirectory
} disruption {}
_check()
}
return result
}
// Check if a path refers to a directory in a specific mount
function is_directory(path) {
var res = resolve(path)
var mount = res.mount
var result = false
var _check = null
if (mount.type == 'zip') {
try {
return mount.handle.is_directory(path);
} catch (e) {
return false;
}
_check = function() {
result = mount.handle.is_directory(path)
} disruption {}
_check()
} else if (mount.type == 'qop') {
try {
return mount.handle.is_directory(path);
} catch (e) {
return false;
}
} else { // fs
_check = function() {
result = mount.handle.is_directory(path)
} disruption {}
_check()
} else {
var full_path = fd.join_paths(mount.source, path)
try {
_check = function() {
var st = fd.stat(full_path)
return st.isDirectory
} catch (e) {
return false
}
result = st.isDirectory
} disruption {}
_check()
}
return result
}
// Resolve a path to a specific mount and relative path
@@ -102,7 +102,7 @@ function resolve(path, must_exist) {
}, false, true)
if (!mount) {
throw Error("Unknown mount point: @" + mount_name)
print("Unknown mount point: @" + mount_name); disrupt
}
return { mount: mount, path: rel_path }
@@ -122,7 +122,7 @@ function resolve(path, must_exist) {
}
if (must_exist) {
throw Error("File not found in any mount: " + path)
print("File not found in any mount: " + path); disrupt
}
}
@@ -144,12 +144,11 @@ function mount(source, name) {
} else if (st.isFile) {
var blob = fd.slurp(source)
// Try QOP first (it's likely faster to fail?) or Zip?
// QOP open checks magic.
var qop_archive = null
try {
qop_archive = qop.open(blob)
} catch(e) {}
var _try_qop = function() {
qop_archive = qop.open(blob)
} disruption {}
_try_qop()
if (qop_archive) {
mount_info.type = 'qop'
@@ -158,7 +157,7 @@ function mount(source, name) {
} else {
var zip = miniz.read(blob)
if (!is_object(zip) || !is_function(zip.count)) {
throw Error("Invalid archive file (not zip or qop): " + source)
print("Invalid archive file (not zip or qop): " + source); disrupt
}
mount_info.type = 'zip'
@@ -166,7 +165,7 @@ function mount(source, name) {
mount_info.zip_blob = blob // keep blob alive
}
} else {
throw Error("Unsupported mount source type: " + source)
print("Unsupported mount source type: " + source); disrupt
}
push(mounts, mount_info)
@@ -182,13 +181,13 @@ function unmount(name_or_source) {
// Read file
function slurp(path) {
var res = resolve(path, true)
if (!res) throw Error("File not found: " + path)
if (!res) { print("File not found: " + path); disrupt }
if (res.mount.type == 'zip') {
return res.mount.handle.slurp(res.path)
} else if (res.mount.type == 'qop') {
var data = res.mount.handle.read(res.path)
if (!data) throw Error("File not found in qop: " + path)
if (!data) { print("File not found in qop: " + path); disrupt }
return data
} else {
var full_path = fd.join_paths(res.mount.source, res.path)
@@ -217,8 +216,8 @@ function exists(path) {
// Stat
function stat(path) {
var res = resolve(path, true)
if (!res) throw Error("File not found: " + path)
if (!res) { print("File not found: " + path); disrupt }
if (res.mount.type == 'zip') {
var mod = res.mount.handle.mod(res.path)
return {
@@ -228,7 +227,7 @@ function stat(path) {
}
} else if (res.mount.type == 'qop') {
var s = res.mount.handle.stat(res.path)
if (!s) throw Error("File not found in qop: " + path)
if (!s) { print("File not found in qop: " + path); disrupt }
return {
filesize: s.size,
modtime: s.modtime,
@@ -261,7 +260,7 @@ function mount_package(name) {
var dir = shop.get_package_dir(name)
if (!dir) {
throw Error("Package not found: " + name)
print("Package not found: " + name); disrupt
}
mount(dir, name)
@@ -275,7 +274,7 @@ function match(str, pattern) {
function rm(path) {
var res = resolve(path, true)
if (res.mount.type != 'fs') throw Error("Cannot delete from non-fs mount")
if (res.mount.type != 'fs') { print("Cannot delete from non-fs mount"); disrupt }
var full_path = fd.join_paths(res.mount.source, res.path)
var st = fd.stat(full_path)

View File

@@ -23,8 +23,11 @@ var clean_build = false
var clean_fetch = false
var deep = false
var dry_run = false
var i = 0
var resolved = null
var deps = null
for (var i = 0; i < length(args); i++) {
for (i = 0; i < length(args); i++) {
if (args[i] == '--build') {
clean_build = true
} else if (args[i] == '--fetch') {
@@ -74,7 +77,7 @@ var is_world_scope = (scope == 'world')
if (!is_shop_scope && !is_world_scope) {
if (scope == '.' || starts_with(scope, './') || starts_with(scope, '../') || fd.is_dir(scope)) {
var resolved = fd.realpath(scope)
resolved = fd.realpath(scope)
if (resolved) {
scope = resolved
}
@@ -86,6 +89,7 @@ var dirs_to_delete = []
// Gather packages to clean
var packages_to_clean = []
var _gather = null
if (is_shop_scope) {
packages_to_clean = shop.list_packages()
@@ -97,14 +101,15 @@ if (is_shop_scope) {
push(packages_to_clean, scope)
if (deep) {
try {
var deps = pkg.gather_dependencies(scope)
_gather = function() {
deps = pkg.gather_dependencies(scope)
arrfor(deps, function(dep) {
push(packages_to_clean, dep)
})
} catch (e) {
} disruption {
// Skip if can't read dependencies
}
_gather()
}
}
@@ -168,6 +173,7 @@ if (clean_fetch) {
}
// Execute or report
var deleted_count = 0
if (dry_run) {
log.console("Would delete:")
if (length(files_to_delete) == 0 && length(dirs_to_delete) == 0) {
@@ -181,20 +187,19 @@ if (dry_run) {
})
}
} else {
var deleted_count = 0
arrfor(files_to_delete, function(f) {
try {
var _del = function() {
fd.unlink(f)
log.console("Deleted: " + f)
deleted_count++
} catch (e) {
log.error("Failed to delete " + f + ": " + e)
} disruption {
log.error("Failed to delete " + f)
}
_del()
})
arrfor(dirs_to_delete, function(d) {
try {
var _del = function() {
if (fd.is_link(d)) {
fd.unlink(d)
} else {
@@ -202,9 +207,10 @@ if (dry_run) {
}
log.console("Deleted: " + d)
deleted_count++
} catch (e) {
log.error("Failed to delete " + d + ": " + e)
} disruption {
log.error("Failed to delete " + d)
}
_del()
})
if (deleted_count == 0) {

View File

@@ -7,11 +7,14 @@ var fd = use('fd')
var http = use('http')
var miniz = use('miniz')
var resolved = null
var cwd = null
var parent = null
if (length(args) < 2) {
log.console("Usage: cell clone <origin> <path>")
log.console("Clones a cell package to a local path and links it.")
$stop()
return
}
var origin = args[0]
@@ -19,19 +22,19 @@ var target_path = args[1]
// Resolve target path to absolute
if (target_path == '.' || starts_with(target_path, './') || starts_with(target_path, '../')) {
var resolved = fd.realpath(target_path)
resolved = fd.realpath(target_path)
if (resolved) {
target_path = resolved
} else {
// Path doesn't exist yet, resolve relative to cwd
var cwd = fd.realpath('.')
cwd = fd.realpath('.')
if (target_path == '.') {
target_path = cwd
} else if (starts_with(target_path, './')) {
target_path = cwd + text(target_path, 1)
} else if (starts_with(target_path, '../')) {
// Go up one directory from cwd
var parent = fd.dirname(cwd)
parent = fd.dirname(cwd)
target_path = parent + text(target_path, 2)
}
}
@@ -41,7 +44,6 @@ if (target_path == '.' || starts_with(target_path, './') || starts_with(target_p
if (fd.is_dir(target_path)) {
log.console("Error: " + target_path + " already exists")
$stop()
return
}
log.console("Cloning " + origin + " to " + target_path + "...")
@@ -51,7 +53,6 @@ var info = shop.resolve_package_info(origin)
if (!info || info == 'local') {
log.console("Error: " + origin + " is not a remote package")
$stop()
return
}
// Update to get the commit hash
@@ -59,7 +60,6 @@ var update_result = shop.update(origin)
if (!update_result) {
log.console("Error: Could not fetch " + origin)
$stop()
return
}
// Fetch and extract to the target path
@@ -68,54 +68,61 @@ var entry = lock[origin]
if (!entry || !entry.commit) {
log.console("Error: No commit found for " + origin)
$stop()
return
}
var download_url = shop.get_download_url(origin, entry.commit)
log.console("Downloading from " + download_url)
try {
var zip_blob = http.fetch(download_url)
var zip_blob = null
var zip = null
var count = 0
var i = 0
var filename = null
var first_slash = null
var rel_path = null
var full_path = null
var dir_path = null
var _clone = function() {
zip_blob = http.fetch(download_url)
// Extract zip to target path
var zip = miniz.read(zip_blob)
zip = miniz.read(zip_blob)
if (!zip) {
log.console("Error: Failed to read zip archive")
$stop()
return
}
// Create target directory
fd.mkdir(target_path)
var count = zip.count()
for (var i = 0; i < count; i++) {
count = zip.count()
for (i = 0; i < count; i++) {
if (zip.is_directory(i)) continue
var filename = zip.get_filename(i)
var first_slash = search(filename, '/')
filename = zip.get_filename(i)
first_slash = search(filename, '/')
if (first_slash == null) continue
if (first_slash + 1 >= length(filename)) continue
var rel_path = text(filename, first_slash + 1)
var full_path = target_path + '/' + rel_path
var dir_path = fd.dirname(full_path)
rel_path = text(filename, first_slash + 1)
full_path = target_path + '/' + rel_path
dir_path = fd.dirname(full_path)
// Ensure directory exists
if (!fd.is_dir(dir_path)) {
fd.mkdir(dir_path)
}
fd.slurpwrite(full_path, zip.slurp(filename))
}
log.console("Extracted to " + target_path)
// Link the origin to the cloned path
link.add(origin, target_path, shop)
log.console("Linked " + origin + " -> " + target_path)
} catch (e) {
log.console("Error: " + e.message)
if (e.stack) log.console(e.stack)
} disruption {
log.console("Error during clone")
}
_clone()
$stop()

92
compare_aot.ce Normal file
View File

@@ -0,0 +1,92 @@
// compare_aot.ce — compile a .cm module via both paths and compare results
//
// Usage:
// cell --dev compare_aot.ce <module.cm>
var build = use('build')
var fd_mod = use('fd')
var os = use('os')
var json = use('json')
var show = function(v) {
return json.encode(v)
}
if (length(args) < 1) {
print('usage: cell --dev compare_aot.ce <module.cm>')
return
}
var file = args[0]
if (!fd_mod.is_file(file)) {
if (!ends_with(file, '.cm') && fd_mod.is_file(file + '.cm'))
file = file + '.cm'
else {
print('file not found: ' + file)
return
}
}
var abs = fd_mod.realpath(file)
// Shared compilation front-end
var tokenize = use('tokenize')
var parse_mod = use('parse')
var fold = use('fold')
var mcode_mod = use('mcode')
var streamline_mod = use('streamline')
var src = text(fd_mod.slurp(abs))
var tok = tokenize(src, abs)
var ast = parse_mod(tok.tokens, src, abs, tokenize)
var folded = fold(ast)
var compiled = mcode_mod(folded)
var optimized = streamline_mod(compiled)
// --- Interpreted (mach VM) ---
print('--- interpreted ---')
var mcode_json = json.encode(optimized)
var mach_blob = mach_compile_mcode_bin(abs, mcode_json)
var result_interp = mach_load(mach_blob, stone({}))
print('result: ' + show(result_interp))
// --- Native (AOT via QBE) ---
print('\n--- native ---')
var dylib_path = build.compile_native(abs, null, null, null)
print('dylib: ' + dylib_path)
var handle = os.dylib_open(dylib_path)
if (!handle) {
print('failed to open dylib')
return
}
// Build env with runtime functions. Must include starts_with etc. because
// the GC can lose global object properties after compaction.
var env = stone({
logical: logical,
some: some,
every: every,
starts_with: starts_with,
ends_with: ends_with,
log: log,
fallback: fallback,
parallel: parallel,
race: race,
sequence: sequence
})
var result_native = os.native_module_load(handle, env)
print('result: ' + show(result_native))
// --- Comparison ---
print('\n--- comparison ---')
var s_interp = show(result_interp)
var s_native = show(result_native)
if (s_interp == s_native) {
print('MATCH')
} else {
print('MISMATCH')
print(' interp: ' + s_interp)
print(' native: ' + s_native)
}

27
compile.ce Normal file
View File

@@ -0,0 +1,27 @@
// compile.ce — compile a .cm or .ce file to native .dylib via QBE
//
// Usage:
// cell compile <file.cm|file.ce>
//
// Installs the dylib to .cell/lib/<pkg>/<stem>.dylib
var shop = use('internal/shop')
var build = use('build')
var fd = use('fd')
if (length(args) < 1) {
print('usage: cell compile <file.cm|file.ce>')
return
}
var file = args[0]
if (!fd.is_file(file)) {
print('file not found: ' + file)
return
}
var abs = fd.realpath(file)
var file_info = shop.file_info(abs)
var pkg = file_info.package
build.compile_native(abs, null, null, pkg)

98
compile_seed.ce Normal file
View File

@@ -0,0 +1,98 @@
// compile_seed.ce — compile a .cm module to native .dylib via QBE (seed mode)
// Usage: ./cell --dev --seed compile_seed <file.cm>
//
// Pipeline: source -> tokens -> AST -> folded AST -> mcode -> streamlined
// mcode -> QBE IL -> assembly -> object file -> dylib.
// Intermediate files are written to /tmp/qbe_<name>.{ssa,s,o}; the dylib is
// written next to the input as <file>.dylib. Any failing step prints a
// message and disrupts.
var fd = use("fd")
var os = use("os")
var tokenize = use("tokenize")
var parse = use("parse")
var fold = use("fold")
var mcode = use("mcode")
var streamline = use("streamline")
var qbe_macros = use("qbe")
var qbe_emit = use("qbe_emit")
// Quote a value as a single shell word for os.system(): wrap it in single
// quotes and spell every embedded single quote as '\''. Keeps paths with
// spaces or shell metacharacters from splitting or being interpreted.
var shell_quote = function(word) {
  return "'" + replace(word, "'", "'\\''") + "'"
}
if (length(args) < 1) {
  print("usage: cell --dev --seed compile_seed <file.cm>")
  disrupt
}
var file = args[0]
// Fail early with a clear message instead of disrupting inside fd.slurp
if (!fd.is_file(file)) {
  print("file not found: " + file)
  disrupt
}
// Strip a trailing .cm/.ce (both are three characters long)
var base = file
if (ends_with(base, ".cm") || ends_with(base, ".ce")) {
  base = text(base, 0, length(base) - 3)
}
// Path separators, dashes and dots are not usable in the symbol name
var safe = replace(replace(replace(base, "/", "_"), "-", "_"), ".", "_")
var symbol = "js_" + safe + "_use"
var tmp = "/tmp/qbe_" + safe
var ssa_path = tmp + ".ssa"
var s_path = tmp + ".s"
var o_path = tmp + ".o"
var rt_o_path = "/tmp/qbe_rt.o"
var dylib_path = file + ".dylib"
var rc = 0
// Step 1: compile to QBE IL
print("compiling " + file + " to QBE IL...")
var src = text(fd.slurp(file))
var result = tokenize(src, file)
var ast = parse(result.tokens, src, file, tokenize)
var folded = fold(ast)
var compiled = mcode(folded)
var optimized = streamline(compiled)
var il = qbe_emit(optimized, qbe_macros)
// Step 2: append wrapper function
// The wrapper exports js_<name>_use and forwards to cell_rt_module_entry
var wrapper = `
export function l $${symbol}(l %ctx) {
@entry
%result =l call $cell_rt_module_entry(l %ctx)
ret %result
}
`
il = il + wrapper
// Write IL to file — remove old file first to avoid leftover content
// NOTE(review): 1537 and 420 are raw open() flags/mode; 420 is octal 0644 and
// 1537 looks like O_WRONLY|O_CREAT|O_TRUNC with macOS/BSD values — confirm,
// these numbers are platform specific.
if (fd.is_file(ssa_path)) fd.unlink(ssa_path)
var out_fd = fd.open(ssa_path, 1537, 420)
fd.write(out_fd, il)
fd.close(out_fd)
print("wrote " + ssa_path + " (" + text(length(il)) + " bytes)")
// Step 3: compile QBE IL to assembly
print("qbe compile...")
rc = os.system("qbe -o " + shell_quote(s_path) + " " + shell_quote(ssa_path))
if (rc != 0) {
  print("qbe compilation failed")
  disrupt
}
// Step 4: assemble
print("assemble...")
rc = os.system("cc -c " + shell_quote(s_path) + " -o " + shell_quote(o_path))
if (rc != 0) {
  print("assembly failed")
  disrupt
}
// Step 5: compile runtime stubs
// The stubs object is shared between runs and only built when it is missing
if (!fd.is_file(rt_o_path)) {
  print("compile runtime stubs...")
  rc = os.system("cc -c source/qbe_helpers.c -o " + shell_quote(rt_o_path) + " -fPIC -Isource")
  if (rc != 0) {
    print("runtime stubs compilation failed")
    disrupt
  }
}
// Step 6: link dylib
print("link...")
rc = os.system("cc -shared -fPIC -undefined dynamic_lookup " + shell_quote(o_path) + " " + shell_quote(rt_o_path) + " -o " + shell_quote(dylib_path))
if (rc != 0) {
  print("linking failed")
  disrupt
}
print("built: " + dylib_path)

261
config.ce
View File

@@ -47,8 +47,10 @@ function get_nested(obj, path) {
// Set a value in nested object using path
function set_nested(obj, path, value) {
var current = obj
for (var i = 0; i < length(path) - 1; i++) {
var segment = path[i]
var i = 0
var segment = null
for (i = 0; i < length(path) - 1; i++) {
segment = path[i]
if (is_null(current[segment]) || !is_object(current[segment])) {
current[segment] = {}
}
@@ -59,15 +61,17 @@ function set_nested(obj, path, value) {
// Parse value string into appropriate type
function parse_value(str) {
var num_str = null
var n = null
// Boolean
if (str == 'true') return true
if (str == 'false') return false
// Number (including underscores)
var num_str = replace(str, /_/g, '')
if (/^-?\d+$/.test(num_str)) return parseInt(num_str)
if (/^-?\d*\.\d+$/.test(num_str)) return parseFloat(num_str)
// Number
num_str = replace(str, /_/g, '')
n = number(num_str)
if (n != null) return n
// String
return str
}
@@ -75,22 +79,19 @@ function parse_value(str) {
// Format value for display
function format_value(val) {
if (is_text(val)) return '"' + val + '"'
if (is_number(val) && val >= 1000) {
// Add underscores to large numbers
return replace(val.toString(), /\B(?=(\d{3})+(?!\d))/g, '_')
}
return text(val)
}
// Print configuration tree recursively
function print_config(obj, prefix = '') {
function print_config(obj, pfx) {
var p = pfx || ''
arrfor(array(obj), function(key) {
var val = obj[key]
var full_key = prefix ? prefix + '.' + key : key
var full_key = p ? p + '.' + key : key
if (is_object(val))
print_config(val, full_key)
else
else if (!is_null(val))
log.console(full_key + ' = ' + format_value(val))
})
}
@@ -99,151 +100,123 @@ function print_config(obj, prefix = '') {
if (length(args) == 0) {
print_help()
$stop()
return
}
var config = pkg.load_config()
if (!config) {
log.error("Failed to load cell.toml")
$stop()
return
}
var command = args[0]
var key
var path
var value
var key = null
var path = null
var value = null
var value_str = null
var valid_system_keys = null
var actor_name = null
var actor_cmd = null
switch (command) {
case 'help':
case '-h':
case '--help':
print_help()
break
case 'list':
log.console("# Cell Configuration")
log.console("")
print_config(config)
break
case 'get':
if (length(args) < 2) {
log.error("Usage: cell config get <key>")
if (command == 'help' || command == '-h' || command == '--help') {
print_help()
} else if (command == 'list') {
log.console("# Cell Configuration")
log.console("")
print_config(config)
} else if (command == 'get') {
if (length(args) < 2) {
log.error("Usage: cell config get <key>")
$stop()
}
key = args[1]
path = parse_key(key)
value = get_nested(config, path)
if (value == null) {
log.error("Key not found: " + key)
} else if (is_object(value)) {
print_config(value, key)
} else {
log.console(key + ' = ' + format_value(value))
}
} else if (command == 'set') {
if (length(args) < 3) {
log.error("Usage: cell config set <key> <value>")
$stop()
}
key = args[1]
value_str = args[2]
path = parse_key(key)
value = parse_value(value_str)
if (path[0] == 'system') {
valid_system_keys = [
'ar_timer', 'actor_memory', 'net_service',
'reply_timeout', 'actor_max', 'stack_max'
]
if (find(valid_system_keys, path[1]) == null) {
log.error("Invalid system key. Valid keys: " + text(valid_system_keys, ', '))
$stop()
return
}
key = args[1]
path = parse_key(key)
value = get_nested(config, path)
if (value == null) {
log.error("Key not found: " + key)
} else if (isa(value, object)) {
// Print all nested values
print_config(value, key)
}
set_nested(config, path, value)
pkg.save_config(config)
log.console("Set " + key + " = " + format_value(value))
} else if (command == 'actor') {
if (length(args) < 3) {
log.error("Usage: cell config actor <name> <command> [options]")
$stop()
}
actor_name = args[1]
actor_cmd = args[2]
config.actors = config.actors || {}
config.actors[actor_name] = config.actors[actor_name] || {}
if (actor_cmd == 'list') {
if (length(array(config.actors[actor_name])) == 0) {
log.console("No configuration for actor: " + actor_name)
} else {
log.console(key + ' = ' + format_value(value))
log.console("# Configuration for actor: " + actor_name)
log.console("")
print_config(config.actors[actor_name], 'actors.' + actor_name)
}
break
case 'set':
if (length(args) < 3) {
log.error("Usage: cell config set <key> <value>")
} else if (actor_cmd == 'get') {
if (length(args) < 4) {
log.error("Usage: cell config actor <name> get <key>")
$stop()
return
}
var key = args[1]
var value_str = args[2]
var path = parse_key(key)
var value = parse_value(value_str)
// Validate system keys
if (path[0] == 'system') {
var valid_system_keys = [
'ar_timer', 'actor_memory', 'net_service',
'reply_timeout', 'actor_max', 'stack_max'
]
if (find(valid_system_keys, path[1]) == null) {
log.error("Invalid system key. Valid keys: " + text(valid_system_keys, ', '))
$stop()
return
}
key = args[3]
path = parse_key(key)
value = get_nested(config.actors[actor_name], path)
if (value == null) {
log.error("Key not found for actor " + actor_name + ": " + key)
} else {
log.console('actors.' + actor_name + '.' + key + ' = ' + format_value(value))
}
set_nested(config, path, value)
} else if (actor_cmd == 'set') {
if (length(args) < 5) {
log.error("Usage: cell config actor <name> set <key> <value>")
$stop()
}
key = args[3]
value_str = args[4]
path = parse_key(key)
value = parse_value(value_str)
set_nested(config.actors[actor_name], path, value)
pkg.save_config(config)
log.console("Set " + key + " = " + format_value(value))
break
case 'actor':
// Handle actor-specific configuration
if (length(args) < 3) {
log.error("Usage: cell config actor <name> <command> [options]")
$stop()
return
}
var actor_name = args[1]
var actor_cmd = args[2]
// Initialize actors section if needed
config.actors = config.actors || {}
config.actors[actor_name] = config.actors[actor_name] || {}
switch (actor_cmd) {
case 'list':
if (length(array(config.actors[actor_name])) == 0) {
log.console("No configuration for actor: " + actor_name)
} else {
log.console("# Configuration for actor: " + actor_name)
log.console("")
print_config(config.actors[actor_name], 'actors.' + actor_name)
}
break
case 'get':
if (length(args) < 4) {
log.error("Usage: cell config actor <name> get <key>")
$stop()
return
}
key = args[3]
path = parse_key(key)
value = get_nested(config.actors[actor_name], path)
if (value == null) {
log.error("Key not found for actor " + actor_name + ": " + key)
} else {
log.console('actors.' + actor_name + '.' + key + ' = ' + format_value(value))
}
break
case 'set':
if (length(args) < 5) {
log.error("Usage: cell config actor <name> set <key> <value>")
$stop()
return
}
key = args[3]
var value_str = args[4]
path = parse_key(key)
value = parse_value(value_str)
set_nested(config.actors[actor_name], path, value)
pkg.save_config(config)
log.console("Set actors." + actor_name + "." + key + " = " + format_value(value))
break
default:
log.error("Unknown actor command: " + actor_cmd)
log.console("Valid commands: list, get, set")
}
break
default:
log.error("Unknown command: " + command)
print_help()
log.console("Set actors." + actor_name + "." + key + " = " + format_value(value))
} else {
log.error("Unknown actor command: " + actor_cmd)
log.console("Valid commands: list, get, set")
}
} else {
log.error("Unknown command: " + command)
print_help()
}
$stop()
$stop()

View File

@@ -238,9 +238,10 @@ static const JSCFunctionListEntry js_crypto_funcs[] = {
JS_CFUNC_DEF("unlock", 3, js_crypto_unlock),
};
JSValue js_crypto_use(JSContext *js)
JSValue js_core_crypto_use(JSContext *js)
{
JSValue obj = JS_NewObject(js);
JS_SetPropertyFunctionList(js, obj, js_crypto_funcs, sizeof(js_crypto_funcs)/sizeof(js_crypto_funcs[0]));
return obj;
JS_FRAME(js);
JS_ROOT(mod, JS_NewObject(js));
JS_SetPropertyFunctionList(js, mod.val, js_crypto_funcs, sizeof(js_crypto_funcs)/sizeof(js_crypto_funcs[0]));
JS_RETURN(mod.val);
}

View File

@@ -1,27 +1,15 @@
#include "cell.h"
// Return the current stack depth.
JSC_CCALL(debug_stack_depth, return number2js(js,js_debugger_stack_depth(js)))
// TODO: Reimplement stack depth for register VM
JSC_CCALL(debug_stack_depth, return number2js(js, 0))
// Return a backtrace of the current call stack.
JSC_CCALL(debug_build_backtrace, return js_debugger_build_backtrace(js,NULL))
// Return the closure variables for a given function.
JSC_CCALL(debug_closure_vars, return js_debugger_closure_variables(js,argv[0]))
JSC_CCALL(debug_set_closure_var,
js_debugger_set_closure_variable(js,argv[0],argv[1],argv[2]);
return JS_NULL;
)
// Return the local variables for a specific stack frame.
JSC_CCALL(debug_local_vars, return js_debugger_local_variables(js, js2number(js,argv[0])))
// Return metadata about a given function.
JSC_CCALL(debug_fn_info, return js_debugger_fn_info(js, argv[0]))
// Return an array of functions in the current backtrace.
JSC_CCALL(debug_backtrace_fns, return js_debugger_backtrace_fns(js,NULL))
// TODO: Reimplement debug introspection for register VM
JSC_CCALL(debug_build_backtrace, return JS_NewArray(js))
JSC_CCALL(debug_closure_vars, return JS_NewObject(js))
JSC_CCALL(debug_set_closure_var, return JS_NULL;)
JSC_CCALL(debug_local_vars, return JS_NewObject(js))
JSC_CCALL(debug_fn_info, return JS_NewObject(js))
JSC_CCALL(debug_backtrace_fns, return JS_NewArray(js))
static const JSCFunctionListEntry js_debug_funcs[] = {
MIST_FUNC_DEF(debug, stack_depth, 0),
@@ -33,8 +21,9 @@ static const JSCFunctionListEntry js_debug_funcs[] = {
MIST_FUNC_DEF(debug, backtrace_fns,0),
};
JSValue js_debug_use(JSContext *js) {
JSValue mod = JS_NewObject(js);
JS_SetPropertyFunctionList(js,mod,js_debug_funcs,countof(js_debug_funcs));
return mod;
}
JSValue js_core_debug_use(JSContext *js) {
JS_FRAME(js);
JS_ROOT(mod, JS_NewObject(js));
JS_SetPropertyFunctionList(js, mod.val, js_debug_funcs, countof(js_debug_funcs));
JS_RETURN(mod.val);
}

View File

@@ -1,108 +1,28 @@
#include "cell.h"
JSC_CCALL(os_mem_limit, JS_SetMemoryLimit(JS_GetRuntime(js), js2number(js,argv[0])))
JSC_CCALL(os_max_stacksize, JS_SetMaxStackSize(JS_GetRuntime(js), js2number(js,argv[0])))
JSC_CCALL(os_max_stacksize, JS_SetMaxStackSize(js, js2number(js,argv[0])))
// Compute the approximate size of a single JS value in memory.
// TODO: Reimplement memory usage reporting for new allocator
JSC_CCALL(os_calc_mem,
JSMemoryUsage mu;
JS_ComputeMemoryUsage(JS_GetRuntime(js),&mu);
ret = JS_NewObject(js);
JS_SetPropertyStr(js,ret,"malloc_size",number2js(js,mu.malloc_size));
JS_SetPropertyStr(js,ret,"malloc_limit",number2js(js,mu.malloc_limit));
JS_SetPropertyStr(js,ret,"memory_used_size",number2js(js,mu.memory_used_size));
JS_SetPropertyStr(js,ret,"malloc_count",number2js(js,mu.malloc_count));
JS_SetPropertyStr(js,ret,"memory_used_count",number2js(js,mu.memory_used_count));
/* atom_count and atom_size removed - atoms are now just strings */
JS_SetPropertyStr(js,ret,"str_count",number2js(js,mu.str_count));
JS_SetPropertyStr(js,ret,"str_size",number2js(js,mu.str_size));
JS_SetPropertyStr(js,ret,"obj_count",number2js(js,mu.obj_count));
JS_SetPropertyStr(js,ret,"obj_size",number2js(js,mu.obj_size));
JS_SetPropertyStr(js,ret,"prop_count",number2js(js,mu.prop_count));
JS_SetPropertyStr(js,ret,"prop_size",number2js(js,mu.prop_size));
JS_SetPropertyStr(js,ret,"shape_count",number2js(js,mu.shape_count));
JS_SetPropertyStr(js,ret,"shape_size",number2js(js,mu.shape_size));
JS_SetPropertyStr(js,ret,"js_func_count",number2js(js,mu.js_func_count));
JS_SetPropertyStr(js,ret,"js_func_size",number2js(js,mu.js_func_size));
JS_SetPropertyStr(js,ret,"js_func_code_size",number2js(js,mu.js_func_code_size));
JS_SetPropertyStr(js,ret,"js_func_pc2line_count",number2js(js,mu.js_func_pc2line_count));
JS_SetPropertyStr(js,ret,"js_func_pc2line_size",number2js(js,mu.js_func_pc2line_size));
JS_SetPropertyStr(js,ret,"c_func_count",number2js(js,mu.c_func_count));
JS_SetPropertyStr(js,ret,"array_count",number2js(js,mu.array_count));
JS_SetPropertyStr(js,ret,"fast_array_count",number2js(js,mu.fast_array_count));
JS_SetPropertyStr(js,ret,"fast_array_elements",number2js(js,mu.fast_array_elements));
JS_SetPropertyStr(js,ret,"binary_object_count",number2js(js,mu.binary_object_count));
JS_SetPropertyStr(js,ret,"binary_object_size",number2js(js,mu.binary_object_size));
)
// Evaluate a string of JavaScript code in the current QuickJS context.
JSC_SSCALL(os_eval,
if (!str2) return JS_ThrowReferenceError(js, "Second argument should be the script.");
if (!str) return JS_ThrowReferenceError(js, "First argument should be the name of the script.");
JSValue bytecode = JS_Compile(js, str2, strlen(str2), str);
if (JS_IsException(bytecode)) return bytecode;
ret = JS_Integrate(js, bytecode, JS_NULL);
)
// Compile a string of JavaScript code into a function object.
JSC_SSCALL(js_compile,
if (!str2) return JS_ThrowReferenceError(js, "Second argument should be the script.");
if (!str) return JS_ThrowReferenceError(js, "First argument should be the name of the script.");
ret = JS_Compile(js, str2, strlen(str2), str);
)
// Link compiled bytecode with environment and execute.
JSC_CCALL(js_integrate,
JSValue env = (argc > 1 && !JS_IsNull(argv[1])) ? argv[1] : JS_NULL;
ret = JS_Integrate(js, argv[0], env);
)
// Compile a function object into a bytecode blob.
JSC_CCALL(js_compile_blob,
size_t size;
uint8_t *data = JS_WriteObject(js, &size, argv[0], JS_WRITE_OBJ_BYTECODE);
if (!data) {
return JS_ThrowInternalError(js, "Failed to serialize bytecode");
}
ret = js_new_blob_stoned_copy(js, data, size);
js_free(js, data);
)
// Compile a bytecode blob into a function object.
JSC_CCALL(js_compile_unblob,
size_t size;
void *data = js_get_blob_data(js, &size, argv[0]);
if (data == -1) return JS_EXCEPTION;
if (!data) return JS_ThrowReferenceError(js, "No data present in blob.");
return JS_ReadObject(js, data, size, JS_READ_OBJ_BYTECODE);
)
// Disassemble a function object into a string.
// Delegates to js_debugger_fn_bytecode with the first argument.
// NOTE(review): js_disassemble is defined a second time further down as a stub
// that returns an empty array ("TODO: Reimplement for register VM"). Presumably
// only one of the two definitions is meant to be compiled — confirm.
JSC_CCALL(js_disassemble,
return js_debugger_fn_bytecode(js, argv[0]);
)
// Return metadata about a given function.
// Delegates to js_debugger_fn_info with the first argument.
// NOTE(review): js_fn_info is defined a second time just below as a stub that
// returns an empty object. Presumably only one of the two definitions is meant
// to be compiled — confirm.
JSC_CCALL(js_fn_info,
return js_debugger_fn_info(js, argv[0]);
)
// TODO: Reimplement for register VM
// Placeholder implementations: js_disassemble returns a new empty array and
// js_fn_info returns a new empty object, ignoring their arguments.
// NOTE(review): both names are also defined above in terms of the
// js_debugger_* helpers; presumably these stubs replace those — confirm.
JSC_CCALL(js_disassemble, return JS_NewArray(js);)
JSC_CCALL(js_fn_info, return JS_NewObject(js);)
// Function table installed on the module object by js_js_use / js_core_js_use.
// Each MIST_FUNC_DEF entry names a prefix (os or js) and a function.
// NOTE(review): the trailing number is presumably the declared argument count — confirm
// against the MIST_FUNC_DEF definition.
static const JSCFunctionListEntry js_js_funcs[] = {
MIST_FUNC_DEF(os, calc_mem, 0),
MIST_FUNC_DEF(os, mem_limit, 1),
MIST_FUNC_DEF(os, max_stacksize, 1),
MIST_FUNC_DEF(os, eval, 2),
MIST_FUNC_DEF(js, compile, 2),
MIST_FUNC_DEF(js, integrate, 2),
MIST_FUNC_DEF(js, compile_blob, 1),
MIST_FUNC_DEF(js, compile_unblob, 1),
MIST_FUNC_DEF(js, disassemble, 1),
MIST_FUNC_DEF(js, fn_info, 1),
};
// Module entry point: create a fresh object, install every entry of
// js_js_funcs on it, and return it.
JSValue js_js_use(JSContext *js) {
  JSValue exports = JS_NewObject(js);
  JS_SetPropertyFunctionList(js, exports, js_js_funcs, countof(js_js_funcs));
  return exports;
}
// Module entry point: create a fresh object, install every entry of
// js_js_funcs on it, and return it.
// The object is held through JS_FRAME / JS_ROOT and handed back with JS_RETURN.
// NOTE(review): presumably these macros keep the new object rooted for the GC
// while the function list is installed — confirm against their definitions.
JSValue js_core_js_use(JSContext *js) {
JS_FRAME(js);
JS_ROOT(mod, JS_NewObject(js));
JS_SetPropertyFunctionList(js, mod.val, js_js_funcs, countof(js_js_funcs));
JS_RETURN(mod.val);
}

264
diff.ce Normal file
View File

@@ -0,0 +1,264 @@
// diff.ce — differential testing: run tests optimized vs unoptimized, compare results
//
// Usage:
// cell diff - diff all test files in current package
// cell diff suite - diff a specific test file (tests/suite.cm)
// cell diff tests/foo - diff a specific test file by path
var shop = use('internal/shop')
var pkg = use('package')
var fd = use('fd')
var time = use('time')
var _args = args == null ? [] : args
var analyze = use('os').analyze
var run_ast_fn = use('os').run_ast_fn
var run_ast_noopt_fn = use('os').run_ast_noopt_fn
if (!run_ast_noopt_fn) {
log.console("error: run_ast_noopt_fn not available (rebuild bootstrap)")
$stop()
return
}
// Parse arguments: diff [test_path]
var target_test = null
if (length(_args) > 0) {
target_test = _args[0]
}
// Report whether `dir` (default: the current directory) contains a cell.toml file.
function is_valid_package(dir) {
  var root = '.'
  if (dir != null) root = dir
  return fd.is_file(root + '/cell.toml')
}
// The tool only works from a package root: bail out when the current
// directory has no cell.toml.
if (!is_valid_package('.')) {
log.console('No cell.toml found in current directory')
$stop()
return
}
// Collect test files: every package file under tests/ that ends in .cm.
//
// specific_test: optional selector. It may be given with or without the
//   "tests/" prefix and with or without the ".cm" suffix; only the file whose
//   path (minus ".cm") equals the normalized selector is returned.
// Returns an array of file paths as reported by pkg.list_files.
function collect_tests(specific_test) {
  var files = pkg.list_files(null)
  var test_files = []
  var wanted = null
  var i = 0
  var f = null
  // Normalize the selector once, up front; it does not depend on the file
  // being examined, so there is no reason to redo it on every iteration.
  if (specific_test) {
    wanted = specific_test
    if (!starts_with(wanted, 'tests/')) wanted = 'tests/' + wanted
    if (ends_with(wanted, '.cm')) wanted = text(wanted, 0, -3)
  }
  for (i = 0; i < length(files); i++) {
    f = files[i]
    if (!starts_with(f, "tests/") || !ends_with(f, ".cm")) continue
    // Compare without the ".cm" suffix so "suite" matches "tests/suite.cm".
    if (specific_test && text(f, 0, -3) != wanted) continue
    push(test_files, f)
  }
  return test_files
}
// Deep comparison of two values.
//
// Returns true when a and b are equal by ==, are both null, are arrays of the
// same length whose elements are pairwise deep-equal, or are records with the
// same key set whose values are pairwise deep-equal. Everything else
// (including two distinct functions) compares unequal.
function values_equal(a, b) {
  var i = 0
  var j = 0
  var ka = null
  var kb = null
  var found = false
  if (a == b) return true
  if (is_null(a) && is_null(b)) return true
  if (is_null(a) || is_null(b)) return false
  if (is_array(a) && is_array(b)) {
    if (length(a) != length(b)) return false
    i = 0
    while (i < length(a)) {
      if (!values_equal(a[i], b[i])) return false
      i = i + 1
    }
    return true
  }
  if (is_object(a) && is_object(b)) {
    ka = array(a)
    kb = array(b)
    if (length(ka) != length(kb)) return false
    i = 0
    while (i < length(ka)) {
      // Every key of a must also be a key of b. Matching key counts alone is
      // not enough: a key missing from b would presumably read back as null
      // and wrongly match a null value in a (e.g. {x: null} vs {y: null}).
      found = false
      j = 0
      while (j < length(kb) && !found) {
        if (kb[j] == ka[i]) found = true
        j = j + 1
      }
      if (!found) return false
      if (!values_equal(a[ka[i]], b[ka[i]])) return false
      i = i + 1
    }
    return true
  }
  return false
}
// Render a short, human-readable summary of a value for mismatch messages.
// Text is quoted, numbers and logicals are converted with text(), and arrays
// and records are summarized by their size rather than printed in full.
function describe(val) {
  var count = 0
  if (is_null(val)) return "null"
  if (is_text(val)) return `"${val}"`
  if (is_number(val) || is_logical(val)) return text(val)
  if (is_function(val)) return "<function>"
  if (is_array(val)) {
    count = length(val)
    return `[array length=${text(count)}]`
  }
  if (is_object(val)) {
    count = length(array(val))
    return `{record keys=${text(count)}}`
  }
  return "<unknown>"
}
// Run a single test file through both paths
//
// file_path: package-relative path of a test module (its last three characters,
//   the ".cm" suffix, are stripped to form the module path).
// Returns a results record: {file, tests, passed, failed, errors}, where
//   tests holds {name, status} entries and errors holds mismatch descriptions.
//
// Steps: read and analyze the source once, load the module through
// run_ast_fn and run_ast_noopt_fn (each with its own env), compare whether
// loading disrupted, then either call every exported function on both sides
// and compare outcomes, or compare the two module return values directly.
function diff_test_file(file_path) {
var mod_path = text(file_path, 0, -3)
var src_path = fd.realpath('.') + '/' + file_path
var src = null
var ast = null
var mod_opt = null
var mod_noopt = null
var results = {file: file_path, tests: [], passed: 0, failed: 0, errors: []}
var use_pkg = fd.realpath('.')
var opt_error = null
var noopt_error = null
var keys = null
// i starts at 0 here; the export loop below relies on this initial value.
var i = 0
var k = null
var opt_result = null
var noopt_result = null
var opt_err = null
var noopt_err = null
var _run_one_opt = null
var _run_one_noopt = null
// Build env for module loading
// Each call produces a new stoned env whose use() resolves through the shop
// relative to the current package directory.
var make_env = function() {
return stone({
use: function(path) {
return shop.use(path, use_pkg)
}
})
}
// Read and parse
// A disruption while reading or analyzing is recorded as a single error entry;
// the check after the call is what returns from diff_test_file.
var _read = function() {
src = text(fd.slurp(src_path))
ast = analyze(src, src_path)
} disruption {
push(results.errors, `failed to parse ${file_path}`)
return results
}
_read()
if (length(results.errors) > 0) return results
// Run optimized
var _run_opt = function() {
mod_opt = run_ast_fn(mod_path, ast, make_env())
} disruption {
opt_error = "disrupted"
}
_run_opt()
// Run unoptimized
var _run_noopt = function() {
mod_noopt = run_ast_noopt_fn(mod_path, ast, make_env())
} disruption {
noopt_error = "disrupted"
}
_run_noopt()
// Compare module-level behavior
// Both flags are either null or "disrupted", so inequality means exactly one
// side disrupted while loading.
if (opt_error != noopt_error) {
push(results.errors, `module load mismatch: opt=${opt_error != null ? opt_error : "ok"} noopt=${noopt_error != null ? noopt_error : "ok"}`)
results.failed = results.failed + 1
return results
}
if (opt_error != null) {
// Both disrupted during load — that's consistent
results.passed = results.passed + 1
push(results.tests, {name: "<module>", status: "passed"})
return results
}
// If module returns a record of functions, test each one
// NOTE(review): keys are taken from the optimized module only, and an export is
// skipped unless it is a function on BOTH sides. A key that exists only in the
// unoptimized module, or that is a function on one side but not the other, is
// never reported as a mismatch — confirm whether that is intended.
if (is_object(mod_opt) && is_object(mod_noopt)) {
keys = array(mod_opt)
while (i < length(keys)) {
k = keys[i]
if (is_function(mod_opt[k]) && is_function(mod_noopt[k])) {
opt_result = null
noopt_result = null
opt_err = null
noopt_err = null
// Each export is called with no arguments; a disruption is recorded as a flag
// instead of propagating.
_run_one_opt = function() {
opt_result = mod_opt[k]()
} disruption {
opt_err = "disrupted"
}
_run_one_opt()
_run_one_noopt = function() {
noopt_result = mod_noopt[k]()
} disruption {
noopt_err = "disrupted"
}
_run_one_noopt()
// Disruption behavior is compared first; results are only compared when both
// sides agree on whether the call disrupted.
if (opt_err != noopt_err) {
push(results.tests, {name: k, status: "failed"})
push(results.errors, `${k}: disruption mismatch opt=${opt_err != null ? opt_err : "ok"} noopt=${noopt_err != null ? noopt_err : "ok"}`)
results.failed = results.failed + 1
} else if (!values_equal(opt_result, noopt_result)) {
push(results.tests, {name: k, status: "failed"})
push(results.errors, `${k}: result mismatch opt=${describe(opt_result)} noopt=${describe(noopt_result)}`)
results.failed = results.failed + 1
} else {
push(results.tests, {name: k, status: "passed"})
results.passed = results.passed + 1
}
}
i = i + 1
}
} else {
// Compare direct return values
if (!values_equal(mod_opt, mod_noopt)) {
push(results.tests, {name: "<return>", status: "failed"})
push(results.errors, `return value mismatch: opt=${describe(mod_opt)} noopt=${describe(mod_noopt)}`)
results.failed = results.failed + 1
} else {
push(results.tests, {name: "<return>", status: "passed"})
results.passed = results.passed + 1
}
}
return results
}
// Main: diff every selected test file, print a per-file line followed by its
// mismatches, then print the totals and stop the actor.
var test_files = collect_tests(target_test)
log.console(`Differential testing: ${text(length(test_files))} file(s)`)
var total_passed = 0
var total_failed = 0
var i = 0
var result = null
var j = 0
for (i = 0; i < length(test_files); i++) {
  result = diff_test_file(test_files[i])
  log.console(` ${result.file}: ${text(result.passed)} passed, ${text(result.failed)} failed`)
  for (j = 0; j < length(result.errors); j++) {
    log.console(` MISMATCH: ${result.errors[j]}`)
  }
  total_passed = total_passed + result.passed
  total_failed = total_failed + result.failed
}
log.console(`----------------------------------------`)
log.console(`Diff: ${text(total_passed)} passed, ${text(total_failed)} failed, ${text(total_passed + total_failed)} total`)
if (total_failed > 0) {
  log.console(`DIFFERENTIAL FAILURES DETECTED`)
}
$stop()

View File

@@ -1,9 +0,0 @@
nav:
- index.md
- cellscript.md
- actors.md
- packages.md
- cli.md
- c-modules.md
- Standard Library: library

92
docs/_index.md Normal file
View File

@@ -0,0 +1,92 @@
---
title: "Documentation"
description: "ƿit language documentation"
type: "docs"
---
![image](/images/wizard.png)
ƿit is an actor-based scripting language for building concurrent applications. It combines a familiar C-like syntax with the actor model of computation, optimized for low memory usage and simplicity.
## Key Features
- **Actor Model** — isolated memory, message passing, no shared state
- **Immutability** — `stone()` makes values permanently frozen
- **Prototype Inheritance** — objects without classes
- **C Integration** — seamlessly extend with native code
- **Cross-Platform** — deploy to desktop, web, and embedded
## Quick Start
```javascript
// hello.ce - A simple actor
print("Hello, ƿit!")
$stop()
```
```bash
pit hello
```
## Language
- [**ƿit Language**](/docs/language/) — syntax, types, and operators
- [**Actors and Modules**](/docs/actors/) — the execution model
- [**Requestors**](/docs/requestors/) — asynchronous composition
- [**Packages**](/docs/packages/) — code organization and sharing
- [**Shop Architecture**](/docs/shop/) — module resolution, compilation, and caching
## Reference
- [**Built-in Functions**](/docs/functions/) — intrinsics reference
- [text](/docs/library/text/) — text conversion and manipulation
- [number](/docs/library/number/) — numeric conversion and operations
- [array](/docs/library/array/) — array creation and manipulation
- [object](/docs/library/object/) — object creation, prototypes, and serialization
## Standard Library
Modules loaded with `use()`:
- [blob](/docs/library/blob/) — binary data
- [time](/docs/library/time/) — time and dates
- [math](/docs/library/math/) — trigonometry and math
- [json](/docs/library/json/) — JSON encoding/decoding
- [random](/docs/library/random/) — random numbers
## Tools
- [**Command Line**](/docs/cli/) — the `pit` tool
- [**Testing**](/docs/testing/) — writing and running tests
- [**Writing C Modules**](/docs/c-modules/) — native extensions
## Architecture
ƿit programs are organized into **packages**. Each package contains:
- **Modules** (`.cm`) — return a value, cached and frozen
- **Actors** (`.ce`) — run independently, communicate via messages
- **C files** (`.c`) — compiled to native libraries
Actors never share memory. They communicate by sending messages, which are automatically serialized. This makes concurrent programming safe and predictable.
## Installation
```bash
# Clone and bootstrap
git clone https://gitea.pockle.world/john/cell
cd cell
make bootstrap
```
The ƿit shop is stored at `~/.pit/`.
## Development
After making changes, recompile with:
```bash
make
```
Run `cell --help` to see all available CLI flags.

View File

@@ -1,10 +1,15 @@
# Actors and Modules
---
title: "Actors and Modules"
description: "The ƿit execution model"
weight: 20
type: "docs"
---
Cell organizes code into two types of scripts: **modules** (`.cm`) and **actors** (`.ce`).
ƿit organizes code into two types of scripts: **modules** (`.cm`) and **actors** (`.ce`).
## The Actor Model
Cell is built on the actor model of computation. Each actor:
ƿit is built on the actor model of computation. Each actor:
- Has its own **isolated memory** — actors never share state
- Runs to completion each **turn** — no preemption
@@ -21,13 +26,13 @@ A module is a script that **returns a value**. The returned value is cached and
// math_utils.cm
var math = use('math/radians')
function distance(x1, y1, x2, y2) {
var distance = function(x1, y1, x2, y2) {
var dx = x2 - x1
var dy = y2 - y1
return math.sqrt(dx * dx + dy * dy)
}
function midpoint(x1, y1, x2, y2) {
var midpoint = function(x1, y1, x2, y2) {
return {
x: (x1 + x2) / 2,
y: (y1 + y2) / 2
@@ -60,12 +65,12 @@ An actor is a script that **does not return a value**. It runs as an independent
```javascript
// worker.ce
log.console("Worker started")
print("Worker started")
$on_message = function(msg) {
log.console("Received:", msg)
$receiver(function(msg, reply) {
print("Received:", msg)
// Process message...
}
})
```
**Key properties:**
@@ -83,7 +88,7 @@ Actors have access to special functions prefixed with `$`:
Reference to the current actor.
```javascript
log.console($me) // actor reference
print($me) // actor reference
```
### $stop()
@@ -100,7 +105,7 @@ Send a message to another actor.
```javascript
$send(other_actor, {type: "ping", data: 42}, function(reply) {
log.console("Got reply:", reply)
print("Got reply:", reply)
})
```
@@ -112,7 +117,7 @@ Start a new actor from a script.
```javascript
$start(function(new_actor) {
log.console("Started:", new_actor)
print("Started:", new_actor)
}, "worker")
```
@@ -122,7 +127,7 @@ Schedule a callback after a delay.
```javascript
$delay(function() {
log.console("5 seconds later")
print("5 seconds later")
}, 5)
```
@@ -169,19 +174,47 @@ $contact(function(connection) {
### $time_limit(requestor, seconds)
Wrap a requestor with a timeout.
Wrap a requestor with a timeout. See [Requestors](/docs/requestors/) for details.
```javascript
$time_limit(my_requestor, 10) // 10 second timeout
```
### $couple(actor)
Couple the current actor to another actor. When the coupled actor dies, the current actor also dies. Coupling is automatic between an actor and its overling (parent).
```javascript
$couple(other_actor)
```
### $unneeded(callback, seconds)
Schedule the actor for removal after a specified time.
```javascript
$unneeded(function() {
// cleanup before removal
}, 30)
```
### $connection(callback, actor, config)
Get information about the connection to another actor, such as latency, bandwidth, and activity.
```javascript
$connection(function(info) {
print(info.latency)
}, other_actor, {})
```
## Module Resolution
When you call `use('name')`, Cell searches:
When you call `use('name')`, ƿit searches:
1. **Current package** — files relative to package root
2. **Dependencies** — packages declared in `cell.toml`
3. **Core** — built-in Cell modules
2. **Dependencies** — packages declared in `cell.toml`
3. **Core** — built-in ƿit modules
```javascript
// From within package 'myapp':
@@ -191,7 +224,7 @@ use('json') // core json module
use('otherlib/foo') // dependency 'otherlib', file foo.cm
```
Files starting with underscore (`_helper.cm`) are private to the package.
Files in the `internal/` directory are private to the package.
## Example: Simple Actor System
@@ -199,14 +232,14 @@ Files starting with underscore (`_helper.cm`) are private to the package.
// main.ce - Entry point
var config = use('config')
log.console("Starting application...")
print("Starting application...")
$start(function(worker) {
$send(worker, {task: "process", data: [1, 2, 3]})
}, "worker")
$delay(function() {
log.console("Shutting down")
print("Shutting down")
$stop()
}, 10)
```

View File

@@ -1,6 +1,11 @@
# Writing C Modules
---
title: "Writing C Modules"
description: "Extending ƿit with native code"
weight: 50
type: "docs"
---
Cell makes it easy to extend functionality with C code. C files in a package are compiled into a dynamic library and can be imported like any other module.
ƿit makes it easy to extend functionality with C code. C files in a package are compiled into a dynamic library and can be imported like any other module.
## Basic Structure
@@ -45,12 +50,17 @@ Where:
- `<filename>` is the C file name without extension
Examples:
- `mypackage/math.c` `js_mypackage_math_use`
- `gitea.pockle.world/john/lib/render.c` `js_gitea_pockle_world_john_lib_render_use`
- `mypackage/math.c` -> `js_mypackage_math_use`
- `gitea.pockle.world/john/lib/render.c` -> `js_gitea_pockle_world_john_lib_render_use`
- `mypackage/game.ce` (AOT actor) -> `js_mypackage_game_program`
Actor files (`.ce`) use the `_program` suffix instead of `_use`.
**Note:** Having both a `.cm` and `.c` file with the same stem at the same scope is a build error.
## Required Headers
Include `cell.h` for all Cell integration:
Include `cell.h` for all ƿit integration:
```c
#include "cell.h"
@@ -63,7 +73,7 @@ This provides:
## Conversion Functions
### JavaScript C
### JavaScript <-> C
```c
// Numbers
@@ -201,7 +211,7 @@ static const JSCFunctionListEntry js_funcs[] = {
CELL_USE_FUNCS(js_funcs)
```
Usage in Cell:
Usage in ƿit:
```javascript
var vector = use('vector')
@@ -211,44 +221,16 @@ var n = vector.normalize(3, 4) // {x: 0.6, y: 0.8}
var d = vector.dot(1, 0, 0, 1) // 0
```
## Combining C and Cell
A common pattern is to have a C file provide low-level functions and a `.cm` file provide a higher-level API:
```c
// _vector_native.c
// ... raw C functions ...
```
```javascript
// vector.cm
var native = this // C module passed as 'this'
function Vector(x, y) {
return {x: x, y: y}
}
Vector.length = function(v) {
return native.length(v.x, v.y)
}
Vector.normalize = function(v) {
return native.normalize(v.x, v.y)
}
return Vector
```
## Build Process
C files are automatically compiled when you run:
```bash
cell build
cell update
pit build
pit update
```
The resulting dynamic library is placed in `~/.cell/lib/`.
Each C file is compiled into a per-file dynamic library at `~/.pit/lib/<pkg>/<stem>.dylib`.
## Platform-Specific Code
@@ -260,7 +242,7 @@ audio_playdate.c # Playdate
audio_emscripten.c # Web/Emscripten
```
Cell selects the appropriate file based on the target platform.
ƿit selects the appropriate file based on the target platform.
## Static Declarations

View File

@@ -1,288 +0,0 @@
# Cell Language
Cell is a scripting language for actor-based programming. It combines a familiar syntax with a prototype-based object system and strict immutability semantics.
## Basics
### Variables and Constants
```javascript
var x = 10 // mutable variable (block-scoped like let)
def PI = 3.14159 // constant (cannot be reassigned)
```
### Data Types
Cell has eight fundamental types:
- **number** — DEC64 decimal floating point (no rounding errors)
- **text** — Unicode strings
- **logical** — `true` or `false`
- **null** — the absence of a value (no `undefined`)
- **array** — ordered, numerically-indexed sequences
- **object** — key-value records with prototype inheritance
- **blob** — binary data (bits, not bytes)
- **function** — first-class callable values
### Literals
```javascript
// Numbers
42
3.14
1_000_000 // underscores for readability
// Text
"hello"
'world'
`template ${x}` // string interpolation
// Logical
true
false
// Null
null
// Arrays
[1, 2, 3]
["a", "b", "c"]
// Objects
{name: "cell", version: 1}
{x: 10, y: 20}
```
### Operators
```javascript
// Arithmetic
+ - * / %
** // exponentiation
// Comparison (always strict)
== // equals (like === in JS)
!= // not equals (like !== in JS)
< > <= >=
// Logical
&& || !
// Assignment
= += -= *= /=
```
### Control Flow
```javascript
// Conditionals
if (x > 0) {
log.console("positive")
} else if (x < 0) {
log.console("negative")
} else {
log.console("zero")
}
// Ternary
var sign = x > 0 ? 1 : -1
// Loops
for (var i = 0; i < 10; i++) {
log.console(i)
}
for (var item of items) {
log.console(item)
}
for (var key in obj) {
log.console(key, obj[key])
}
while (condition) {
// body
}
// Control
break
continue
return value
throw "error message"
```
### Functions
```javascript
// Named function
function add(a, b) {
return a + b
}
// Anonymous function
var multiply = function(a, b) {
return a * b
}
// Arrow function
var square = x => x * x
var sum = (a, b) => a + b
// Rest parameters
function log_all(...args) {
for (var arg of args) log.console(arg)
}
// Default parameters
function greet(name, greeting = "Hello") {
return `${greeting}, ${name}!`
}
```
All closures capture `this` (like arrow functions in JavaScript).
## Arrays
Arrays are **distinct from objects**. They are ordered, numerically-indexed sequences. You cannot add arbitrary string keys to an array.
```javascript
var arr = [1, 2, 3]
arr[0] // 1
arr[2] = 10 // [1, 2, 10]
length(arr) // 3
// Array spread
var more = [...arr, 4, 5] // [1, 2, 10, 4, 5]
```
## Objects
Objects are key-value records with prototype-based inheritance.
```javascript
var point = {x: 10, y: 20}
point.x // 10
point["y"] // 20
// Object spread
var point3d = {...point, z: 30}
// Prototype inheritance
var colored_point = {__proto__: point, color: "red"}
colored_point.x // 10 (inherited)
```
### Prototypes
```javascript
// Create object with prototype
var child = meme(parent)
// Get prototype
var p = proto(child)
// Check prototype chain
isa(child, parent) // true
```
## Immutability with Stone
The `stone()` function makes values permanently immutable.
```javascript
var config = stone({
debug: true,
maxRetries: 3
})
config.debug = false // Error! Stone objects cannot be modified
```
Stone is **deep** — all nested objects and arrays are also frozen. This cannot be reversed.
```javascript
stone.p(value) // returns true if value is stone
```
## Built-in Functions
### length(value)
Returns the length of arrays (elements), text (codepoints), blobs (bits), or functions (arity).
```javascript
length([1, 2, 3]) // 3
length("hello") // 5
length(function(a,b){}) // 2
```
### use(path)
Import a module. Returns the cached, stone value.
```javascript
var math = use('math/radians')
var json = use('json')
```
### isa(value, type)
Check type or prototype chain.
```javascript
is_number(42) // true
is_text("hi") // true
is_array([1,2]) // true
is_object({}) // true
isa(child, parent) // true if parent is in prototype chain
```
### reverse(array)
Returns a new array with elements in reverse order.
```javascript
reverse([1, 2, 3]) // [3, 2, 1]
```
### logical(value)
Convert to boolean.
```javascript
logical(0) // false
logical(1) // true
logical("true") // true
logical("false") // false
logical(null) // false
```
## Logging
```javascript
log.console("message") // standard output
log.error("problem") // error output
```
## Pattern Matching
Cell supports regex patterns in string functions, but not standalone regex objects.
```javascript
text.search("hello world", /world/)
replace("hello", /l/g, "L")
```
## Error Handling
```javascript
try {
riskyOperation()
} catch (e) {
log.error(e)
}
throw "something went wrong"
```
If an actor has an uncaught error, it crashes.

View File

@@ -1,138 +1,232 @@
# Command Line Interface
---
title: "Command Line Interface"
description: "The pit tool"
weight: 40
type: "docs"
---
Cell provides a command-line interface for managing packages, running scripts, and building applications.
ƿit provides a command-line interface for managing packages, running scripts, and building applications.
## Basic Usage
```bash
cell <command> [arguments]
pit <command> [arguments]
```
## Commands
### cell version
### pit version
Display the Cell version.
Display the ƿit version.
```bash
cell version
pit version
# 0.1.0
```
### cell install
### pit install
Install a package to the shop.
```bash
cell install gitea.pockle.world/john/prosperon
cell install /Users/john/local/mypackage # local path
pit install gitea.pockle.world/john/prosperon
pit install /Users/john/local/mypackage # local path
```
### cell update
### pit update
Update packages from remote sources.
```bash
cell update # update all packages
cell update <package> # update specific package
pit update # update all packages
pit update <package> # update specific package
```
### cell remove
### pit remove
Remove a package from the shop.
```bash
cell remove gitea.pockle.world/john/oldpackage
pit remove gitea.pockle.world/john/oldpackage
```
### cell list
### pit list
List installed packages.
```bash
cell list # list all installed packages
cell list <package> # list dependencies of a package
pit list # list all installed packages
pit list <package> # list dependencies of a package
```
### cell ls
### pit ls
List modules and actors in a package.
```bash
cell ls # list files in current project
cell ls <package> # list files in specified package
pit ls # list files in current project
pit ls <package> # list files in specified package
```
### cell build
### pit build
Build the current package.
Build the current package. Compiles C files into per-file dynamic libraries and installs them to `~/.pit/lib/<pkg>/<stem>.dylib`.
```bash
cell build
pit build # build current package
pit build <package> # build specific package
```
### cell test
### pit test
Run tests.
Run tests. See [Testing](/docs/testing/) for the full guide.
```bash
cell test # run tests in current package
cell test all # run all tests
cell test <package> # run tests in specific package
pit test # run tests in current package
pit test all # run all tests
pit test <package> # run tests in specific package
pit test suite --verify --diff # with IR verification and differential testing
```
### cell link
### pit link
Manage local package links for development.
```bash
cell link add <canonical> <local_path> # link a package
cell link list # show all links
cell link delete <canonical> # remove a link
cell link clear # remove all links
pit link add <canonical> <local_path> # link a package
pit link list # show all links
pit link delete <canonical> # remove a link
pit link clear # remove all links
```
### cell fetch
### pit fetch
Fetch package sources without extracting.
```bash
cell fetch <package>
pit fetch <package>
```
### cell upgrade
### pit upgrade
Upgrade the Cell installation itself.
Upgrade the ƿit installation itself.
```bash
cell upgrade
pit upgrade
```
### cell clean
### pit clean
Clean build artifacts.
```bash
cell clean
pit clean
```
### cell help
### pit add
Add a dependency to the current package. Updates `cell.toml` and installs the package to the shop.
```bash
pit add gitea.pockle.world/john/prosperon # default alias
pit add gitea.pockle.world/john/prosperon myalias # custom alias
```
### pit clone
Clone a package to a local path and link it for development.
```bash
pit clone gitea.pockle.world/john/prosperon ./prosperon
```
### pit unlink
Remove a link created by `pit link` or `pit clone` and restore the original package.
```bash
pit unlink gitea.pockle.world/john/prosperon
```
### pit search
Search for packages, actors, or modules matching a query.
```bash
pit search math
```
### pit why
Show which installed packages depend on a given package (reverse dependency lookup).
```bash
pit why gitea.pockle.world/john/prosperon
```
### pit resolve
Print the fully resolved dependency closure for a package.
```bash
pit resolve # resolve current package
pit resolve <package> # resolve specific package
pit resolve --locked # show lock state without links
```
### pit graph
Emit a dependency graph.
```bash
pit graph # tree of current package
pit graph --format dot # graphviz dot output
pit graph --format json # json output
pit graph --world # graph all installed packages
pit graph --locked # show lock view without links
```
### pit verify
Verify integrity and consistency of packages, links, and builds.
```bash
pit verify # verify current package
pit verify shop # verify entire shop
pit verify --deep # traverse full dependency closure
pit verify --target <triple>
```
### pit pack
Build a statically linked binary from a package and all its dependencies.
```bash
pit pack <package> # build static binary (output: app)
pit pack <package> -o myapp # specify output name
pit pack <package> -t <triple> # cross-compile for target
```
### pit config
Manage system and actor configuration values in `cell.toml`.
```bash
pit config list # list all config
pit config get system.ar_timer # get a value
pit config set system.ar_timer 5.0 # set a value
pit config actor <name> list # list actor config
pit config actor <name> get <key> # get actor config
pit config actor <name> set <key> <val> # set actor config
```
### pit help
Display help information.
```bash
cell help
cell help <command>
```
## Running Scripts
Any `.ce` file in the Cell core can be run as a command:
```bash
cell version # runs version.ce
cell build # runs build.ce
cell test # runs test.ce
pit help
pit help <command>
```
## Package Locators
@@ -143,19 +237,21 @@ Packages are identified by locators:
- **Local**: `/absolute/path/to/package`
```bash
cell install gitea.pockle.world/john/prosperon
cell install /Users/john/work/mylib
pit install gitea.pockle.world/john/prosperon
pit install /Users/john/work/mylib
```
## Configuration
Cell stores its data in `~/.cell/`:
ƿit stores its data in `~/.pit/`:
```
~/.cell/
├── packages/ # installed packages
├── lib/ # compiled dynamic libraries
├── build/ # build cache
~/.pit/
├── packages/ # installed package sources
├── lib/ # installed per-file dylibs and mach (persistent)
│ ├── core/ # core package: .dylib and .mach files
│ └── <pkg>/ # per-package subdirectories
├── build/ # ephemeral build cache (safe to delete)
├── cache/ # downloaded archives
├── lock.toml # installed package versions
└── link.toml # local development links
@@ -163,7 +259,7 @@ Cell stores its data in `~/.cell/`:
## Environment
Cell reads the `HOME` environment variable to locate the shop directory.
ƿit reads the `HOME` environment variable to locate the shop directory.
## Exit Codes

264
docs/compiler-tools.md Normal file
View File

@@ -0,0 +1,264 @@
---
title: "Compiler Inspection Tools"
description: "Tools for inspecting and debugging the compiler pipeline"
weight: 50
type: "docs"
---
ƿit includes a set of tools for inspecting the compiler pipeline at every stage. These are useful for debugging, testing optimizations, and understanding what the compiler does with your code.
## Pipeline Overview
The compiler runs in stages:
```
source → tokenize → parse → fold → mcode → streamline → output
```
The fold, mcode, and streamline stages each have a corresponding dump tool that lets you see their output; `ir_report.ce` covers the whole pipeline.
| Stage | Tool | What it shows |
|-------------|-------------------|----------------------------------------|
| fold | `dump_ast.cm` | Folded AST as JSON |
| mcode | `dump_mcode.cm` | Raw mcode IR before optimization |
| streamline | `dump_stream.cm` | Before/after instruction counts + IR |
| streamline | `dump_types.cm` | Optimized IR with type annotations |
| streamline | `streamline.ce` | Full optimized IR as JSON |
| all | `ir_report.ce` | Structured optimizer flight recorder |
All tools take a source file as input and run the pipeline up to the relevant stage.
## Quick Start
```bash
# see raw mcode IR
./cell --core . dump_mcode.cm myfile.ce
# see what the optimizer changed
./cell --core . dump_stream.cm myfile.ce
# full optimizer report with events
./cell --core . ir_report.ce --full myfile.ce
```
## dump_ast.cm
Prints the folded AST as JSON. This is the output of the parser and constant folder, before mcode generation.
```bash
./cell --core . dump_ast.cm <file.ce|file.cm>
```
## dump_mcode.cm
Prints the raw mcode IR before any optimization. Shows the instruction array as formatted text with opcode, operands, and program counter.
```bash
./cell --core . dump_mcode.cm <file.ce|file.cm>
```
## dump_stream.cm
Shows a before/after comparison of the optimizer. For each function, prints:
- Instruction count before and after
- Number of eliminated instructions
- The streamlined IR (nops hidden by default)
```bash
./cell --core . dump_stream.cm <file.ce|file.cm>
```
## dump_types.cm
Shows the optimized IR with type annotations. Each instruction is followed by the known types of its slot operands, inferred by walking the instruction stream.
```bash
./cell --core . dump_types.cm <file.ce|file.cm>
```
## streamline.ce
Runs the full pipeline (tokenize, parse, fold, mcode, streamline) and outputs the optimized IR as JSON. Useful for piping to `jq` or saving for comparison.
```bash
./cell --core . streamline.ce <file.ce|file.cm>
```
## ir_report.ce
The optimizer flight recorder. Runs the full pipeline with structured logging and outputs machine-readable, diff-friendly JSON. This is the most detailed tool for understanding what the optimizer did and why.
```bash
./cell --core . ir_report.ce [options] <file.ce|file.cm>
```
### Options
| Flag | Description |
|------|-------------|
| `--summary` | Per-pass JSON summaries with instruction counts and timing (default) |
| `--events` | Include rewrite events showing each optimization applied |
| `--types` | Include type delta records showing inferred slot types |
| `--ir-before=PASS` | Print canonical IR before a specific pass |
| `--ir-after=PASS` | Print canonical IR after a specific pass |
| `--ir-all` | Print canonical IR before and after all passes |
| `--full` | Everything: summary + events + types + ir-all |
With no flags, `--summary` is the default.
### Output Format
Output is line-delimited JSON. Each line is a self-contained JSON object with a `type` field:
**`type: "pass"`** — Per-pass summary with categorized instruction counts before and after:
```json
{
"type": "pass",
"pass": "eliminate_type_checks",
"fn": "fib",
"ms": 0.12,
"changed": true,
"before": {"instr": 77, "nop": 0, "guard": 16, "branch": 28, ...},
"after": {"instr": 77, "nop": 1, "guard": 15, "branch": 28, ...},
"changes": {"guards_removed": 1, "nops_added": 1}
}
```
**`type: "event"`** — Individual rewrite event with before/after instructions and reasoning:
```json
{
"type": "event",
"pass": "eliminate_type_checks",
"rule": "incompatible_type_forces_jump",
"at": 3,
"before": [["is_int", 5, 2, 4, 9], ["jump_false", 5, "rel_ni_2", 4, 9]],
"after": ["_nop_tc_1", ["jump", "rel_ni_2", 4, 9]],
"why": {"slot": 2, "known_type": "float", "checked_type": "int"}
}
```
**`type: "types"`** — Inferred type information for a function:
```json
{
"type": "types",
"fn": "fib",
"param_types": {},
"slot_types": {"25": "null"}
}
```
**`type: "ir"`** — Canonical IR text for a function at a specific point:
```json
{
"type": "ir",
"when": "before",
"pass": "all",
"fn": "fib",
"text": "fn fib (args=1, slots=26)\n @0 access s2, 2\n ..."
}
```
### Rewrite Rules
Each pass records events with named rules:
**eliminate_type_checks:**
- `known_type_eliminates_guard` — type already known, guard removed
- `incompatible_type_forces_jump` — type conflicts, conditional jump becomes unconditional
- `num_subsumes_int_float` — num check satisfied by int or float
- `dynamic_to_field` — load_dynamic/store_dynamic narrowed to field access
- `dynamic_to_index` — load_dynamic/store_dynamic narrowed to index access
**simplify_algebra:**
- `add_zero`, `sub_zero`, `mul_one`, `div_one` — identity operations become moves
- `mul_zero` — multiplication by zero becomes constant
- `self_eq`, `self_ne` — same-slot comparisons become constants
**simplify_booleans:**
- `not_jump_false_fusion` — not + jump_false fused into jump_true
- `not_jump_true_fusion` — not + jump_true fused into jump_false
- `double_not` — not + not collapsed to move
**eliminate_moves:**
- `self_move` — move to same slot becomes nop
**eliminate_dead_jumps:**
- `jump_to_next` — jump to immediately following label becomes nop
### Canonical IR Format
The `--ir-all`, `--ir-before`, and `--ir-after` flags produce a deterministic text representation of the IR:
```
fn fib (args=1, slots=26)
@0 access s2, 2
@1 is_int s4, s1 ; [guard]
@2 jump_false s4, "rel_ni_2" ; [branch]
@3 --- nop (tc) ---
@4 jump "rel_ni_2" ; [branch]
@5 lt_int s3, s1, s2
@6 jump "rel_done_4" ; [branch]
rel_ni_2:
@8 is_num s4, s1 ; [guard]
```
Properties:
- `@N` is the raw array index, stable across passes (passes replace, never insert or delete)
- `sN` prefix distinguishes slot operands from literal values
- String operands are quoted
- Labels appear as indented headers with a colon
- Category tags in brackets: `[guard]`, `[branch]`, `[load]`, `[store]`, `[call]`, `[arith]`, `[move]`, `[const]`
- Nops shown as `--- nop (reason) ---` with reason codes: `tc` (type check), `bl` (boolean), `mv` (move), `dj` (dead jump), `ur` (unreachable)
### Examples
```bash
# what passes changed something?
./cell --core . ir_report.ce --summary myfile.ce | jq 'select(.changed)'
# list all rewrite rules that fired
./cell --core . ir_report.ce --events myfile.ce | jq 'select(.type == "event") | .rule'
# diff IR before and after optimization
./cell --core . ir_report.ce --ir-all myfile.ce | jq -r 'select(.type == "ir") | .text'
# full report for analysis
./cell --core . ir_report.ce --full myfile.ce > report.json
```
## ir_stats.cm
A utility module used by `ir_report.ce` and available for custom tooling. Not a standalone tool.
```javascript
var ir_stats = use("ir_stats")
ir_stats.detailed_stats(func) // categorized instruction counts
ir_stats.ir_fingerprint(func) // djb2 hash of instruction array
ir_stats.canonical_ir(func, name, opts) // deterministic text representation
ir_stats.type_snapshot(slot_types) // frozen copy of type map
ir_stats.type_delta(before_types, after_types) // compute type changes
ir_stats.category_tag(op) // classify an opcode
```
### Instruction Categories
`detailed_stats` classifies each instruction into one of these categories:
| Category | Opcodes |
|----------|---------|
| load | `load_field`, `load_index`, `load_dynamic`, `get`, `access` (non-constant) |
| store | `store_field`, `store_index`, `store_dynamic`, `set_var`, `put`, `push` |
| branch | `jump`, `jump_true`, `jump_false`, `jump_not_null` |
| call | `invoke`, `goinvoke` |
| guard | `is_int`, `is_text`, `is_num`, `is_bool`, `is_null`, `is_array`, `is_func`, `is_record`, `is_stone` |
| arith | `add_int`, `sub_int`, ..., `add_float`, ..., `concat`, `neg_int`, `neg_float`, bitwise ops |
| move | `move` |
| const | `int`, `true`, `false`, `null`, `access` (with constant value) |
| label | string entries that are not nops |
| nop | strings starting with `_nop_` |
| other | everything else (`frame`, `setarg`, `array`, `record`, `function`, `return`, etc.) |

File diff suppressed because it is too large Load Diff

View File

@@ -1,66 +0,0 @@
# Cell
![image](wizard.png)
Cell is an actor-based scripting language for building concurrent applications. It combines a familiar C-like syntax with the actor model of computation, optimized for low memory usage and simplicity.
## Key Features
- **Actor Model** — isolated memory, message passing, no shared state
- **Immutability** — `stone()` makes values permanently frozen
- **Prototype Inheritance** — objects without classes
- **C Integration** — seamlessly extend with native code
- **Cross-Platform** — deploy to desktop, web, and embedded
## Quick Start
```javascript
// hello.ce - A simple actor
log.console("Hello, Cell!")
$stop()
```
```bash
cell hello
```
## Documentation
- [**Cell Language**](cellscript.md) — syntax, types, and built-in functions
- [**Actors and Modules**](actors.md) — the execution model
- [**Packages**](packages.md) — code organization and sharing
- [**Command Line**](cli.md) — the `cell` tool
- [**Writing C Modules**](c-modules.md) — native extensions
## Standard Library
- [text](library/text.md) — string manipulation
- [number](library/number.md) — numeric operations (functions are global: `floor()`, `max()`, etc.)
- [array](library/array.md) — array utilities
- [object](library/object.md) — object utilities
- [blob](library/blob.md) — binary data
- [time](library/time.md) — time and dates
- [math](library/math.md) — trigonometry and math
- [json](library/json.md) — JSON encoding/decoding
- [random](library/random.md) — random numbers
## Architecture
Cell programs are organized into **packages**. Each package contains:
- **Modules** (`.cm`) — return a value, cached and frozen
- **Actors** (`.ce`) — run independently, communicate via messages
- **C files** (`.c`) — compiled to native libraries
Actors never share memory. They communicate by sending messages, which are automatically serialized. This makes concurrent programming safe and predictable.
## Installation
```bash
# Clone and bootstrap
git clone https://gitea.pockle.world/john/cell
cd cell
make bootstrap
```
The Cell shop is stored at `~/.cell/`.

94
docs/kim.md Normal file
View File

@@ -0,0 +1,94 @@
---
title: "Kim Encoding"
description: "Compact character and count encoding"
weight: 80
type: "docs"
---
Kim is a character and count encoding designed by Douglas Crockford. It encodes Unicode characters and variable-length integers using continuation bytes. Kim is simpler and more compact than UTF-8 for most text.
## Continuation Bytes
The fundamental idea in Kim is the continuation byte:
```
C D D D D D D D
```
- **C** — continue bit. If 1, read another byte. If 0, this is the last byte.
- **D** (7 bits) — data bits.
To decode: shift the accumulator left by 7 bits, add the 7 data bits. If the continue bit is 1, repeat with the next byte. If 0, the value is complete.
To encode: take the value, emit 7 bits at a time from most significant to least significant, setting the continue bit on all bytes except the last.
## Character Encoding
Kim encodes Unicode codepoints directly as continuation byte sequences:
| Range | Bytes | Characters |
|-------|-------|------------|
| U+0000 to U+007F | 1 | ASCII |
| U+0080 to U+3FFF | 2 | First quarter of BMP |
| U+4000 to U+10FFFF | 3 | All other Unicode |
Unlike UTF-16, there is no need for surrogate pairs or escapement. Every Unicode character, including emoji and characters from the supplementary planes, is encoded in at most 3 bytes.
### Examples
```
'A' (U+0041) → 41
'é' (U+00E9) → 81 69
'💩' (U+1F4A9) → 87 E9 29
```
## Count Encoding
Kim is also used for encoding counts (lengths, sizes). The same continuation byte format represents non-negative integers of arbitrary size:
| Range | Bytes |
|-------|-------|
| 0 to 127 | 1 |
| 128 to 16383 | 2 |
| 16384 to 2097151 | 3 |
## Comparison with UTF-8
| Property | Kim | UTF-8 |
|----------|-----|-------|
| ASCII | 1 byte | 1 byte |
| BMP (first quarter) | 2 bytes | 2-3 bytes |
| Full Unicode | 3 bytes | 3-4 bytes |
| Self-synchronizing | No | Yes |
| Sortable | No | Yes |
| Simpler to implement | Yes | No |
| Byte count for counts | Variable (7 bits/byte) | Not applicable |
Kim trades self-synchronization (the ability to find character boundaries from any position) for simplicity and compactness. In practice, Kim text is accessed sequentially, so self-synchronization is not needed.
## Usage in ƿit
Kim is used internally by blobs and by the Nota message format.
### In Blobs
The `blob.write_text` and `blob.read_text` functions use Kim to encode text into binary data:
```javascript
var blob = use('blob')
var b = blob.make()
blob.write_text(b, "hello") // Kim-encoded length + characters
stone(b)
var text = blob.read_text(b, 0) // "hello"
```
### In Nota
Nota uses Kim for two purposes:
1. **Counts** — array lengths, text lengths, blob sizes, record pair counts
2. **Characters** — text content within Nota messages
The preamble byte of each Nota value incorporates the first few bits of a Kim-encoded count, with the continue bit indicating whether more bytes follow.
See [Nota Format](#nota) for the full specification.

649
docs/language.md Normal file
View File

@@ -0,0 +1,649 @@
---
title: "ƿit Language"
description: "Syntax, types, operators, and built-in functions"
weight: 10
type: "docs"
---
ƿit is a scripting language for actor-based programming. It combines a familiar syntax with a prototype-based object system and strict immutability semantics.
## Basics
### Variables and Constants
Variables are declared with `var`, constants with `def`. All declarations must be initialized and must appear at the function body level — not inside `if`, `while`, `for`, or `do` blocks.
```javascript
var x = 10
var name = "pit"
var empty = null
def PI = 3.14159 // constant, cannot be reassigned
var a = 1, b = 2, c = 3 // multiple declarations
```
### Data Types
ƿit has eight fundamental types:
- **number** — DEC64 decimal floating point (decimal fractions such as `0.1` are represented exactly, with no binary rounding errors)
- **text** — Unicode strings
- **logical** — `true` or `false`
- **null** — the absence of a value (no `undefined`)
- **array** — ordered, numerically-indexed sequences
- **object** — key-value records with prototype inheritance
- **blob** — binary data (bits, not bytes)
- **function** — first-class callable values
### Literals
```javascript
// Numbers
42
3.14
-5
0
1e3 // scientific notation (1000)
// Text
"hello"
`template ${x}` // string interpolation
`${1 + 2}` // expression interpolation
// Logical
true
false
// Null
null
// Arrays
[1, 2, 3]
[]
// Objects
{a: 1, b: "two"}
{}
// Regex
/\d+/
/hello/i // with flags
```
## Operators
### Arithmetic
```javascript
2 + 3 // 5
5 - 3 // 2
3 * 4 // 12
12 / 4 // 3
10 % 3 // 1
2 ** 3 // 8 (exponentiation)
```
### Comparison
All comparisons are strict — there is no type coercion.
```javascript
5 == 5 // true
5 != 6 // true
3 < 5 // true
5 > 3 // true
3 <= 3 // true
5 >= 5 // true
```
### Logical
```javascript
true && true // true
true && false // false
false || true // true
false || false // false
!true // false
!false // true
```
Logical operators short-circuit:
```javascript
var called = false
var fn = function() { called = true; return true }
var r = false && fn() // fn() not called
r = true || fn() // fn() not called
```
### Bitwise
```javascript
5 & 3 // 1 (AND)
5 | 3 // 7 (OR)
5 ^ 3 // 6 (XOR)
~0 // -1 (NOT)
1 << 3 // 8 (left shift)
8 >> 3 // 1 (right shift)
-1 >>> 1 // 2147483647 (unsigned right shift)
```
### Unary
```javascript
+5 // 5
-5 // -5
-(-5) // 5
```
### Increment and Decrement
```javascript
var x = 5
x++ // returns 5, x becomes 6 (postfix)
++x // returns 7, x becomes 7 (prefix)
x-- // returns 7, x becomes 6 (postfix)
--x // returns 5, x becomes 5 (prefix)
```
### Compound Assignment
```javascript
var x = 10
x += 3 // 13
x -= 3 // 10
x *= 2 // 20
x /= 4 // 5
x %= 3 // 2
```
### Ternary
```javascript
var a = true ? 1 : 2 // 1
var b = false ? 1 : 2 // 2
var c = true ? (false ? 1 : 2) : 3 // 2 (nested)
```
### Comma
The comma operator evaluates all expressions and returns the last.
```javascript
var x = (1, 2, 3) // 3
```
### In
Test whether a key exists in an object.
```javascript
var o = {a: 1}
"a" in o // true
"b" in o // false
```
### Delete
Remove a key from an object.
```javascript
var o = {a: 1, b: 2}
delete o.a
"a" in o // false
o.b // 2
```
## Property Access
### Dot and Bracket
```javascript
var o = {x: 10}
o.x // 10 (dot read)
o.x = 20 // dot write
o["x"] // 20 (bracket read)
var key = "x"
o[key] // 20 (computed bracket)
o["y"] = 30 // bracket write
```
### Object as Key
Objects can be used as keys in other objects.
```javascript
var k = {}
var o = {}
o[k] = 42
o[k] // 42
o[{}] // null (different object)
k in o // true
delete o[k]
k in o // false
```
### Chained Access
```javascript
var d = {a: {b: [1, {c: 99}]}}
d.a.b[1].c // 99
```
## Arrays
Arrays are **distinct from objects**. They are ordered, numerically-indexed sequences.
```javascript
var arr = [1, 2, 3]
arr[0] // 1
arr[2] = 10 // [1, 2, 10]
length(arr) // 3
```
### Push and Pop
```javascript
var a = [1, 2]
a[] = 3 // push: [1, 2, 3]
length(a) // 3
var v = a[] // pop: v is 3, a is [1, 2]
length(a) // 2
```
## Objects
Objects are key-value records with prototype-based inheritance.
```javascript
var point = {x: 10, y: 20}
point.x // 10
point["y"] // 20
```
### Prototypes
```javascript
// Create object with prototype
var parent = {x: 10}
var child = meme(parent)
child.x // 10 (inherited)
proto(child) // parent
// Override does not mutate parent
child.x = 20
parent.x // 10
```
### Mixins
```javascript
var p = {a: 1}
var m1 = {b: 2}
var m2 = {c: 3}
var child = meme(p, [m1, m2])
child.a // 1 (from prototype)
child.b // 2 (from mixin)
child.c // 3 (from mixin)
```
## Control Flow
### If / Else
```javascript
var x = 0
if (true) x = 1
if (false) x = 2 else x = 3
if (false) x = 4
else if (true) x = 5
else x = 6
```
### While
```javascript
var i = 0
while (i < 5) i++
// break
i = 0
while (true) {
if (i >= 3) break
i++
}
// continue
var sum = 0
i = 0
while (i < 5) {
i++
if (i % 2 == 0) continue
sum += i
}
```
### For
Variables cannot be declared in the for initializer. Declare them at the function body level.
```javascript
var sum = 0
var i = 0
for (i = 0; i < 5; i++) sum += i
// break
sum = 0
i = 0
for (i = 0; i < 10; i++) {
if (i == 5) break
sum += i
}
// continue
sum = 0
i = 0
for (i = 0; i < 5; i++) {
if (i % 2 == 0) continue
sum += i
}
// nested
sum = 0
var j = 0
for (i = 0; i < 3; i++) {
for (j = 0; j < 3; j++) {
sum++
}
}
```
## Functions
### Function Expressions
```javascript
var add = function(a, b) { return a + b }
add(2, 3) // 5
```
### Arrow Functions
```javascript
var double = x => x * 2
double(5) // 10
var sum = (a, b) => a + b
sum(2, 3) // 5
var block = x => {
var y = x * 2
return y + 1
}
block(5) // 11
```
### Return
A function with no `return` returns `null`. An early `return` exits immediately.
```javascript
var fn = function() { var x = 1 }
fn() // null
var fn2 = function() { return 1; return 2 }
fn2() // 1
```
### Arguments
Extra arguments are ignored. Missing arguments are `null`.
```javascript
var fn = function(a, b) { return a + b }
fn(1, 2, 3) // 3 (extra arg ignored)
var fn2 = function(a, b) { return a }
fn2(1) // 1 (b is null)
```
### Immediately Invoked Function Expression
```javascript
var r = (function(x) { return x * 2 })(21) // 42
```
### Closures
Functions capture variables from their enclosing scope.
```javascript
var make = function(x) {
return function(y) { return x + y }
}
var add5 = make(5)
add5(3) // 8
```
Captured variables can be mutated:
```javascript
var counter = function() {
var n = 0
return function() { n = n + 1; return n }
}
var c = counter()
c() // 1
c() // 2
```
### Recursion
```javascript
var fact = function(n) {
if (n <= 1) return 1
return n * fact(n - 1)
}
fact(5) // 120
```
### This Binding
When a function is called as a method, `this` refers to the object.
```javascript
var obj = {
val: 10,
get: function() { return this.val }
}
obj.get() // 10
```
### Currying
```javascript
var f = function(a) {
return function(b) {
return function(c) { return a + b + c }
}
}
f(1)(2)(3) // 6
```
## Identifiers
Identifiers can contain `?` and `!` characters, both as suffixes and mid-name.
```javascript
var nil? = (x) => x == null
nil?(null) // true
nil?(42) // false
var set! = (x) => x + 1
set!(5) // 6
var is?valid = (x) => x > 0
is?valid(3) // true
var do!stuff = () => 42
do!stuff() // 42
```
The `?` in an identifier is not confused with the ternary operator:
```javascript
var nil? = (x) => x == null
var a = nil?(null) ? "yes" : "no" // "yes"
```
## Type Checking
### Type Functions
```javascript
is_number(42) // true
is_text("hi") // true
is_logical(true) // true
is_object({}) // true
is_array([]) // true
is_function(function(){}) // true
is_null(null) // true
is_object([]) // false (array is not object)
is_array({}) // false (object is not array)
```
### Truthiness
Falsy values: `false`, `0`, `""`, `null`. Everything else is truthy.
```javascript
if (0) ... // not entered
if ("") ... // not entered
if (null) ... // not entered
if (1) ... // entered
if ("hi") ... // entered
if ({}) ... // entered
if ([]) ... // entered
```
## Immutability with Stone
The `stone()` function makes values permanently immutable.
```javascript
var o = {x: 1}
is_stone(o) // false
stone(o)
is_stone(o) // true
o.x = 2 // disrupts!
```
Stone is **deep** — all nested objects and arrays are also frozen. This cannot be reversed.
## Function Proxy
A function with two parameters (`name`, `args`) acts as a proxy when properties are accessed on it. Any method call on the function dispatches through the proxy.
```javascript
var proxy = function(name, args) {
return `${name}:${length(args)}`
}
proxy.hello() // "hello:0"
proxy.add(1, 2) // "add:2"
proxy["method"]() // "method:0"
var m = "dynamic"
proxy[m]() // "dynamic:0"
```
For non-proxy functions, property access disrupts:
```javascript
var fn = function() { return 1 }
fn.foo // disrupts
fn.foo = 1 // disrupts
```
## Regex
Regex literals are written with forward slashes, with optional flags.
```javascript
var r = /\d+/
var result = extract("abc123", r)
result[0] // "123"
var ri = /hello/i
var result2 = extract("Hello", ri)
result2[0] // "Hello"
```
## Error Handling
ƿit uses `disrupt` and `disruption` for error handling. A `disrupt` signals that something went wrong. The `disruption` block attached to a function catches it.
```javascript
var safe_divide = function(a, b) {
if (b == 0) disrupt
return a / b
} disruption {
print("something went wrong")
}
```
`disrupt` is a bare keyword — it does not carry a value. The `disruption` block knows that something went wrong, but not what.
### Re-raising
A `disruption` block can re-raise by calling `disrupt` again:
```javascript
var outer = function() {
var inner = function() { disrupt } disruption { disrupt }
inner()
} disruption {
// caught here after re-raise
}
outer()
```
### Testing for Disruption
```javascript
var should_disrupt = function(fn) {
var caught = false
var wrapper = function() {
fn()
} disruption {
caught = true
}
wrapper()
return caught
}
```
If an actor has an unhandled disruption, it crashes.
## Self-Referencing Structures
Objects can reference themselves:
```javascript
var o = {name: "root"}
o.self = o
o.self.self.name // "root"
```
## Variable Shadowing
Inner functions can shadow outer variables:
```javascript
var x = 10
var fn = function() {
var x = 20
return x
}
fn() // 20
x // 10
```

View File

@@ -1,10 +0,0 @@
nav:
- text.md
- number.md
- array.md
- object.md
- blob.md
- time.md
- math.md
- json.md
- random.md

18
docs/library/_index.md Normal file
View File

@@ -0,0 +1,18 @@
---
title: "Standard Library"
description: "ƿit standard library modules"
weight: 90
type: "docs"
---
The standard library provides modules loaded with `use()`.
| Module | Description |
|--------|-------------|
| [blob](/docs/library/blob/) | Binary data (bits, not bytes) |
| [time](/docs/library/time/) | Time constants and conversions |
| [math](/docs/library/math/) | Trigonometry, logarithms, roots |
| [json](/docs/library/json/) | JSON encoding and decoding |
| [random](/docs/library/random/) | Random number generation |
The `text`, `number`, `array`, and `object` functions are intrinsics — they are always available without `use`. See [Built-in Functions](/docs/functions/) for the full list, and the individual reference pages for [text](/docs/library/text/), [number](/docs/library/number/), [array](/docs/library/array/), and [object](/docs/library/object/).

View File

@@ -1,12 +1,19 @@
# array
---
title: "array"
description: "Array creation and manipulation"
weight: 30
type: "docs"
---
The `array` function and its methods handle array creation and manipulation.
The `array` function is an intrinsic (always available, no `use()` needed). It is **polymorphic** — its behavior depends on the type of the first argument.
## Creation
## From a Number
Create an array of a given size.
### array(number)
Create an array of specified size, filled with `null`.
All elements initialized to `null`.
```javascript
array(3) // [null, null, null]
@@ -14,24 +21,36 @@ array(3) // [null, null, null]
### array(number, initial)
Create an array with initial values.
All elements initialized to a value. If initial is a function, it is called for each element (passed the index if arity >= 1).
```javascript
array(3, 0) // [0, 0, 0]
array(3, i => i * 2) // [0, 2, 4]
```
## From an Array
Copy, map, concat, or slice.
### array(array)
Copy an array.
Copy an array (mutable).
```javascript
var copy = array(original)
```
### array(array, function)
Map — call function with each element, collect results.
```javascript
array([1, 2, 3], x => x * 2) // [2, 4, 6]
```
### array(array, from, to)
Slice an array.
Slice — extract a sub-array. Negative indices count from end.
```javascript
array([1, 2, 3, 4, 5], 1, 4) // [2, 3, 4]
@@ -40,32 +59,36 @@ array([1, 2, 3], -2) // [2, 3]
### array(array, another)
Concatenate arrays.
Concatenate two arrays.
```javascript
array([1, 2], [3, 4]) // [1, 2, 3, 4]
```
### array(object)
## From a Record
Get keys of an object.
### array(record)
Get the keys of a record as an array of text.
```javascript
array({a: 1, b: 2}) // ["a", "b"]
```
## From Text
### array(text)
Split text into grapheme clusters.
Split text into individual characters (grapheme clusters). This is the standard way to iterate over characters in a string.
```javascript
array("hello") // ["h", "e", "l", "l", "o"]
array("👨‍👩‍👧") // ["👨‍👩‍👧"]
array("hello") // ["h", "e", "l", "l", "o"]
array("ƿit") // ["ƿ", "i", "t"]
```
### array(text, separator)
Split text by separator.
Split text by a separator string.
```javascript
array("a,b,c", ",") // ["a", "b", "c"]
@@ -73,7 +96,7 @@ array("a,b,c", ",") // ["a", "b", "c"]
### array(text, length)
Split text into chunks.
Dice text into chunks of a given length.
```javascript
array("abcdef", 2) // ["ab", "cd", "ef"]
@@ -87,13 +110,13 @@ Iterate over elements.
```javascript
array.for([1, 2, 3], function(el, i) {
log.console(i, el)
print(i, el)
})
// With early exit
array.for([1, 2, 3, 4], function(el) {
if (el > 2) return true
log.console(el)
print(el)
}, false, true) // prints 1, 2
```

View File

@@ -1,4 +1,9 @@
# blob
---
title: "blob"
description: "Binary data containers (bits, not bytes)"
weight: 50
type: "docs"
---
Blobs are binary large objects — containers of bits (not bytes). They're used for encoding data, messages, images, network payloads, and more.

View File

@@ -1,4 +1,9 @@
# json
---
title: "json"
description: "JSON encoding and decoding"
weight: 80
type: "docs"
---
JSON encoding and decoding.
@@ -86,5 +91,5 @@ var config_text = json.encode(config, 2)
// Load configuration
var loaded = json.decode(config_text)
log.console(loaded.debug) // true
print(loaded.debug) // true
```

View File

@@ -1,10 +1,15 @@
# math
---
title: "math"
description: "Trigonometry, logarithms, and roots"
weight: 70
type: "docs"
---
Cell provides three math modules with identical functions but different angle representations:
ƿit provides three math modules with identical functions but different angle representations:
```javascript
var math = use('math/radians') // angles in radians
var math = use('math/degrees') // angles in degrees
var math = use('math/degrees') // angles in degrees
var math = use('math/cycles') // angles in cycles (0-1)
```
@@ -35,7 +40,7 @@ math.tangent(math.pi / 4) // 1 (radians)
Inverse sine.
```javascript
math.arc_sine(1) // π/2 (radians)
math.arc_sine(1) // pi/2 (radians)
```
### arc_cosine(n)
@@ -43,7 +48,7 @@ math.arc_sine(1) // π/2 (radians)
Inverse cosine.
```javascript
math.arc_cosine(0) // π/2 (radians)
math.arc_cosine(0) // pi/2 (radians)
```
### arc_tangent(n, denominator)
@@ -51,9 +56,9 @@ math.arc_cosine(0) // π/2 (radians)
Inverse tangent. With two arguments, computes atan2.
```javascript
math.arc_tangent(1) // π/4 (radians)
math.arc_tangent(1, 1) // π/4 (radians)
math.arc_tangent(-1, -1) // -3π/4 (radians)
math.arc_tangent(1) // pi/4 (radians)
math.arc_tangent(1, 1) // pi/4 (radians)
math.arc_tangent(-1, -1) // -3pi/4 (radians)
```
## Exponentials and Logarithms
@@ -64,7 +69,7 @@ Euler's number raised to a power. Default power is 1.
```javascript
math.e() // 2.718281828...
math.e(2) // e²
math.e(2) // e^2
```
### ln(n)
@@ -130,21 +135,21 @@ math.e() // 2.71828...
var math = use('math/radians')
// Distance between two points
function distance(x1, y1, x2, y2) {
var distance = function(x1, y1, x2, y2) {
var dx = x2 - x1
var dy = y2 - y1
return math.sqrt(dx * dx + dy * dy)
}
// Angle between two points
function angle(x1, y1, x2, y2) {
var angle = function(x1, y1, x2, y2) {
return math.arc_tangent(y2 - y1, x2 - x1)
}
// Rotate a point
function rotate(x, y, angle) {
var c = math.cosine(angle)
var s = math.sine(angle)
var rotate = function(x, y, a) {
var c = math.cosine(a)
var s = math.sine(a)
return {
x: x * c - y * s,
y: x * s + y * c

View File

@@ -1,6 +1,11 @@
# number
---
title: "number"
description: "Numeric conversion and operations"
weight: 20
type: "docs"
---
The `number` function and its methods handle numeric conversion and operations.
The `number` function is an intrinsic (always available, no `use()` needed). It is **polymorphic** — its behavior depends on the type of the first argument.
## Conversion
@@ -29,15 +34,15 @@ Parse formatted numbers.
| Format | Description |
|--------|-------------|
| `""` | Standard decimal |
| `"u"` | Underbar separator (1_000) |
| `"d"` | Comma separator (1,000) |
| `"s"` | Space separator (1 000) |
| `"v"` | European (1.000,50) |
| `"b"` | Binary |
| `"o"` | Octal |
| `"h"` | Hexadecimal |
| `"j"` | JavaScript style (0x, 0o, 0b prefixes) |
| `""` | Standard decimal |
| `"u"` | Underbar separator (1_000) |
| `"d"` | Comma separator (1,000) |
| `"s"` | Space separator (1 000) |
| `"v"` | European (1.000,50) |
| `"b"` | Binary |
| `"o"` | Octal |
| `"h"` | Hexadecimal |
| `"j"` | JavaScript style (0x, 0o, 0b prefixes) |
```javascript
number("1,000", "d") // 1000
@@ -118,20 +123,20 @@ Get the fractional part.
fraction(4.75) // 0.75
```
### min(...values)
### min(a, b)
Return the smallest value.
Return the smaller of two numbers.
```javascript
min(3, 1, 4, 1, 5) // 1
min(3, 5) // 3
```
### max(...values)
### max(a, b)
Return the largest value.
Return the larger of two numbers.
```javascript
max(3, 1, 4, 1, 5) // 5
max(3, 5) // 5
```
### remainder(dividend, divisor)

View File

@@ -1,8 +1,13 @@
# object
---
title: "object"
description: "Object creation and manipulation"
weight: 40
type: "docs"
---
The `object` function and related utilities handle object creation and manipulation.
The `object` function is an intrinsic (always available, no `use()` needed). It is **polymorphic** — its behavior depends on the types of its arguments.
## Creation
## From a Record
### object(obj)
@@ -29,6 +34,8 @@ Select specific keys.
object({a: 1, b: 2, c: 3}, ["a", "c"]) // {a: 1, c: 3}
```
## From an Array of Keys
### object(keys)
Create object from keys (values are `true`).
@@ -60,9 +67,9 @@ object(["a", "b", "c"], (k, i) => i) // {a: 0, b: 1, c: 2}
Create a new object with the given prototype.
```javascript
var animal = {speak: function() { log.console("...") }}
var animal = {speak: function() { print("...") }}
var dog = meme(animal)
dog.speak = function() { log.console("woof") }
dog.speak = function() { print("woof") }
```
### proto(obj)
@@ -104,9 +111,4 @@ var obj = {a: 1, b: 2, c: 3}
// Get all keys
var keys = array(obj) // ["a", "b", "c"]
// Iterate
for (var key in obj) {
log.console(key, obj[key])
}
```

View File

@@ -1,4 +1,9 @@
# random
---
title: "random"
description: "Random number generation"
weight: 90
type: "docs"
---
Random number generation.
@@ -43,7 +48,7 @@ var random = use('random')
var coin_flip = random.random() < 0.5
// Random element from array
function pick(arr) {
var pick = function(arr) {
return arr[random.random_whole(length(arr))]
}
@@ -51,11 +56,14 @@ var colors = ["red", "green", "blue"]
var color = pick(colors)
// Shuffle array
function shuffle(arr) {
var shuffle = function(arr) {
var result = array(arr) // copy
for (var i = length(result) - 1; i > 0; i--) {
var j = random.random_whole(i + 1)
var temp = result[i]
var i = length(result) - 1
var j = 0
var temp = null
for (i = length(result) - 1; i > 0; i--) {
j = random.random_whole(i + 1)
temp = result[i]
result[i] = result[j]
result[j] = temp
}
@@ -63,8 +71,8 @@ function shuffle(arr) {
}
// Random in range
function random_range(min, max) {
return min + random.random() * (max - min)
var random_range = function(lo, hi) {
return lo + random.random() * (hi - lo)
}
var x = random_range(-10, 10) // -10 to 10

View File

@@ -1,19 +1,28 @@
# text
---
title: "text"
description: "String conversion and manipulation"
weight: 10
type: "docs"
---
The `text` function and its methods handle string conversion and manipulation.
The `text` function is an intrinsic (always available, no `use()` needed). It is **polymorphic** — its behavior depends on the type of the first argument.
## Conversion
To split text into characters, use `array(text)` — see [array](/docs/library/array/).
## From an Array
### text(array, separator)
Convert an array to text, joining elements with a separator (default: space).
Join array elements into text with a separator (default: empty string).
```javascript
text([1, 2, 3]) // "1 2 3"
text([1, 2, 3], ", ") // "1, 2, 3"
text(["a", "b"], "-") // "a-b"
text(["h", "e", "l", "l", "o"]) // "hello"
text([1, 2, 3], ", ") // "1, 2, 3"
text(["a", "b"], "-") // "a-b"
```
## From a Number
### text(number, radix)
Convert a number to text. Radix is 2-36 (default: 10).
@@ -24,13 +33,16 @@ text(255, 16) // "ff"
text(255, 2) // "11111111"
```
## From Text
### text(text, from, to)
Extract a substring from index `from` to `to`.
Extract a substring from index `from` to `to`. Negative indices count from end.
```javascript
text("hello world", 0, 5) // "hello"
text("hello world", 6) // "world"
text("hello", -3) // "llo"
```
## Methods
@@ -101,7 +113,7 @@ text.format("{0} + {1} = {2}", [1, 2, 3])
Unicode normalize the text (NFC form).
```javascript
text.normalize("café") // normalized form
text.normalize("cafe\u0301") // normalized form
```
### text.codepoint(text)
@@ -109,8 +121,7 @@ text.normalize("café") // normalized form
Get the Unicode codepoint of the first character.
```javascript
text.codepoint("A") // 65
text.codepoint("😀") // 128512
text.codepoint("A") // 65
```
### text.extract(text, pattern, from, to)

View File

@@ -1,4 +1,9 @@
# time
---
title: "time"
description: "Time constants and conversion functions"
weight: 60
type: "docs"
---
The time module provides time constants and conversion functions.
@@ -96,7 +101,7 @@ var last_week = now - time.week
var later = now + (2 * time.hour)
// Format future time
log.console(time.text(tomorrow))
print(time.text(tomorrow))
```
## Example
@@ -108,9 +113,9 @@ var time = use('time')
var start = time.number()
// ... do work ...
var elapsed = time.number() - start
log.console(`Took ${elapsed} seconds`)
print(`Took ${elapsed} seconds`)
// Schedule for tomorrow
var tomorrow = time.number() + time.day
log.console(`Tomorrow: ${time.text(tomorrow, "yyyy-MM-dd")}`)
print(`Tomorrow: ${time.text(tomorrow, "yyyy-MM-dd")}`)
```

View File

@@ -1,248 +0,0 @@
# Cell actor scripting language
Cell is a Misty [https://mistysystem.com](https://mistysystem.com) implementation.
## Memory
Values are 32 bit for 32 bit builds and 64 bit for 64 bit builds.
### 32 bit value
LSB = 0
payload is a 31 bit signed int
LSB = 01
payload is a 30 bit pointer
LSB = 11
next 3 bits = special tag. 27 bits of payload.
### 64 bit value
LSB = 0
payload is a 32 bit signed int, using high 32 bits
LSB = 01
payload is a 61 bit pointer
LSB = 101
Short float: a 61 bit double, with 3 fewer exponent bits
LSB = 11
Special tag: next 3 bits. 5 bits total. 59 bits of payload. 8 total special tags.
Special tags:
1: Bool. Payload is 0 or 1.
2: null. payload is 0.
3: exception.
4: string.
Immediate string. Next 3 low bits = length in bytes. Rest is string data. This allows for strings up to 7 ascii letters. Encoded in utf8.
## Numbers and math
Cell can be compiled with different levels of exactness for numeracy. Any number which cannot be represented exactly becomes "null". Any numeric operation which includes "null" results in "null".
Using short floats in a 64 bit system means you have doubles in the range of +- 10^38, not the full range of double. If you create a number out of that range, it's null.
You can also compile a 64 bit system with full precision doubles, but this will use more memory and may be slower.
You can also compile a 64 bit system with 32 bit floats, stored as a 32 bit int is. Again, out of the 32 bit float range = null.
You can compile without floating point support at all; 32 bit ints are then used for fixed point calculations.
Or, you can compile using Dec64, which is a 64 bit decimal floating point format, for exact precision.
## Objects
Objects are heap allocated, referenced by a pointer value. They are all preceded by an object header, the length of a word on the system.
### 64 bit build
56 bits capacity
1 bit memory reclamation flag: note that this obj has already been moved
2 bit reserved (per object)
1 bit stone: note that this obj is immutable
3 bit type: note the type of the object
1 bit: fwd: note that this obj is a forward linkage
Last bit ..1:
The forward type indicates that the object (an array, blob, pretext, or record) has grown beyond its capacity and is now residing at a new address. The remaining 63 bits contain the address of the enlarged object. Forward linkages are cleaned up by the memory reclaimer.
Type 7: C light C object
Header
Pointer
Capacity is an ID of a registered C type.
Pointer is a pointer to the opaque C object.
Type 0: Array
Header
Length
Element[]
Capacity is number of elements the array can hold. Length is number of elements in use. Number of words used by an array is capacity + 2.
Type 1: blob
Header
Length
Bit[]
Capacity is number of bits the blob can hold. Length is number of bits in use. Bits follow, from [0] to [capacity - 1], with [0] bit in the most significant position of word 2, and [63] in the least significant position of word 2. The last word is zero filled, if necessary.
Number of words used is (capacity + 63) // 64 + 2
Type 2: Text
Text has two forms, depending on if it is stone or not, which changes the meaning of its length word.
Header
Length(pretext) or Hash(text)
Character[0] and character[1]
Capacity of a pretext is the number of characters it can hold. During stoning and reclamation, capacity is set to the length.
The capacity of a text is its length.
The length of a pretext is the number of characters it contains; it is not greater than the capacity.
Hash of a text is used for organizing records. If the hash is zero, it's not been computed yet. All texts in the immutable memory have hashes.
A text object contains UTF32 characters, packed two per word. If the number of characters is odd, the least significant half of the last word is zero filled.
The number of words used by a text is (capacity + 1) // 2 + 2
Type 3: Record
A record is an array of fields represented as key/value pairs. Fields are located by hashes of texts, using open addressing with linear probing and lazy deletion. The load factor is less than 0.5.
Header
Prototype
Length
Key[0]
Value[0]
Key[1]
Value[1]
...
The capacity is the number of fields the record can hold. It is a power of two minus one. It is at least twice the length.
The length is the number of fields that the record currently contains.
A field candidate number is identified by and(key.hash, capacity). In case of hash collision, advance to the next field. If this goes past the end, continue with field 1. Field 0 is reserved.
The "exception" special tag is used to mark deleted entries in the object map.
The number of words used by a record is (capacity + 1) * 2.
Prototypes are searched for a property if it cannot be found on the record itself. Prototypes can have prototypes.
#### key[0] and value[0]
These are reserved for internal use, and skipped over during key probing.
The first 32 bits of key are used as a 32 bit integer key, if this object has ever been used as a key itself.
The last 32 bits are used as an opaque C class key. C types can be registered with the system, and each are assigned a monotonically increasing number. In the case that this object has a C type, then the bottom 32 bits of key[0] are not 0. If that is the case, then a pointer to its C object is stored in value[0].
#### Valid keys & Hashing
Keys are stored directly in object maps. There are three possibilities for a valid key: an object text, an object record, or an immediate text.
In the case of an immediate text, the hash is computed on the fly using the fash64_hash_one function, before being used to look up the key in the object map. Direct value comparison is used to confirm the key.
For object texts (texts longer than 7 ascii chars), the hash is stored in the text object itself. When an object text is used as a key, a stone version is created and interned. Any program static texts reference this stoned, interned text. When looking up a heap text as a key, it is first discovered if it's in the interned table. If it's not, the key is not in the object (since all keys are interned). If it is, the interned version is returned to check against the object map. The hash of the interned text is used to look up the key in the object map, and then direct pointer comparison is used to confirm the key.
For record keys, these are unique; once a record is used as a key, it gets assigned a monotonically increasing 32 bit integer, stored in key[0]. When checking it in an object map, the integer is used directly as the key. If key[0] is 0, the record has not been used as a key yet. If it's not 0, fash64_hash_one is used to compute a hash of its ID, and then direct value pointer comparison is used to confirm.
### Text interning
Texts that cannot fit in an immediate, and which are used as an object key, create a stoned and interned version (the pointer which is used as the key). Any text literals are also stoned and interned.
The interning table is an open addressed hash, with a load of 0.8, using a robin hood value. Probing is done using the text hash, confirmation is done using length, and then memcmp of the text.
When the GC runs, a new interned text table is created. Each text literal, and each text used as a key, is added to the new table, as the live objects are copied. This keeps the interning table from becoming a graveyard. Interned values are never deleted until a GC.
Type 4: Function
Header
Code
Outer
A function object has zero capacity and is always stone.
Code is a pointer to the code object that the function executes.
Outer is a pointer to the frame that created this function object.
Size is 3 words.
Type 5: Frame
Header
Function
Caller
Return address
The activation frame is created when a function is invoked to hold its linkages and state.
The capacity is the number of slots, including the inputs, variables, temporaries, and the four words of overhead. A frame, unlike the other types, is never stone.
The function is the address of the function object being called.
The caller is the address of the frame that is invoking the function.
The return address is the address of the instruction in the code that should be executed upon return.
Next come the input arguments, if any.
Then the variables closed over by the inner functions.
Then the variables that are not closed over, followed by the temporaries.
When a function returns, the caller is set to zero. This is a signal to the memory reclaimer that the frame can be reduced.
Type 6: Code
Header
Arity
Size
Closure size
Entry point
Disruption point
A code object exists in the actor's immutable memory. A code object never exists in mutable memory.
A code object has a zero capacity and is always stone.
The arity is the maximum number of inputs.
The size is the capacity of an activation frame that will execute this code.
The closure size is a reduced capacity for returned frames that survive memory reclamation.
The entry point is the address at which to begin execution.
The disruption point is the address of the disruption clause.
### opaque C objects
Records can have opaque C data attached to them.
A C class can register a GC clean up, and a GC trace function. The trace function is called when the record is encountered in the live object graph; and it should mark any values it wants to keep alive in that function.
The system maintains an array of live opaque C objects. When such an object is encountered, it marks it as live in the array. When the GC completes, it iterates this array and calls the GC clean up function for each C object in the array with alive=0. Alive is then cleared for the next GC cycle.
## 32 bit build
~3 bit type
1 bit stone
1 bit memory reclamation flag
27 bit capacity
Key differences here are
blob max capacity is 2**27 bits = 2**24 bytes = 16 MB [this likely needs addressed]
fwd is type ...0, and the pointer is 31 bits
other types are
111 array
101 object
011 blob
001
## Memory
Cell uses a single block of memory that it doles out as needed to the actors in its system.
Actors are given a block of memory in standard sizes using a doubling buddy memory manager. An actor is given an immutable data section on birth, as well as a mutable data section. When its mutable data becomes full, it requests a new one. Actors utilize their mutable memory with a simple bump allocation. If there is not sufficient memory available, the actor suspends and its status changes to exhausted.
The smallest block size is determined per platform, but it can be as small as 4KB on 64 bit systems.
The actor is then given a new block of memory of the same size, and it runs a garbage collector to reclaim memory. It uses the cheney copying algorithm. If a disappointing amount of memory was reclaimed, it is noted, and the actor is given a larger block of memory on the next request.

156
docs/nota.md Normal file
View File

@@ -0,0 +1,156 @@
---
title: "Nota Format"
description: "Network Object Transfer Arrangement"
weight: 85
type: "docs"
---
Nota is a binary message format developed for use in the Procession Protocol. It provides a compact, JSON-like encoding that supports blobs, text, arrays, records, numbers, and symbols.
Nota stands for Network Object Transfer Arrangement.
## Design Philosophy
JSON had three design rules: minimal, textual, and subset of JavaScript. The textual and JavaScript rules are no longer necessary. Nota maintains JSON's philosophy of being at the intersection of most programming languages and most data types, but departs by using counts instead of brackets and binary encoding instead of text.
Nota uses Kim continuation bytes for counts and character encoding. See [Kim Encoding](#kim) for details.
## Type Summary
| Bits | Type |
|------|------|
| `000` | Blob |
| `001` | Text |
| `010` | Array |
| `011` | Record |
| `100` | Floating Point (positive exponent) |
| `101` | Floating Point (negative exponent) |
| `110` | Integer (zero exponent) |
| `111` | Symbol |
## Preambles
Every Nota value starts with a preamble byte that is a Kim value with the three most significant bits used for type information.
Most types provide 3 or 4 data bits in the preamble. If the Kim encoding of the data fits in those bits, it is incorporated directly and the continue bit is off. Otherwise the continue bit is on and the continuation follows.
## Blob
```
C 0 0 0 D D D D
```
- **C** — continue the number of bits
- **DDDD** — the number of bits
A blob is a string of bits. The data produces the number of bits. The number of bytes that follow: `floor((number_of_bits + 7) / 8)`. The final byte is padded with 0 if necessary.
Example: A blob containing 25 bits `1111000011100011001000001`:
```
80 19 F0 E3 20 80
```
## Text
```
C 0 0 1 D D D D
```
- **C** — continue the number of characters
- **DDDD** — the number of characters
The data produces the number of characters. Kim-encoded characters follow. ASCII characters are 1 byte, first quarter BMP characters are 2 bytes, all other Unicode characters are 3 bytes. Unlike JSON, there is never a need for escapement.
Examples:
```
"" → 10
"cat" → 13 63 61 74
```
## Array
```
C 0 1 0 D D D D
```
- **C** — continue the number of elements
- **DDDD** — the number of elements
An array is an ordered sequence of values. Following the preamble are the elements, each beginning with its own preamble. Nesting is encouraged.
## Record
```
C 0 1 1 D D D D
```
- **C** — continue the number of pairs
- **DDDD** — the number of pairs
A record is an unordered collection of key/value pairs. Keys must be text and must be unique within the record. Values can be any Nota type.
## Floating Point
```
C 1 0 E S D D D
```
- **C** — continue the exponent
- **E** — sign of the exponent
- **S** — sign of the coefficient
- **DDD** — three bits of the exponent
Nota floating point represents numbers as `coefficient * 10^exponent`. The coefficient must be an integer. The preamble may contain the first three bits of the exponent, followed by the continuation of the exponent (if any), followed by the coefficient.
Use the integer type when the exponent is zero.
Examples:
```
-1.01 → 5A 65
98.6 → 51 87 5A
-0.5772156649 → D8 0A 95 C0 B0 BD 69
-10000000000000 → C8 0D 01
```
## Integer
```
C 1 1 0 S D D D
```
- **C** — continue the integer
- **S** — sign
- **DDD** — three bits of the integer
Integers in the range -7 to 7 fit in a single byte. Integers in the range -1023 to 1023 fit in two bytes. Integers in the range -131071 to 131071 fit in three bytes.
Examples:
```
0 → 60
2023 → E0 8F 67
-1 → 69
```
## Symbol
```
0 1 1 1 D D D D
```
- **DDDD** — the symbol
There are currently five symbols:
```
null → 70
false → 72
true → 73
private → 78
system → 79
```
The private prefix must be followed by a record containing a private process address. The system prefix must be followed by a record containing a system message. All other symbols are reserved.

View File

@@ -1,23 +1,29 @@
# Packages
---
title: "Packages"
description: "Code organization and sharing in ƿit"
weight: 30
type: "docs"
---
Packages are the fundamental unit of code organization and sharing in Cell.
Packages are the fundamental unit of code organization and sharing in ƿit.
## Package Structure
A package is a directory containing a `cell.toml` manifest:
A package is a directory containing a `pit.toml` manifest:
```
mypackage/
├── cell.toml # package manifest
├── pit.toml # package manifest
├── main.ce # entry point (optional)
├── utils.cm # module
├── helper/
│ └── math.cm # nested module
├── render.c # C extension
└── _internal.cm # private module (underscore prefix)
└── internal/
└── helpers.cm # private module (internal/ only)
```
## cell.toml
## pit.toml
The package manifest declares metadata and dependencies:
@@ -38,11 +44,11 @@ mylib = "/Users/john/work/mylib"
## Module Resolution
When importing with `use()`, Cell searches in order:
When importing with `use()`, ƿit searches in order:
1. **Local package** — relative to package root
2. **Dependencies** — via aliases in `cell.toml`
3. **Core** — built-in Cell modules
2. **Dependencies** — via aliases in `pit.toml`
3. **Core** — built-in ƿit modules
```javascript
// In package 'myapp' with dependency: renderer = "gitea.pockle.world/john/renderer"
@@ -55,12 +61,12 @@ use('json') // core module
### Private Modules
Files starting with underscore are private:
Files in the `internal/` directory are private to their package:
```javascript
// _internal.cm is only accessible within the same package
use('internal') // OK from same package
use('myapp/internal') // Error from other packages
// internal/helpers.cm is only accessible within the same package
use('internal/helpers') // OK from same package
use('myapp/internal/helpers') // Error from other packages
```
## Package Locators
@@ -85,10 +91,10 @@ Local packages are symlinked into the shop, making development seamless.
## The Shop
Cell stores all packages in the **shop** at `~/.cell/`:
ƿit stores all packages in the **shop** at `~/.pit/`:
```
~/.cell/
~/.pit/
├── packages/
│ ├── core -> gitea.pockle.world/john/cell
│ ├── gitea.pockle.world/
@@ -100,8 +106,11 @@ Cell stores all packages in the **shop** at `~/.cell/`:
│ └── work/
│ └── mylib -> /Users/john/work/mylib
├── lib/
│ ├── local.dylib
└── gitea_pockle_world_john_prosperon.dylib
│ ├── core/
│ ├── fd.dylib
│ │ └── time.mach
│ └── gitea_pockle_world_john_prosperon/
│ └── sprite.dylib
├── build/
│ └── <content-addressed cache>
├── cache/
@@ -134,20 +143,20 @@ target = "/Users/john/work/prosperon"
```bash
# Install from remote
cell install gitea.pockle.world/john/prosperon
pit install gitea.pockle.world/john/prosperon
# Install from local path
cell install /Users/john/work/mylib
pit install /Users/john/work/mylib
```
## Updating Packages
```bash
# Update all
cell update
pit update
# Update specific package
cell update gitea.pockle.world/john/prosperon
pit update gitea.pockle.world/john/prosperon
```
## Development Workflow
@@ -156,28 +165,28 @@ For active development, link packages locally:
```bash
# Link a package for development
cell link add gitea.pockle.world/john/prosperon /Users/john/work/prosperon
pit link add gitea.pockle.world/john/prosperon /Users/john/work/prosperon
# Changes to /Users/john/work/prosperon are immediately visible
# Remove link when done
cell link delete gitea.pockle.world/john/prosperon
pit link delete gitea.pockle.world/john/prosperon
```
## C Extensions
C files in a package are compiled into a dynamic library:
C files in a package are compiled into per-file dynamic libraries:
```
mypackage/
├── cell.toml
├── render.c # compiled to mypackage.dylib
└── render.cm # optional Cell wrapper
├── pit.toml
├── render.c # compiled to lib/mypackage/render.dylib
└── physics.c # compiled to lib/mypackage/physics.dylib
```
The library is named after the package and placed in `~/.cell/lib/`.
Each `.c` file gets its own `.dylib` in `~/.pit/lib/<pkg>/`. A `.c` file and `.cm` file with the same stem at the same scope is a build error — use distinct names.
See [Writing C Modules](c-modules.md) for details.
See [Writing C Modules](/docs/c-modules/) for details.
## Platform-Specific Files
@@ -190,4 +199,4 @@ mypackage/
└── audio_emscripten.c # Web-specific
```
Cell selects the appropriate file based on the build target.
ƿit selects the appropriate file based on the build target.

176
docs/requestors.md Normal file
View File

@@ -0,0 +1,176 @@
---
title: "Requestors"
description: "Asynchronous work with requestors"
weight: 25
type: "docs"
---
Requestors are functions that encapsulate asynchronous work. They provide a structured way to compose callbacks, manage cancellation, and coordinate concurrent operations between actors.
## What is a Requestor
A requestor is a function with this signature:
```javascript
var my_requestor = function(callback, value) {
// Do async work, then call callback with result
// Return a cancel function
}
```
- **callback** — called when the work completes: `callback(value, reason)`
- On success: `callback(result)` or `callback(result, null)`
- On failure: `callback(null, reason)` where reason explains the failure
- **value** — input passed from the previous step (or the initial caller)
- **return** — a cancel function, or null if cancellation is not supported
The cancel function, when called, should abort the in-progress work.
## Writing a Requestor
```javascript
var fetch_data = function(callback, url) {
$contact(function(connection) {
$send(connection, {get: url}, function(response) {
callback(response)
})
}, {host: url, port: 80})
return function() {
// clean up if needed
}
}
```
A requestor that always succeeds immediately:
```javascript
var constant = function(callback, value) {
callback(42)
}
```
A requestor that always fails:
```javascript
var broken = function(callback, value) {
callback(null, "something went wrong")
}
```
## Composing Requestors
ƿit provides four built-in functions for composing requestors into pipelines.
### sequence(requestor_array)
Run requestors one after another. Each result becomes the input to the next. The final result is passed to the callback.
```javascript
var pipeline = sequence([
fetch_user,
validate_permissions,
load_profile
])
pipeline(function(profile, reason) {
if (reason) {
print(reason)
} else {
print(profile.name)
}
}, user_id)
```
If any step fails, the remaining steps are skipped and the failure propagates.
### parallel(requestor_array, throttle, need)
Start all requestors concurrently. Results are collected into an array matching the input order.
```javascript
var both = parallel([
fetch_profile,
fetch_settings
])
both(function(results, reason) {
var profile = results[0]
var settings = results[1]
}, user_id)
```
- **throttle** — limit how many requestors run at once (null for no limit)
- **need** — minimum number of successes required (default: all)
### race(requestor_array, throttle, need)
Like `parallel`, but returns as soon as the needed number of results arrive. Unfinished requestors are cancelled.
```javascript
var fastest = race([
fetch_from_cache,
fetch_from_network,
fetch_from_backup
])
fastest(function(results) {
// results[0] is whichever responded first
}, request)
```
Default need is 1. Useful for redundant operations where only one result matters.
### fallback(requestor_array)
Try each requestor in order. If one fails, try the next. Return the first success.
```javascript
var resilient = fallback([
fetch_from_primary,
fetch_from_secondary,
use_cached_value
])
resilient(function(data, reason) {
if (reason) {
print("all sources failed")
}
}, key)
```
## Timeouts
Wrap any requestor with `$time_limit` to add a timeout:
```javascript
var timed = $time_limit(fetch_data, 5) // 5 second timeout
timed(function(result, reason) {
// reason will explain timeout if it fires
}, url)
```
If the requestor does not complete within the time limit, it is cancelled and the callback receives a failure.
## Requestors and Actors
Requestors are particularly useful with actor messaging. Since `$send` is callback-based, it fits naturally:
```javascript
var ask_worker = function(callback, task) {
$send(worker, task, function(reply) {
callback(reply)
})
}
var pipeline = sequence([
ask_worker,
process_result,
store_result
])
pipeline(function(stored) {
print("done")
$stop()
}, {type: "compute", data: [1, 2, 3]})
```

223
docs/shop.md Normal file
View File

@@ -0,0 +1,223 @@
---
title: "Shop Architecture"
description: "How the shop resolves, compiles, caches, and loads modules"
weight: 35
type: "docs"
---
The shop is the module resolution and loading engine behind `use()`. It handles finding modules, compiling them, caching the results, and loading C extensions. The shop lives in `internal/shop.cm`.
## Startup Pipeline
When `pit` runs a program, startup takes one of two paths:
### Fast path (warm cache)
```
C runtime → engine.cm (from cache) → shop.cm → user program
```
The C runtime hashes the source of `internal/engine.cm` with BLAKE2 and looks up the hash in the content-addressed cache (`~/.pit/build/<hash>`). On a cache hit, engine.cm loads directly — no bootstrap involved.
### Cold path (first run or cache cleared)
```
C runtime → bootstrap.cm → (seeds cache) → engine.cm (from cache) → shop.cm → user program
```
On a cache miss, the C runtime loads `boot/bootstrap.cm.mcode` (a pre-compiled seed). Bootstrap compiles engine.cm and the pipeline modules (tokenize, parse, fold, mcode, streamline) from source and caches the results. The C runtime then retries the engine cache lookup, which now succeeds.
### Engine
**engine.cm** is self-sufficient. It loads its own compilation pipeline from the content-addressed cache, with fallback to the pre-compiled seeds in `boot/`. It defines `analyze()` (source to AST), `compile_to_blob()` (AST to binary blob), and `use_core()` for loading core modules. It creates the actor runtime and loads shop.cm via `use_core('internal/shop')`.
### Shop
**shop.cm** receives its dependencies through the module environment — `analyze`, `run_ast_fn`, `use_cache`, `shop_path`, `runtime_env`, `content_hash`, `cache_path`, and others. It defines `Shop.use()`, which is the function behind every `use()` call in user code.
### Cache invalidation
All caching is content-addressed by BLAKE2 hash of the source. When any source file changes, its hash changes and the old cache entry is simply never looked up again. No manual invalidation is needed. To force a full rebuild, delete `~/.pit/build/`.
## Module Resolution
When `use('path')` is called from a package context, the shop resolves the module through a multi-layer search. Both the `.cm` script file and C symbol are resolved independently, and the one with the narrowest scope wins.
### Resolution Order
For a call like `use('sprite')` from package `myapp`:
1. **Own package** — `~/.pit/packages/myapp/sprite.cm` and C symbol `js_myapp_sprite_use`
2. **Aliased dependencies** — if `myapp/pit.toml` has `renderer = "gitea.pockle.world/john/renderer"`, checks `renderer/sprite.cm` and its C symbols
3. **Core** — built-in core modules and internal C symbols
For calls without a package context (from core modules), only core is searched.
### Private Modules
Paths starting with `internal/` are private to their package:
```javascript
use('internal/helpers') // OK from within the same package
// Cannot be accessed from other packages
```
### Explicit Package Imports
Paths containing a dot in the first component are treated as explicit package references:
```javascript
use('gitea.pockle.world/john/renderer/sprite')
// Resolves directly to the renderer package's sprite.cm
```
## Compilation and Caching
Every module goes through a content-addressed caching pipeline. The cache key is the BLAKE2 hash of the source content, so changing the source automatically invalidates the cache.
### Cache Hierarchy
When loading a module, the shop checks (in order):
1. **In-memory cache** — `use_cache[key]`, checked first on every `use()` call
2. **Installed dylib** — per-file `.dylib` in `~/.pit/lib/<pkg>/<stem>.dylib`
3. **Installed mach** — pre-compiled bytecode in `~/.pit/lib/<pkg>/<stem>.mach`
4. **Cached bytecode** — content-addressed in `~/.pit/build/<hash>` (no extension)
5. **Cached .mcode IR** — JSON IR in `~/.pit/build/<hash>.mcode`
6. **Internal symbols** — statically linked into the `pit` binary (fat builds)
7. **Source compilation** — full pipeline: analyze, mcode, streamline, serialize
When both a `.dylib` and `.mach` exist for the same module in `lib/`, the dylib is selected. Dylib resolution also wins over internal symbols, so a dylib in `lib/` can hot-patch a fat binary. Delete the dylib to fall back to mach or static.
Results from steps 5-7 are cached back to the content-addressed store for future loads.
Each loading method (except the in-memory cache) can be individually enabled or disabled via `shop.toml` policy flags — see [Shop Configuration](#shop-configuration) below.
### Content-Addressed Store
The build cache at `~/.pit/build/` stores ephemeral artifacts named by the BLAKE2 hash of their inputs:
```
~/.pit/build/
├── a1b2c3d4... # cached bytecode blob (no extension)
├── c9d0e1f2...mcode # cached JSON IR
└── f3a4b5c6... # compiled dylib (checked before copying to lib/)
```
This scheme provides automatic cache invalidation: when source changes, its hash changes, and the old cache entry is simply never looked up again. When building a dylib, the build cache is checked first — if a matching hash exists, it is copied to `lib/` without recompiling.
### Core Module Caching
Core modules loaded via `use_core()` in engine.cm follow the same content-addressed pattern. On first use, a module is compiled from source and cached by the BLAKE2 hash of its source content. Subsequent loads with unchanged source hit the cache directly.
User scripts (`.ce` files) are also cached. The first run compiles and caches; subsequent runs with unchanged source load from cache.
## C Extension Resolution
C extensions are resolved alongside script modules. A C module is identified by a symbol name derived from the package and file name:
```
package: gitea.pockle.world/john/prosperon
file: sprite.c
symbol: js_gitea_pockle_world_john_prosperon_sprite_use
```
### C Resolution Sources
1. **Installed dylibs** — per-file dylibs in `~/.pit/lib/<pkg>/<stem>.dylib` (deterministic paths, no manifests)
2. **Internal symbols** — statically linked into the `pit` binary (fat builds)
Dylibs are checked first at each resolution scope, so an installed dylib always wins over a statically linked symbol. This enables hot-patching fat binaries by placing a dylib in `lib/`.
### Name Collisions
Having both a `.cm` script and a `.c` file with the same stem at the same scope is a **build error**. For example, `render.cm` and `render.c` in the same directory will fail. Use distinct names — e.g., `render.c` for the C implementation and `render_utils.cm` for the script wrapper.
## Environment Injection
When a module is loaded, the shop builds an `env` object that becomes the module's set of free variables. This includes:
- **Runtime functions** — `logical`, `some`, `every`, `starts_with`, `ends_with`, `is_actor`, `log`, `send`, `fallback`, `parallel`, `race`, `sequence`
- **Capability injections** — actor intrinsics like `$self`, `$delay`, `$start`, `$receiver`, `$fd`, etc.
- **`use` function** — scoped to the module's package context
The set of injected capabilities is controlled by `script_inject_for()`, which can be tuned per package or file.
## Shop Configuration
The shop reads an optional `shop.toml` file from the shop root (`~/.pit/shop.toml`). This file controls which loading methods are permitted through policy flags.
### Policy Flags
All flags default to `true`. Set a flag to `false` to disable that loading method.
```toml
[policy]
allow_dylib = true # per-file .dylib loading (requires dlopen)
allow_static = true # statically linked C symbols (fat builds)
allow_mach = true # pre-compiled .mach bytecode (lib/ and build cache)
allow_compile = true # on-the-fly source compilation
```
### Example Configurations
**Production lockdown** — only use pre-compiled artifacts, never compile from source:
```toml
[policy]
allow_compile = false
```
**Pure-script mode** — bytecode only, no native code:
```toml
[policy]
allow_dylib = false
allow_static = false
```
**No dlopen platforms** — static linking and bytecode only:
```toml
[policy]
allow_dylib = false
```
If `shop.toml` is missing or has no `[policy]` section, all methods are enabled (default behavior).
## Shop Directory Layout
```
~/.pit/
├── packages/ # installed packages (directories and symlinks)
│ └── core -> ... # symlink to the ƿit core
├── lib/ # INSTALLED per-file artifacts (persistent, human-readable)
│ ├── core/
│ │ ├── fd.dylib
│ │ ├── time.mach
│ │ ├── time.dylib
│ │ └── internal/
│ │ └── os.dylib
│ └── gitea_pockle_world_john_prosperon/
│ ├── sprite.dylib
│ └── render.dylib
├── build/ # EPHEMERAL cache (safe to delete anytime)
│ ├── <hash> # cached bytecode or dylib blobs (no extension)
│ └── <hash>.mcode # cached JSON IR
├── cache/ # downloaded package zip archives
├── lock.toml # installed package versions and commit hashes
├── link.toml # local development link overrides
└── shop.toml # optional shop configuration and policy flags
```
## Key Files
| File | Role |
|------|------|
| `internal/bootstrap.cm` | Minimal cache seeder (cold start only) |
| `internal/engine.cm` | Self-sufficient entry point: compilation pipeline, actor runtime, `use_core()` |
| `internal/shop.cm` | Module resolution, compilation, caching, C extension loading |
| `internal/os.c` | OS intrinsics: dylib ops, internal symbol lookup, embedded modules |
| `package.cm` | Package directory detection, alias resolution, file listing |
| `link.cm` | Development link management (link.toml read/write) |
| `boot/*.cm.mcode` | Pre-compiled pipeline seeds (tokenize, parse, fold, mcode, bootstrap) |

3
docs/spec/.pages Normal file
View File

@@ -0,0 +1,3 @@
nav:
- pipeline.md
- mcode.md

296
docs/spec/c-runtime.md Normal file
View File

@@ -0,0 +1,296 @@
---
title: "C Runtime for Native Code"
description: "Minimum C runtime surface for QBE-generated native code"
---
## Overview
QBE-generated native code calls into a C runtime for anything that touches the heap, dispatches dynamically, or requires GC awareness. The design principle: **native code handles control flow and integer math directly; everything else is a runtime call.**
This document defines the runtime boundary — what must be in C, what QBE handles inline, and how to organize the C code to serve both the mcode interpreter and native code cleanly.
## The Boundary
### What native code does inline (no C calls)
These operations compile to straight QBE instructions with no runtime involvement:
- **Integer arithmetic**: `add`, `sub`, `mul` on NaN-boxed ints (shift right 1, operate, shift left 1)
- **Integer comparisons**: extract int with shift, compare, produce tagged bool
- **Control flow**: jumps, branches, labels, function entry/exit
- **Slot access**: load/store to frame slots via `%fp` + offset
- **NaN-box tagging**: integer tagging (`n << 1`), bool constants (`0x03`/`0x23`), null (`0x07`)
- **Type tests**: `JS_IsInt` (LSB check), `JS_IsNumber`, `JS_IsText`, `JS_IsNull` — these are bit tests on the value, no heap access needed
### What requires a C call
Anything that:
1. **Allocates** (arrays, records, strings, frames, function objects)
2. **Touches the heap** (property get/set, array indexing, closure access)
3. **Dispatches on type at runtime** (dynamic load/store, polymorphic arithmetic)
4. **Calls user functions** (frame setup, argument passing, invocation)
5. **Does string operations** (concatenation, comparison, conversion)
## Runtime Functions
### Tier 1: Essential (must exist for any program to run)
These are called by virtually every QBE program.
#### Intrinsic Lookup
```c
// Look up a built-in function by name. Called once per intrinsic per callsite.
JSValue cell_rt_get_intrinsic(JSContext *ctx, const char *name);
```
Maps name → C function pointer wrapped in JSValue. This is the primary entry point for all built-in functions (`print`, `text`, `length`, `is_array`, etc). The native code never calls intrinsics directly — it always goes through `get_intrinsic` → `frame` → `invoke`.
#### Function Calls
```c
// Allocate a call frame with space for nr_args arguments.
JSValue cell_rt_frame(JSContext *ctx, JSValue fn, int nr_args);
// Set argument idx in the frame.
void cell_rt_setarg(JSValue frame, int idx, JSValue val);
// Execute the function. Returns the result.
JSValue cell_rt_invoke(JSContext *ctx, JSValue frame);
```
This is the universal calling convention. Every function call — user functions, intrinsics, methods — goes through frame/setarg/invoke. The frame allocates a `JSFrameRegister` on the GC heap, setarg fills slots, invoke dispatches.
**Tail call variants:**
```c
JSValue cell_rt_goframe(JSContext *ctx, JSValue fn, int nr_args);
void cell_rt_goinvoke(JSContext *ctx, JSValue frame);
```
Same as frame/invoke but reuse the caller's stack position.
### Tier 2: Property Access (needed by any program using records or arrays)
```c
// Record field by constant name.
JSValue cell_rt_load_field(JSContext *ctx, JSValue obj, const char *name);
void cell_rt_store_field(JSContext *ctx, JSValue obj, JSValue val, const char *name);
// Array element by integer index.
JSValue cell_rt_load_index(JSContext *ctx, JSValue obj, JSValue idx);
void cell_rt_store_index(JSContext *ctx, JSValue obj, JSValue idx, JSValue val);
// Dynamic — type of key unknown at compile time.
JSValue cell_rt_load_dynamic(JSContext *ctx, JSValue obj, JSValue key);
void cell_rt_store_dynamic(JSContext *ctx, JSValue obj, JSValue key, JSValue val);
```
The typed variants (`load_field`/`load_index`) skip the key-type dispatch that `load_dynamic` must do. When parse and fold provide type information, QBE emit selects the typed variant and the streamline optimizer can narrow dynamic → typed.
**Implementation**: These are thin wrappers around existing `JS_GetPropertyStr`/`JS_GetPropertyNumber`/`JS_GetProperty` and their `Set` counterparts.
### Tier 3: Closures (needed by programs with nested functions)
```c
// Walk depth levels up the frame chain, read slot.
JSValue cell_rt_get_closure(JSContext *ctx, JSValue fp, int depth, int slot);
// Walk depth levels up, write slot.
void cell_rt_put_closure(JSContext *ctx, JSValue fp, JSValue val, int depth, int slot);
```
Closure variables live in outer frames. `depth` is how many `caller` links to follow; `slot` is the register index in that frame.
### Tier 4: Object Construction (needed by programs creating arrays/records/functions)
```c
// Create a function object from a compiled function index.
// The native code loader must maintain a function table.
JSValue cell_rt_make_function(JSContext *ctx, int fn_id);
```
Array and record literals are currently compiled as intrinsic calls (`array(...)`, direct `{...}` construction) which go through the frame/invoke path. A future optimization could add:
```c
// Fast paths (optional, not yet needed)
JSValue cell_rt_new_array(JSContext *ctx, int len);
JSValue cell_rt_new_record(JSContext *ctx);
```
### Tier 5: Collection Operations
```c
// a[] = val (push) and var v = a[] (pop)
void cell_rt_push(JSContext *ctx, JSValue arr, JSValue val);
JSValue cell_rt_pop(JSContext *ctx, JSValue arr);
```
### Tier 6: Error Handling
```c
// Trigger disruption. Jumps to the disrupt handler or unwinds.
void cell_rt_disrupt(JSContext *ctx);
```
### Tier 7: Miscellaneous
```c
JSValue cell_rt_delete(JSContext *ctx, JSValue obj, JSValue key);
JSValue cell_rt_typeof(JSContext *ctx, JSValue val);
```
### Tier 8: String and Float Helpers (called from QBE inline code, not from qbe_emit)
These are called from the QBE IL that `qbe.cm` generates inline for arithmetic and comparison operations. They're not `cell_rt_` prefixed — they're lower-level:
```c
// Float arithmetic (when operands aren't both ints)
JSValue qbe_float_add(JSContext *ctx, JSValue a, JSValue b);
JSValue qbe_float_sub(JSContext *ctx, JSValue a, JSValue b);
JSValue qbe_float_mul(JSContext *ctx, JSValue a, JSValue b);
JSValue qbe_float_div(JSContext *ctx, JSValue a, JSValue b);
JSValue qbe_float_mod(JSContext *ctx, JSValue a, JSValue b);
JSValue qbe_float_pow(JSContext *ctx, JSValue a, JSValue b);
JSValue qbe_float_neg(JSContext *ctx, JSValue v);
JSValue qbe_float_inc(JSContext *ctx, JSValue v);
JSValue qbe_float_dec(JSContext *ctx, JSValue v);
// Float comparison (returns C int 0/1 for QBE branching)
int qbe_float_cmp(JSContext *ctx, int op, JSValue a, JSValue b);
// Bitwise ops on non-int values (convert to int32 first)
JSValue qbe_bnot(JSContext *ctx, JSValue v);
JSValue qbe_bitwise_and(JSContext *ctx, JSValue a, JSValue b);
JSValue qbe_bitwise_or(JSContext *ctx, JSValue a, JSValue b);
JSValue qbe_bitwise_xor(JSContext *ctx, JSValue a, JSValue b);
JSValue qbe_shift_shl(JSContext *ctx, JSValue a, JSValue b);
JSValue qbe_shift_sar(JSContext *ctx, JSValue a, JSValue b);
JSValue qbe_shift_shr(JSContext *ctx, JSValue a, JSValue b);
// String operations
JSValue JS_ConcatString(JSContext *ctx, JSValue a, JSValue b);
int js_string_compare_value(JSContext *ctx, JSValue a, JSValue b, int eq_only);
JSValue JS_NewString(JSContext *ctx, const char *str);
JSValue __JS_NewFloat64(JSContext *ctx, double d);
int JS_ToBool(JSContext *ctx, JSValue v);
// String/number type tests (inline-able but currently calls)
int JS_IsText(JSValue v);
int JS_IsNumber(JSValue v);
// Tolerant equality (== on mixed types)
JSValue cell_rt_eq_tol(JSContext *ctx, JSValue a, JSValue b);
JSValue cell_rt_ne_tol(JSContext *ctx, JSValue a, JSValue b);
// Text ordering comparisons
JSValue cell_rt_lt_text(JSContext *ctx, JSValue a, JSValue b);
JSValue cell_rt_le_text(JSContext *ctx, JSValue a, JSValue b);
JSValue cell_rt_gt_text(JSContext *ctx, JSValue a, JSValue b);
JSValue cell_rt_ge_text(JSContext *ctx, JSValue a, JSValue b);
```
## What Exists vs What Needs Writing
### Already exists (in qbe_helpers.c)
All `qbe_float_*`, `qbe_bnot`, `qbe_bitwise_*`, `qbe_shift_*`, `qbe_to_bool` — these are implemented and working.
### Already exists (in runtime.c / quickjs.c) but not yet wrapped
The underlying operations exist but aren't exposed with the `cell_rt_` names:
| Runtime function | Underlying implementation |
|---|---|
| `cell_rt_load_field` | `JS_GetPropertyStr(ctx, obj, name)` |
| `cell_rt_load_index` | `JS_GetPropertyNumber(ctx, obj, JS_VALUE_GET_INT(idx))` |
| `cell_rt_load_dynamic` | `JS_GetProperty(ctx, obj, key)` |
| `cell_rt_store_field` | `JS_SetPropertyStr(ctx, obj, name, val)` |
| `cell_rt_store_index` | `JS_SetPropertyNumber(ctx, obj, JS_VALUE_GET_INT(idx), val)` |
| `cell_rt_store_dynamic` | `JS_SetProperty(ctx, obj, key, val)` |
| `cell_rt_delete` | `JS_DeleteProperty(ctx, obj, key)` |
| `cell_rt_push` | `JS_ArrayPush(ctx, &arr, val)` |
| `cell_rt_pop` | `JS_ArrayPop(ctx, arr)` |
| `cell_rt_typeof` | type tag switch → `JS_NewString` |
| `cell_rt_disrupt` | `JS_Throw(ctx, ...)` |
| `cell_rt_eq_tol` / `cell_rt_ne_tol` | comparison logic in mcode.c `eq_tol`/`ne_tol` handler |
| `cell_rt_lt_text` etc. | `js_string_compare_value` + wrap result |
### Needs new code
| Runtime function | What's needed |
|---|---|
| `cell_rt_get_intrinsic` | Look up intrinsic by name string, return JSValue function. Currently scattered across `js_cell_intrinsic_get` and the mcode handler. Needs a clean single entry point. |
| `cell_rt_frame` | Allocate `JSFrameRegister`, set function slot, set argc. Exists in mcode.c `frame` handler but not as a callable function. |
| `cell_rt_setarg` | Write to frame slot. Trivial: `frame->slots[idx + 1] = val` (slot 0 is `this`). |
| `cell_rt_invoke` | Call the function in the frame. Needs to dispatch: native C function vs mach bytecode vs mcode. This is the critical piece — it must handle all function types. |
| `cell_rt_goframe` / `cell_rt_goinvoke` | Tail call variants. Similar to frame/invoke but reuse caller frame. |
| `cell_rt_make_function` | Create function object from index. Needs a function table (populated by the native loader). |
| `cell_rt_get_closure` / `cell_rt_put_closure` | Walk frame chain. Exists inline in mcode.c `get`/`put` handlers. |
## Recommended C File Organization
```
source/
cell_runtime.c — NEW: all cell_rt_* functions (the native code API)
qbe_helpers.c — existing: float/bitwise/shift helpers for inline QBE
runtime.c — existing: JS_GetProperty, JS_SetProperty, etc.
quickjs.c — existing: core VM, GC, value representation
mcode.c — existing: mcode interpreter (can delegate to cell_runtime.c)
```
**`cell_runtime.c`** is the single file that defines the native code contract. It should:
1. Include `quickjs-internal.h` for access to value representation and heap types
2. Export all `cell_rt_*` functions with C linkage (no `static`)
3. Keep each function thin — delegate to existing `JS_*` functions where possible
4. Handle GC safety: after any allocation (frame, string, array), callers' frames may have moved
### Implementation Priority
**Phase 1** — Get "hello world" running natively:
- `cell_rt_get_intrinsic` (to find `print` and `text`)
- `cell_rt_frame`, `cell_rt_setarg`, `cell_rt_invoke` (to call them)
- A loader that takes QBE output → assembles → links → calls `cell_main`
**Phase 2** — Variables and arithmetic:
- All property access (`load_field`, `load_index`, `store_*`, `load_dynamic`)
- `cell_rt_make_function`, `cell_rt_get_closure`, `cell_rt_put_closure`
**Phase 3** — Full language:
- `cell_rt_push`, `cell_rt_pop`, `cell_rt_delete`, `cell_rt_typeof`
- `cell_rt_disrupt`
- `cell_rt_goframe`, `cell_rt_goinvoke`
- Text comparison wrappers (`cell_rt_lt_text`, etc.)
- Tolerant equality (`cell_rt_eq_tol`, `cell_rt_ne_tol`)
## Calling Convention
All `cell_rt_*` functions follow the same pattern:
- First argument is `JSContext *ctx` (the one exception is `cell_rt_setarg`, which takes only the frame, the argument index, and the value)
- Values are passed/returned as `JSValue` (64-bit, by value)
- Frame pointers are `JSValue` (tagged pointer to `JSFrameRegister`)
- String names are `const char *` (pointer to data section label)
- Integer constants (slot indices, arg counts) are `int` / `long`
Native code maintains `%ctx` (JSContext) and `%fp` (current frame pointer) as persistent values across the function body. All slot reads/writes go through `%fp` + offset.
## What Should NOT Be in the C Runtime
These are handled entirely by QBE-generated code:
- **Integer arithmetic and comparisons** — bit operations on NaN-boxed values
- **Control flow** — branches, loops, labels, jumps
- **Boolean logic** — `and`/`or`/`not` on tagged values
- **Constant loading** — integer constants are immediate, strings are data labels
- **Type guard branches** — the `is_int`/`is_text`/`is_null` checks are inline bit tests; the branch to the float or text path is just a QBE `jnz`
The `qbe.cm` macros already handle all of this. The arithmetic path looks like:
```
check both ints? → yes → inline int add → done
→ no → call qbe_float_add (or JS_ConcatString for text)
```
The C runtime is only called on the slow paths (float, text, dynamic dispatch). The fast path (integer arithmetic, comparisons, branching) is fully native.

77
docs/spec/dec64.md Normal file
View File

@@ -0,0 +1,77 @@
---
title: "DEC64 Numbers"
description: "Decimal floating point representation"
---
## Overview
ƿit uses DEC64 as its number format. DEC64 represents numbers as `coefficient * 10^exponent` in a 64-bit word. This eliminates the rounding errors that plague IEEE 754 binary floating point — `0.1 + 0.2` is exactly `0.3`.
DEC64 was designed by Douglas Crockford as a general-purpose number type suitable for both business and scientific computation.
## Format
A DEC64 number is a 64-bit value:
```
[coefficient: 56 bits][exponent: 8 bits]
```
- **Coefficient** — a 56-bit signed integer (two's complement)
- **Exponent** — an 8-bit signed integer (range: -127 to 127)
The value of a DEC64 number is: `coefficient * 10^exponent`
### Examples
| Value | Coefficient | Exponent | Hex |
|-------|------------|----------|-----|
| `0` | 0 | 0 | `0000000000000000` |
| `1` | 1 | 0 | `0000000000000100` |
| `3.14159` | 314159 | -5 | `0000000004CB2FFB` |
| `-1` | -1 | 0 | `FFFFFFFFFFFFFF00` |
| `1000000` | 1 | 6 | `0000000000000106` |
## Special Values
### Null
The exponent `0x80` (-128) indicates null. This is the only special value — there is no infinity, no NaN, no negative zero. Operations that would produce undefined results (such as division by zero) return null.
```
coefficient: any, exponent: 0x80 → null
```
## Arithmetic Properties
- **Exact decimals**: All decimal fractions with up to 16 significant digits are represented exactly (17-digit values fit only while the coefficient stays below 2^55)
- **No rounding**: `0.1 + 0.2 == 0.3` is true
- **Integer range**: Exact integers up to 2^55 (about 3.6 * 10^16)
- **Normalized on demand**: The runtime normalizes coefficients to remove trailing zeros when needed for comparison
## Comparison with IEEE 754
| Property | DEC64 | IEEE 754 double |
|----------|-------|----------------|
| Decimal fractions | Exact | Approximate |
| Significant digits | ~17 | ~15-16 |
| Special values | null only | NaN, ±Infinity, -0 |
| Rounding errors | None (decimal) | Common |
| Financial arithmetic | Correct | Requires libraries |
| Scientific range | ±10^127 | ±10^308 |
DEC64 trades a smaller exponent range for exact decimal arithmetic. Most applications never need exponents beyond ±127.
## In ƿit
All numbers in ƿit are DEC64. There is no separate integer type at the language level — the distinction is internal. The `is_integer` function checks whether a number has no fractional part.
```javascript
var x = 42 // coefficient: 42, exponent: 0
var y = 3.14 // coefficient: 314, exponent: -2
var z = 1000000 // coefficient: 1, exponent: 6 (normalized)
is_integer(x) // true
is_integer(y) // false
1 / 0 // null
```

82
docs/spec/gc.md Normal file
View File

@@ -0,0 +1,82 @@
---
title: "Garbage Collection"
description: "Cheney copying collector"
---
## Overview
ƿit uses a Cheney copying collector for automatic memory management. Each actor has its own independent heap — actors never share mutable memory, so garbage collection is per-actor with no global pauses.
## Algorithm
The Cheney algorithm is a two-space copying collector:
1. **Allocate new space** — a fresh memory block for the new heap
2. **Copy roots** — copy all live root objects from old space to new space
3. **Scan** — walk the new space, updating all internal references
4. **Free old space** — the entire old heap is freed at once
### Copying and Forwarding
When an object is copied from old space to new space:
1. The object's data is copied to the next free position in new space
2. The old object's header is overwritten with a **forwarding pointer** (`OBJ_FORWARD`) containing the new address
3. Future references to the old address find the forwarding pointer and follow it to the new location
```
Old space: New space:
┌──────────────┐ ┌──────────────┐
│ OBJ_FORWARD ─┼────────> │ copied object│
│ (new addr) │ │ │
└──────────────┘ └──────────────┘
```
### Scan Phase
After roots are copied, the collector scans new space linearly. For each object, it examines every JSValue field:
- If the field points to old space, copy the referenced object (or follow its forwarding pointer if already copied)
- If the field points to stone memory, skip it (stone objects are permanent)
- If the field is an immediate value (integer, boolean, null, immediate string), skip it
The scan continues until the scan pointer catches up with the allocation pointer — at that point, all live objects have been found and copied.
## Roots
The collector traces from these root sources:
- **Global object** — all global variables
- **Class prototypes** — built-in type prototypes
- **Exception** — the current exception value
- **Value stack** — all values on the operand stack
- **Frame stack** — all stack frames (register VM and mcode)
- **GC reference stack** — manually registered roots (via `JS_PUSH_VALUE` / `JS_POP_VALUE`)
- **Parser constant pool** — during compilation, constants being built
## Per-Actor Heaps
Each actor maintains its own heap with independent collection:
- No stop-the-world pauses across actors
- No synchronization between collectors
- Each actor's GC runs at the end of a turn (between message deliveries)
- Heap sizes adapt independently based on each actor's allocation patterns
## Heap Growth
The collector uses a buddy allocator for heap blocks. After each collection, if less than 20% of the heap was recovered, the next block size is doubled. The new space size is: `max(live_estimate + alloc_size, next_block_size)`.
All allocations within a heap block use bump allocation (advance a pointer), which is extremely fast.
## Alignment
All objects are aligned to 8-byte boundaries. Object sizes are rounded up to ensure this alignment, which guarantees that the low 3 bits of any heap pointer are always zero — available for JSValue tag bits.
## Interaction with Stone Memory
Stone memory objects (S bit set) are never copied by the collector. When the scanner encounters a pointer to stone memory, it leaves it unchanged. This means:
- Stone objects are effectively permanent GC roots
- No overhead for tracing through immutable object graphs
- Module return values and interned strings impose zero GC cost

95
docs/spec/mach.md Normal file
View File

@@ -0,0 +1,95 @@
---
title: "Register VM"
description: "Binary encoding of the Mach bytecode interpreter"
---
## Overview
The Mach VM is a register-based virtual machine that directly interprets the [Mcode IR](mcode.md) instruction set as compact 32-bit binary bytecode. It is modeled after Lua's register VM — operands are register indices rather than stack positions, reducing instruction count and improving performance.
The Mach serializer (`mach.c`) converts streamlined mcode JSON into binary instructions. Since the Mach bytecode is a direct encoding of the mcode, the [Mcode IR](mcode.md) reference is the authoritative instruction set documentation.
## Instruction Formats
All instructions are 32 bits wide. Four encoding formats are used:
### iABC — Three-Register
```
[op: 8][A: 8][B: 8][C: 8]
```
Used for operations on three registers: `R(A) = R(B) op R(C)`.
### iABx — Register + Constant
```
[op: 8][A: 8][Bx: 16]
```
Used for loading constants: `R(A) = K(Bx)`.
### iAsBx — Register + Signed Offset
```
[op: 8][A: 8][sBx: 16]
```
Used for conditional jumps: if `R(A)` then jump by `sBx`.
### isJ — Signed Jump
```
[op: 8][sJ: 24]
```
Used for unconditional jumps with a 24-bit signed offset.
## Registers
Each function frame has a fixed number of register slots, determined at compile time:
- **R(0)** — `this` binding
- **R(1)..R(arity)** — function arguments
- **R(arity+1)..** — local variables and temporaries
## JSCodeRegister
The compiled output for a function:
```c
struct JSCodeRegister {
uint16_t arity; // argument count
uint16_t nr_slots; // total register count
uint32_t cpool_count; // constant pool size
JSValue *cpool; // constant pool
uint32_t instr_count; // instruction count
MachInstr32 *instructions; // 32-bit instruction array
uint32_t func_count; // nested function count
JSCodeRegister **functions; // nested function table
JSValue name; // function name
uint16_t disruption_pc; // disruption handler offset
};
```
The constant pool holds all non-immediate values referenced by `LOADK` instructions: strings, large numbers, and other constants.
### Constant Pool Index Overflow
Named property instructions (`LOAD_FIELD`, `STORE_FIELD`, `DELETE`) use the iABC format where the constant pool key index occupies an 8-bit field (max 255). When a function references more than 256 unique property names, the serializer automatically falls back to a two-instruction sequence:
1. `LOADK tmp, key_index` — load the key string into a temporary register (iABx, 16-bit index)
2. `LOAD_DYNAMIC` / `STORE_DYNAMIC` / `DELETEINDEX` — use the register-based variant
This is transparent to the mcode compiler and streamline optimizer.
## Arithmetic Dispatch
Arithmetic ops (ADD, SUB, MUL, DIV, MOD, POW) are executed inline without calling the polymorphic `reg_vm_binop()` helper. Since mcode's type guard dispatch guarantees both operands are numbers:
1. **Int-int fast path**: `JS_VALUE_IS_BOTH_INT` → native integer arithmetic with int32 overflow check. Overflow promotes to float64.
2. **Float fallback**: `JS_ToFloat64` → native floating-point operation. Non-finite results produce null.
DIV and MOD check for zero divisor (→ null). POW uses `pow()` with non-finite handling for finite inputs.
Comparison ops (EQ through GE) and bitwise ops still use `reg_vm_binop()` for their slow paths, as they handle a wider range of type combinations (string comparisons, null equality, etc.).

374
docs/spec/mcode.md Normal file
View File

@@ -0,0 +1,374 @@
---
title: "Mcode IR"
description: "Instruction set reference for the JSON-based intermediate representation"
---
## Overview
Mcode is the intermediate representation at the center of the ƿit compilation pipeline. All source code is lowered to mcode before execution or native compilation. The mcode instruction set is the **authoritative reference** for the operations supported by the ƿit runtime — the Mach VM bytecode is a direct binary encoding of these same instructions.
```
Source → Tokenize → Parse → Fold → Mcode → Streamline → Machine
```
Mcode is produced by `mcode.cm`, optimized by `streamline.cm`, then either serialized to 32-bit bytecode for the Mach VM (`mach.c`), or lowered to QBE/LLVM IL for native compilation (`qbe_emit.cm`). See [Compilation Pipeline](pipeline.md) for the full overview.
## Module Structure
An `.mcode` file is a JSON object representing a compiled module:
| Field | Type | Description |
|-------|------|-------------|
| `name` | string | Module name (typically the source filename) |
| `filename` | string | Source filename |
| `data` | object | Constant pool — string and number literals used by instructions |
| `main` | function | The top-level function (module body) |
| `functions` | array | Nested function definitions (referenced by `function dest, id`) |
### Function Record
Each function (both `main` and entries in `functions`) has:
| Field | Type | Description |
|-------|------|-------------|
| `name` | string | Function name (`"<anonymous>"` for lambdas) |
| `filename` | string | Source filename |
| `nr_args` | integer | Number of parameters |
| `nr_slots` | integer | Total register slots needed (args + locals + temporaries) |
| `nr_close_slots` | integer | Number of closure slots captured from parent scope |
| `disruption_pc` | integer | Instruction index of the disruption handler (0 if none) |
| `instructions` | array | Instruction arrays and label strings |
Slot 0 is reserved. Slots 1 through `nr_args` hold parameters. Remaining slots up to `nr_slots - 1` are locals and temporaries.
## Instruction Format
Each instruction is a JSON array. The first element is the instruction name (string), followed by operands. The last two elements are line and column numbers for source mapping:
```json
["add_int", dest, a, b, line, col]
["load_field", dest, obj, "key", line, col]
["jump", "label_name"]
```
Operands are register slot numbers (integers), constant values (strings, numbers), or label names (strings).
## Instruction Reference
### Loading and Constants
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `access` | `dest, name` | Load variable by name (intrinsic or environment) |
| `int` | `dest, value` | Load integer constant |
| `true` | `dest` | Load boolean `true` |
| `false` | `dest` | Load boolean `false` |
| `null` | `dest` | Load `null` |
| `move` | `dest, src` | Copy register value |
| `function` | `dest, id` | Load nested function by index |
| `regexp` | `dest, pattern` | Create regexp object |
### Arithmetic — Integer
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `add_int` | `dest, a, b` | `dest = a + b` (integer) |
| `sub_int` | `dest, a, b` | `dest = a - b` (integer) |
| `mul_int` | `dest, a, b` | `dest = a * b` (integer) |
| `div_int` | `dest, a, b` | `dest = a / b` (integer) |
| `mod_int` | `dest, a, b` | `dest = a % b` (integer) |
| `neg_int` | `dest, src` | `dest = -src` (integer) |
### Arithmetic — Float
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `add_float` | `dest, a, b` | `dest = a + b` (float) |
| `sub_float` | `dest, a, b` | `dest = a - b` (float) |
| `mul_float` | `dest, a, b` | `dest = a * b` (float) |
| `div_float` | `dest, a, b` | `dest = a / b` (float) |
| `mod_float` | `dest, a, b` | `dest = a % b` (float) |
| `neg_float` | `dest, src` | `dest = -src` (float) |
### Arithmetic — Generic
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `pow` | `dest, a, b` | `dest = a ^ b` (exponentiation) |
### Text
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `concat` | `dest, a, b` | `dest = a ~ b` (text concatenation) |
### Comparison — Integer
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `eq_int` | `dest, a, b` | `dest = a == b` (integer) |
| `ne_int` | `dest, a, b` | `dest = a != b` (integer) |
| `lt_int` | `dest, a, b` | `dest = a < b` (integer) |
| `le_int` | `dest, a, b` | `dest = a <= b` (integer) |
| `gt_int` | `dest, a, b` | `dest = a > b` (integer) |
| `ge_int` | `dest, a, b` | `dest = a >= b` (integer) |
### Comparison — Float
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `eq_float` | `dest, a, b` | `dest = a == b` (float) |
| `ne_float` | `dest, a, b` | `dest = a != b` (float) |
| `lt_float` | `dest, a, b` | `dest = a < b` (float) |
| `le_float` | `dest, a, b` | `dest = a <= b` (float) |
| `gt_float` | `dest, a, b` | `dest = a > b` (float) |
| `ge_float` | `dest, a, b` | `dest = a >= b` (float) |
### Comparison — Text
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `eq_text` | `dest, a, b` | `dest = a == b` (text) |
| `ne_text` | `dest, a, b` | `dest = a != b` (text) |
| `lt_text` | `dest, a, b` | `dest = a < b` (lexicographic) |
| `le_text` | `dest, a, b` | `dest = a <= b` (lexicographic) |
| `gt_text` | `dest, a, b` | `dest = a > b` (lexicographic) |
| `ge_text` | `dest, a, b` | `dest = a >= b` (lexicographic) |
### Comparison — Boolean
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `eq_bool` | `dest, a, b` | `dest = a == b` (boolean) |
| `ne_bool` | `dest, a, b` | `dest = a != b` (boolean) |
### Comparison — Special
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `is_identical` | `dest, a, b` | Object identity check (same reference) |
| `eq_tol` | `dest, a, b` | Equality with tolerance |
| `ne_tol` | `dest, a, b` | Inequality with tolerance |
### Type Checks
Inlined from intrinsic function calls. Each sets `dest` to `true` or `false`.
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `is_int` | `dest, src` | Check if integer |
| `is_num` | `dest, src` | Check if number (integer or float) |
| `is_text` | `dest, src` | Check if text |
| `is_bool` | `dest, src` | Check if logical |
| `is_null` | `dest, src` | Check if null |
| `is_array` | `dest, src` | Check if array |
| `is_func` | `dest, src` | Check if function |
| `is_record` | `dest, src` | Check if record (object) |
| `is_stone` | `dest, src` | Check if stone (immutable) |
| `is_proxy` | `dest, src` | Check if function proxy (arity 2) |
### Logical
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `not` | `dest, src` | Logical NOT |
| `and` | `dest, a, b` | Logical AND |
| `or` | `dest, a, b` | Logical OR |
### Bitwise
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `bitand` | `dest, a, b` | Bitwise AND |
| `bitor` | `dest, a, b` | Bitwise OR |
| `bitxor` | `dest, a, b` | Bitwise XOR |
| `bitnot` | `dest, src` | Bitwise NOT |
| `shl` | `dest, a, b` | Shift left |
| `shr` | `dest, a, b` | Arithmetic shift right |
| `ushr` | `dest, a, b` | Unsigned shift right |
### Property Access
Memory operations come in typed variants. The compiler selects the appropriate variant based on `type_tag` and `access_kind` annotations from parse and fold.
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `load_field` | `dest, obj, key` | Load record property by string key |
| `store_field` | `obj, val, key` | Store record property by string key |
| `load_index` | `dest, obj, idx` | Load array element by integer index |
| `store_index` | `obj, val, idx` | Store array element by integer index |
| `load_dynamic` | `dest, obj, key` | Load property (dispatches at runtime) |
| `store_dynamic` | `obj, val, key` | Store property (dispatches at runtime) |
| `delete` | `obj, key` | Delete property |
| `in` | `dest, obj, key` | Check if property exists |
| `length` | `dest, src` | Get length of array or text |
### Object and Array Construction
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `record` | `dest` | Create empty record `{}` |
| `array` | `dest, n` | Create empty array (elements added via `push`) |
| `push` | `arr, val` | Push value to array |
| `pop` | `dest, arr` | Pop value from array |
### Function Calls
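Function calls are decomposed into three instructions: `frame`, `setarg`, and `invoke`. The `goframe` and `goinvoke` instructions are the corresponding variants for async/concurrent calls: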
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `frame` | `dest, fn, argc` | Allocate call frame for `fn` with `argc` arguments |
| `setarg` | `frame, idx, val` | Set argument `idx` in call frame |
| `invoke` | `frame, result` | Execute the call, store result |
| `goframe` | `dest, fn, argc` | Allocate frame for async/concurrent call |
| `goinvoke` | `frame, result` | Invoke async/concurrent call |
### Variable Resolution
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `access` | `dest, name` | Load variable (intrinsic or module environment) |
| `set_var` | `name, src` | Set top-level variable by name |
| `get` | `dest, level, slot` | Get closure variable from parent scope |
| `put` | `level, slot, src` | Set closure variable in parent scope |
### Control Flow
| Instruction | Operands | Description |
|-------------|----------|-------------|
| `LABEL` | `name` | Define a named label (not executed) |
| `jump` | `label` | Unconditional jump |
| `jump_true` | `cond, label` | Jump if `cond` is true |
| `jump_false` | `cond, label` | Jump if `cond` is false |
| `jump_not_null` | `val, label` | Jump if `val` is not null |
| `return` | `src` | Return value from function |
| `disrupt` | — | Trigger disruption (error) |
## Typed Instruction Design
A key design principle of mcode is that **every type check is an explicit instruction**. Arithmetic and comparison operations come in type-specialized variants (`add_int`, `add_float`, `eq_text`, etc.) rather than a single polymorphic instruction.
When type information is available from the fold stage, the compiler emits the typed variant directly. When the type is unknown, the compiler emits a type-check/dispatch pattern:
```json
["is_int", check, a]
["jump_false", check, "float_path"]
["add_int", dest, a, b]
["jump", "done"]
["LABEL", "float_path"]
["add_float", dest, a, b]
["LABEL", "done"]
```
The [Streamline Optimizer](streamline.md) eliminates dead branches when types are statically known, collapsing the dispatch to a single typed instruction.
## Intrinsic Inlining
The mcode compiler recognizes calls to built-in intrinsic functions and emits direct opcodes instead of the generic frame/setarg/invoke call sequence:
| Source call | Emitted instruction |
|-------------|-------------------|
| `is_array(x)` | `is_array dest, src` |
| `is_function(x)` | `is_func dest, src` |
| `is_object(x)` | `is_record dest, src` |
| `is_stone(x)` | `is_stone dest, src` |
| `is_integer(x)` | `is_int dest, src` |
| `is_text(x)` | `is_text dest, src` |
| `is_number(x)` | `is_num dest, src` |
| `is_logical(x)` | `is_bool dest, src` |
| `is_null(x)` | `is_null dest, src` |
| `length(x)` | `length dest, src` |
| `push(arr, val)` | `push arr, val` |
## Function Proxy Decomposition
When the compiler encounters a method call `obj.method(args)`, it emits a branching pattern to handle ƿit's function proxy protocol. An arity-2 function used as a proxy target receives the method name and argument array instead of a normal method call:
```json
["is_proxy", check, obj]
["jump_false", check, "record_path"]
["access", name_slot, "method"]
["array", args_arr, N, arg0, arg1]
["null", null_slot]
["frame", f, obj, 2]
["setarg", f, 0, null_slot]
["setarg", f, 1, name_slot]
["setarg", f, 2, args_arr]
["invoke", f, dest]
["jump", "done"]
["LABEL", "record_path"]
["load_field", method, obj, "method"]
["frame", f2, method, N]
["setarg", f2, 0, obj]
["setarg", f2, 1, arg0]
["invoke", f2, dest]
["LABEL", "done"]
```
## Labels and Control Flow
Control flow uses named labels instead of numeric offsets:
```json
["LABEL", "loop_start"]
["add_int", 1, 1, 2]
["jump_false", 3, "loop_end"]
["jump", "loop_start"]
["LABEL", "loop_end"]
```
Labels are collected into a name-to-index map during loading, enabling O(1) jump resolution. The Mach serializer converts label names to numeric offsets in the binary bytecode.
## Nop Convention
The streamline optimizer replaces eliminated instructions with nop strings (e.g., `_nop_tc_1`, `_nop_bl_2`). Nop strings are skipped during interpretation and native code emission but preserved in the instruction array to maintain positional stability for jump targets.
## Internal Structures
### JSMCode (Mcode Interpreter)
```c
struct JSMCode {
uint16_t nr_args; // argument count
uint16_t nr_slots; // register count
cJSON **instrs; // instruction array
uint32_t instr_count; // number of instructions
struct {
const char *name; // label name
uint32_t index; // instruction index
} *labels;
uint32_t label_count;
struct JSMCode **functions; // nested functions
uint32_t func_count;
cJSON *json_root; // keeps JSON alive
const char *name; // function name
const char *filename; // source file
uint16_t disruption_pc; // disruption handler offset
};
```
### JSCodeRegister (Mach VM Bytecode)
```c
struct JSCodeRegister {
uint16_t arity; // argument count
uint16_t nr_slots; // total register count
uint32_t cpool_count; // constant pool size
JSValue *cpool; // constant pool
uint32_t instr_count; // instruction count
MachInstr32 *instructions; // 32-bit instruction array
uint32_t func_count; // nested function count
JSCodeRegister **functions; // nested function table
JSValue name; // function name
uint16_t disruption_pc; // disruption handler offset
};
```
The Mach serializer (`mach.c`) converts the JSON mcode into compact 32-bit instructions with a constant pool. See [Register VM](mach.md) for the binary encoding formats.

143
docs/spec/objects.md Normal file
View File

@@ -0,0 +1,143 @@
---
title: "Object Types"
description: "Heap object header format and types"
---
## Object Header
Every heap-allocated object begins with a 64-bit header word (`objhdr_t`):
```
[capacity: 56 bits][flags: 5 bits][type: 3 bits]
```
### Type Field (bits 0-2)
| Value | Type | Description |
|-------|------|-------------|
| 0 | `OBJ_ARRAY` | Dynamic array of JSValues |
| 1 | `OBJ_BLOB` | Binary data (bits) |
| 2 | `OBJ_TEXT` | Unicode text string |
| 3 | `OBJ_RECORD` | Key-value object with prototype chain |
| 4 | `OBJ_FUNCTION` | Function (C, register, or mcode) |
| 5 | `OBJ_CODE` | Compiled code |
| 6 | `OBJ_FRAME` | Stack frame for closures |
| 7 | `OBJ_FORWARD` | Forwarding pointer (GC) |
### Flags (bits 3-7)
- **Bit 3 (S)** — Stone flag. If set, the object is immutable. Stone text in the constant table (ct) is not copied by GC since it lives outside the heap; stone objects on the GC heap are copied normally.
- **Bit 4 (P)** — Properties flag.
- **Bit 5 (A)** — Array flag.
- **Bit 7 (R)** — Reserved.
### Capacity (bits 8-63)
The interpretation of the 56-bit capacity field depends on the object type.
## Array
```c
struct JSArray {
objhdr_t header; // type=0, capacity=element slots
word_t len; // current number of elements
JSValue values[]; // inline flexible array
};
```
Capacity is the number of JSValue slots allocated. Length is the number currently in use. Arrays grow by reallocating with a larger capacity.
## Blob
```c
struct JSBlob {
objhdr_t header; // type=1, capacity=allocated bits
word_t length; // length in bits
uint8_t bits[]; // bit-packed data
};
```
Blobs are bit-addressable. The length field tracks the exact number of bits written. A blob starts as antestone (mutable) for writing, then becomes stone (immutable) for reading.
## Text
```c
struct JSText {
objhdr_t header; // type=2, capacity=character slots
word_t length; // length in codepoints (or hash if stoned)
word_t packed[]; // two UTF-32 chars per 64-bit word
};
```
Text is stored as UTF-32, with two 32-bit codepoints packed per 64-bit word.
A mutable text (pretext) uses capacity for the allocated slot count and length for the current codepoint count. When a pretext is stoned, the capacity field is set to the actual length (codepoint count), and the length field is zeroed for use as a lazy hash cache (computed via `fash64` on first use as a key). Since stoned text is immutable, the hash never changes. Stoning is done in-place — no new allocation is needed.
## Record
```c
struct JSRecord {
objhdr_t header; // type=3, capacity=hash table slots
JSRecord *proto; // prototype chain pointer
word_t len; // number of entries
slot slots[]; // key-value pairs (hash table)
};
```
Records use a hash table with linear probing. Slot 0 is reserved for internal metadata (class ID and record ID). Empty slots use `JS_NULL` as the key; deleted slots use `JS_EXCEPTION` as a tombstone.
The prototype chain is a linked list of JSRecord pointers, traversed during property lookup.
## Function
```c
struct JSFunction {
objhdr_t header; // type=4
JSValue name; // function name
int16_t length; // arity (-1 for variadic)
uint8_t kind; // C, register, or mcode
union {
struct { ... } cfunc; // C function pointer
struct { ... } regvm; // register VM code
struct { ... } mcode; // mcode IR
} u;
};
```
The kind field selects which union variant is active. Functions can be implemented in C (native), register code (Mach VM), or mcode (JSON interpreter).
## Frame
```c
struct JSFrame {
objhdr_t header; // type=6, capacity=slot count
JSValue function; // owning function
JSValue caller; // parent frame
JSValue address; // return address
JSValue slots[]; // [this][args][captured][locals][temps]
};
```
Frames capture the execution context for closures. The slots array contains the function's `this` binding, arguments, captured upvalues, local variables, and temporaries. Frames are linked via the caller field for upvalue resolution across closure depth.
## Forwarding Pointer
```
[pointer: 61 bits][111]
```
During garbage collection, when an object is copied to the new heap, the old header is replaced with a forwarding pointer to the new location. This is type 7 (`OBJ_FORWARD`) and stores the new address in bits 3-63. See [Garbage Collection](#gc) for details.
## Object Sizing
All objects are aligned to 8 bytes. The total size in bytes for each type:
| Type | Size |
|------|------|
| Array | `8 + 8 + capacity * 8` |
| Blob | `8 + 8 + ceil(capacity / 8)` |
| Text | `8 + 8 + ceil(capacity / 2) * 8` |
| Record | `8 + 8 + 8 + (capacity + 1) * 16` |
| Function | `sizeof(JSFunction)` (fixed) |
| Code | `sizeof(JSFunctionBytecode)` (fixed) |
| Frame | `8 + 8 + 8 + 8 + capacity * 8` |

128
docs/spec/pipeline.md Normal file
View File

@@ -0,0 +1,128 @@
---
title: "Compilation Pipeline"
description: "Overview of the compilation stages and optimizations"
---
## Overview
The compilation pipeline transforms source code through several stages, each adding information or lowering the representation toward execution. All backends share the same path through mcode and streamline.
```
Source → Tokenize → Parse → Fold → Mcode → Streamline → Machine
```
The final **machine** stage has two targets:
- **Mach VM** — a register-based bytecode interpreter that directly executes the mcode instruction set as compact 32-bit binary
- **Native code** — lowers mcode to QBE or LLVM intermediate language, then compiles to machine code for the target CPU architecture
## Stages
### Tokenize (`tokenize.cm`)
Splits source text into tokens. Handles string interpolation by re-tokenizing template literal contents. Produces a token array with position information (line, column).
### Parse (`parse.cm`)
Converts tokens into an AST. Also performs semantic analysis:
- **Scope records**: For each scope (global, function), builds a record mapping variable names to their metadata: `make` (var/def/function/input), `function_nr`, `nr_uses`, `closure` flag, and `level`.
- **Type tags**: When the right-hand side of a `def` is a syntactically obvious type, stamps `type_tag` on the scope record entry. Derivable types: `"integer"`, `"number"`, `"text"`, `"array"`, `"record"`, `"function"`, `"logical"`. For `def` variables, type tags are also inferred from usage patterns: push (`x[] = v`) implies array, property access (`x.foo = v`) implies record, integer key implies array, text key implies record.
- **Type error detection**: For `def` variables with known type tags, provably wrong operations are reported as compile errors: property access on arrays, push on non-arrays, text keys on arrays, integer keys on records. Only `def` variables are checked because `var` can be reassigned.
- **Intrinsic resolution**: Names used but not locally bound are recorded in `ast.intrinsics`. Name nodes referencing intrinsics get `intrinsic: true`.
- **Access kind**: Subscript (`[`) nodes get `access_kind`: `"index"` for numeric subscripts, `"field"` for string subscripts, omitted otherwise.
- **Tail position**: Return statements where the expression is a call get `tail: true`.
### Fold (`fold.cm`)
Operates on the AST. Performs constant folding and type analysis:
- **Constant folding**: Evaluates arithmetic on known constants at compile time (e.g., `5 + 10` becomes `15`).
- **Constant propagation**: Tracks `def` bindings whose values are known constants.
- **Type propagation**: Extends `type_tag` through operations. When both operands of an arithmetic op have known types, the result type is known. Propagates type tags to reference sites.
- **Intrinsic specialization**: When an intrinsic call's argument types are known, stamps a `hint` on the call node. For example, `length(x)` where x is a known array gets `hint: "array_length"`. Type checks like `is_array(known_array)` are folded to `true`.
- **Purity analysis**: Expressions with no side effects are marked pure (literals, name references, arithmetic on pure operands, calls to pure intrinsics). The pure intrinsic set contains only `is_*` sensory functions — they are the only intrinsics guaranteed to never disrupt regardless of argument types. Other intrinsics like `text`, `number`, and `length` can disrupt on wrong argument types and are excluded.
- **Dead code elimination**: Removes unreachable branches when conditions are known constants. Removes unused `var`/`def` declarations with pure initializers. Removes standalone calls to pure intrinsics where the result is discarded.
### Mcode (`mcode.cm`)
Lowers the AST to a JSON-based intermediate representation with explicit operations. Key design principle: **every type check is an explicit instruction** so downstream optimizers can see and eliminate them.
- **Typed load/store**: Emits `load_index` (array by integer), `load_field` (record by string), or `load_dynamic` (unknown) based on type information from fold.
- **Decomposed calls**: Function calls are split into `frame` (create call frame) + `setarg` (set arguments) + `invoke` (execute call).
- **Intrinsic access**: Intrinsic functions are loaded via `access` with an intrinsic marker rather than global lookup.
- **Intrinsic inlining**: Type-check intrinsics (`is_array`, `is_text`, `is_number`, `is_integer`, `is_logical`, `is_null`, `is_function`, `is_object`, `is_stone`), `length`, and `push` are emitted as direct opcodes instead of frame/setarg/invoke call sequences.
- **Disruption handler labels**: When a function has a disruption handler, a label is emitted before the handler code. This allows the streamline optimizer's unreachable code elimination to safely nop dead code after `return` without accidentally eliminating the handler.
- **Tail call marking**: When a return statement's expression is a call and the function has no disruption handler, the final `invoke` is renamed to `tail_invoke`. This marks the call site for future tail call optimization. Functions with disruption handlers cannot use TCO because the handler frame must remain on the stack.
See [Mcode IR](mcode.md) for the instruction format and complete instruction reference.
### Streamline (`streamline.cm`)
Optimizes the Mcode IR through a series of independent passes. Operates per-function:
1. **Backward type inference**: Infers parameter types from how they are used in typed operators (`add_int`, `store_index`, `load_field`, `push`, `pop`, etc.). Immutable `def` parameters keep their inferred type across label join points.
2. **Type-check elimination**: When a slot's type is known, eliminates `is_<type>` + conditional jump pairs. Narrows `load_dynamic`/`store_dynamic` to typed variants.
3. **Algebraic simplification**: Rewrites identity operations (add 0, multiply 1, divide 1) and folds same-slot comparisons.
4. **Boolean simplification**: Fuses `not` + conditional jump into a single jump with inverted condition.
5. **Move elimination**: Removes self-moves (`move a, a`).
6. **Unreachable elimination**: Nops dead code after `return` until the next label.
7. **Dead jump elimination**: Removes jumps to the immediately following label.
See [Streamline Optimizer](streamline.md) for detailed pass descriptions.
### Machine
The streamlined mcode is lowered to a machine target for execution.
#### Mach VM (default)
The Mach VM is a register-based virtual machine that directly interprets the mcode instruction set as 32-bit binary bytecode. The Mach serializer (`mach.c`) converts streamlined mcode JSON into compact 32-bit instructions with a constant pool. Since the mach bytecode is a direct encoding of the mcode, the [Mcode IR](mcode.md) reference serves as the authoritative instruction set documentation.
```
pit script.ce
```
#### Native Code (QBE / LLVM)
Lowers the streamlined mcode to QBE or LLVM intermediate language for compilation to native machine code. Each mcode function becomes a native function that calls into the ƿit runtime (`cell_rt_*` functions) for operations that require the runtime (allocation, intrinsic dispatch, etc.).
String constants are interned in a data section. Integer constants are encoded inline.
```
pit --emit-qbe script.ce > output.ssa
```
## Files
| File | Role |
|------|------|
| `tokenize.cm` | Lexer |
| `parse.cm` | Parser + semantic analysis |
| `fold.cm` | Constant folding + type analysis |
| `mcode.cm` | AST → Mcode IR lowering |
| `streamline.cm` | Mcode IR optimizer |
| `qbe_emit.cm` | Mcode IR → QBE IL emitter |
| `qbe.cm` | QBE IL operation templates |
| `internal/bootstrap.cm` | Cache seeder (cold start only) |
| `internal/engine.cm` | Self-sufficient pipeline loader and orchestrator |
## Debug Tools
| File | Purpose |
|------|---------|
| `dump_mcode.cm` | Print raw Mcode IR before streamlining |
| `dump_stream.cm` | Print IR after streamlining with before/after stats |
| `dump_types.cm` | Print streamlined IR with type annotations |
## Test Files
| File | Tests |
|------|-------|
| `parse_test.ce` | Type tags, access_kind, intrinsic resolution |
| `fold_test.ce` | Type propagation, purity, intrinsic hints |
| `mcode_test.ce` | Typed load/store, decomposed calls |
| `streamline_test.ce` | Optimization counts, IR before/after |
| `qbe_test.ce` | End-to-end QBE IL generation |
| `test_intrinsics.cm` | Inlined intrinsic opcodes (is_array, length, push, etc.) |
| `test_backward.cm` | Backward type propagation for parameters |

82
docs/spec/stone.md Normal file
View File

@@ -0,0 +1,82 @@
---
title: "Stone Memory"
description: "Immutable arena allocation"
---
## Overview
Stone memory is a separate allocation arena for immutable values. Objects in stone memory are permanent — they are never moved, never freed, and never touched by the garbage collector.
The `stone()` function in ƿit petrifies a value, deeply freezing it and all its descendants. Stoned objects have the S bit set in their object header.
## The Stone Arena
Stone memory uses bump allocation from a contiguous arena:
```
stone_base ──────── stone_free ──────── stone_end
[allocated objects] [free space ]
```
Allocation advances `stone_free` forward. When the arena is exhausted, overflow pages are allocated via the system allocator and linked together:
```c
struct StonePage {
struct StonePage *next;
size_t size;
uint8_t data[];
};
```
## The S Bit
Bit 3 of the object header is the stone flag. When set:
- The object is **immutable** — writes disrupt
- The object is **excluded from GC** — the collector skips it entirely
- For text objects, the length field caches the **hash** instead of the character count (since the text cannot change, the hash is computed once and reused)
## What Gets Stoned
When `stone(value)` is called:
1. If the value is already stone, return immediately
2. Recursively walk all nested values (array elements, record fields, etc.)
3. Copy each mutable object into the stone arena
4. Set the S bit on each copied object
5. Return the stoned value
The operation is deep — an entire object graph becomes permanently immutable.
## Text Interning
The stone arena maintains a hash table for text interning. When a text value is stoned, it is looked up in the intern table. If an identical string already exists in stone memory, the existing one is reused. This deduplicates strings and makes equality comparison O(1) for stoned text.
The hash is computed with `fash64` over the packed UTF-32 words.
## Usage Patterns
### Module Return Values
Every module's return value is automatically stoned:
```javascript
// config.cm
return {
debug: true,
timeout: 30
}
// The returned object is stone — shared safely between actors
```
### Message Passing
Messages between actors are stoned before delivery, ensuring actors never share mutable state.
### Constants
Literal objects and arrays that can be determined at compile time may be allocated directly in stone memory.
## Relationship to GC
The Cheney copying collector only operates on the mutable heap. During collection, when the collector encounters a pointer to stone memory (S bit set), it skips it — stone objects are roots that never move. This means stone memory acts as a permanent root set with zero GC overhead.

410
docs/spec/streamline.md Normal file
View File

@@ -0,0 +1,410 @@
---
title: "Streamline Optimizer"
description: "Mcode IR optimization passes"
---
## Overview
The streamline optimizer (`streamline.cm`) runs a series of independent passes over the Mcode IR to eliminate redundant operations. Each pass is a standalone function that can be enabled, disabled, or reordered. Passes communicate only through the instruction array they mutate in place, replacing eliminated instructions with nop strings (e.g., `_nop_tc_1`).
The optimizer runs after `mcode.cm` generates the IR and before the result is lowered to the Mach VM or emitted as QBE IL.
```
Fold (AST) → Mcode (JSON IR) → Streamline → Mach VM / QBE
```
## Type Lattice
The optimizer tracks a type for each slot in the register file:
| Type | Meaning |
|------|---------|
| `unknown` | No type information |
| `int` | Integer |
| `float` | Floating-point |
| `num` | Number (subsumes int and float) |
| `text` | String |
| `bool` | Logical (true/false) |
| `null` | Null value |
| `array` | Array |
| `record` | Record (object) |
| `function` | Function |
| `blob` | Binary blob |
Subsumption: `int` and `float` both satisfy a `num` check.
## Passes
### 1. infer_param_types (backward type inference)
Scans typed operators and generic arithmetic to determine what types their operands must be. For example, `subtract dest, a, b` implies both `a` and `b` are numbers.
When a parameter slot (1..nr_args) is consistently inferred as a single type, that type is recorded. Since parameters are immutable (`def`), the inferred type holds for the entire function and persists across label join points (loop headers, branch targets).
Backward inference rules:
| Operator class | Operand type inferred |
|---|---|
| `add`, `subtract`, `multiply`, `divide`, `modulo`, `pow`, `negate` | T_NUM |
| bitwise ops (`bitand`, `bitor`, `bitxor`, `shl`, `shr`, `ushr`, `bitnot`) | T_INT |
| `concat` | T_TEXT |
| `not`, `and`, `or` | T_BOOL |
| `store_index` (object operand) | T_ARRAY |
| `store_index` (index operand) | T_INT |
| `store_field` (object operand) | T_RECORD |
| `push` (array operand) | T_ARRAY |
| `load_index` (object operand) | T_ARRAY |
| `load_index` (index operand) | T_INT |
| `load_field` (object operand) | T_RECORD |
| `pop` (array operand) | T_ARRAY |
Typed comparison operators (`eq_int`, `lt_float`, `lt_text`, etc.) and typed boolean comparisons (`eq_bool`, `ne_bool`) are excluded from backward inference. These ops always appear inside guard dispatch patterns (`is_type` + `jump_false` + typed_op), where mutually exclusive branches use the same slot with different types. Including them would merge conflicting types (e.g., T_INT from `lt_int` + T_FLOAT from `lt_float` + T_TEXT from `lt_text`) into T_UNKNOWN, losing all type information. Only unconditionally executed ops contribute to backward inference.
Note: `add` infers T_NUM even though it is polymorphic (numeric addition or text concatenation). When `add` appears in the IR, both operands have already passed an `is_num` guard, so they are guaranteed to be numeric. The text concatenation path uses `concat` instead.
When a slot appears with conflicting type inferences, the merge widens: INT + FLOAT → NUM, INT + NUM → NUM, FLOAT + NUM → NUM. Incompatible types (e.g., NUM + TEXT) produce `unknown`.
**Nop prefix:** none (analysis only, does not modify instructions)
### 2. infer_slot_write_types (slot write-type invariance)
Scans all instructions to determine which non-parameter slots have a consistent write type. If every instruction that writes to a given slot produces the same type, that type is globally invariant and can safely persist across label join points.
This analysis is sound because:
- `alloc_slot()` in mcode.cm is monotonically increasing — temp slots are never reused
- All local variable declarations must be at function body level and initialized — slots are written before any backward jumps to loop headers
- `move` is conservatively treated as T_UNKNOWN, avoiding unsound transitive assumptions
Write type mapping:
| Instruction class | Write type |
|---|---|
| `int` | T_INT |
| `true`, `false` | T_BOOL |
| `null` | T_NULL |
| `access` | type of literal value |
| `array` | T_ARRAY |
| `record` | T_RECORD |
| `function` | T_FUNCTION |
| `length` | T_INT |
| bitwise ops | T_INT |
| `concat` | T_TEXT |
| `negate` | T_NUM |
| `add`, `subtract`, `multiply`, `divide`, `modulo`, `pow` | T_NUM |
| bool ops, comparisons, `in` | T_BOOL |
| `move`, `load_field`, `load_index`, `load_dynamic`, `pop`, `get` | T_UNKNOWN |
| `invoke`, `tail_invoke` | T_UNKNOWN |
The result is a map of slot→type for slots where all writes agree on a single known type. Parameter slots (1..nr_args) and slot 0 are excluded.
Common patterns this enables:
- **Length variables** (`var len = length(arr)`): written by `length` (T_INT) only → invariant T_INT
- **Boolean flags** (`var found = false; ... found = true`): written by `false` and `true` → invariant T_BOOL
- **Locally-created containers** (`var arr = []`): written by `array` only → invariant T_ARRAY
- **Numeric accumulators** (`var sum = 0; sum = sum - x`): written by `access 0` (T_INT) and `subtract` (T_NUM) → merges to T_NUM
Note: Loop counters using `+` (`var i = 0; i = i + 1`) may not achieve write-type invariance because the `+` operator emits a guard dispatch with both `concat` (T_TEXT) and `add` (T_NUM) paths writing to the same temp slot, producing T_UNKNOWN. However, when one operand is a known number literal, `mcode.cm` emits a numeric-only path (see "Known-Number Add Shortcut" below), avoiding the text dispatch. Other arithmetic ops (`-`, `*`, `/`, `%`, `**`) always emit a single numeric write path and work cleanly with write-type analysis.
**Nop prefix:** none (analysis only, does not modify instructions)
### 3. eliminate_type_checks (type-check + jump elimination)
Forward pass that tracks the known type of each slot. When a type check (`is_int`, `is_text`, `is_num`, etc.) is followed by a conditional jump, and the slot's type is already known, the check and jump can be eliminated or converted to an unconditional jump.
Five cases:
- **Known match** (e.g., `is_int` on a slot known to be `int`): both the check and the conditional jump are eliminated (nop'd).
- **Subsumption match** (e.g., `is_num` on a slot known to be `int` or `float`): since `int` and `float` are subtypes of `num`, both the check and jump are eliminated.
- **Subsumption partial** (e.g., `is_int` on a slot known to be `num`): the `num` type could be `int` or `float`, so the check must remain. On fallthrough, the slot narrows to the checked subtype (`int`). This is NOT a mismatch — `num` values can pass an `is_int` check.
- **Known mismatch** (e.g., `is_text` on a slot known to be `int`): the check is nop'd and the conditional jump is rewritten to an unconditional `jump`.
- **Unknown**: the check remains, but on fallthrough, the slot's type is narrowed to the checked type (enabling downstream eliminations).
This pass also reduces `load_dynamic`/`store_dynamic` to `load_field`/`store_field` or `load_index`/`store_index` when the key slot's type is known.
At label join points, all type information is reset except for parameter types from backward inference and write-invariant types from slot write-type analysis.
**Nop prefix:** `_nop_tc_`
### 4. simplify_algebra (same-slot comparison folding)
Tracks known constant values. Folds same-slot comparisons:
| Pattern | Rewrite |
|---------|---------|
| `eq_* dest, x, x` | `true dest` |
| `le_* dest, x, x` | `true dest` |
| `ge_* dest, x, x` | `true dest` |
| `is_identical dest, x, x` | `true dest` |
| `ne_* dest, x, x` | `false dest` |
| `lt_* dest, x, x` | `false dest` |
| `gt_* dest, x, x` | `false dest` |
**Nop prefix:** none (rewrites in place, does not create nops)
### 5. simplify_booleans (not + jump fusion)
Peephole pass that eliminates unnecessary `not` instructions:
| Pattern | Rewrite |
|---------|---------|
| `not d, x; jump_false d, L` | nop; `jump_true x, L` |
| `not d, x; jump_true d, L` | nop; `jump_false x, L` |
| `not d1, x; not d2, d1` | nop; `move d2, x` |
This is particularly effective on `if (!cond)` patterns, which the compiler generates as `not; jump_false`. After this pass, they become a single `jump_true`.
**Nop prefix:** `_nop_bl_`
### 6. eliminate_moves (self-move elimination)
Removes `move a, a` instructions where the source and destination are the same slot. These can arise from earlier passes rewriting binary operations into moves.
**Nop prefix:** `_nop_mv_`
### 7. eliminate_unreachable (dead code after return)
Nops instructions after `return` until the next real label. Only `return` is treated as a terminal instruction; `disrupt` is not, because the disruption handler code immediately follows `disrupt` and must remain reachable.
The mcode compiler emits a label at disruption handler entry points (see `emit_label(gen_label("disruption"))` in mcode.cm), which provides the label boundary that stops this pass from eliminating handler code.
**Nop prefix:** `_nop_ur_`
### 8. eliminate_dead_jumps (jump-to-next-label elimination)
Removes `jump L` instructions where `L` is the immediately following label (skipping over any intervening nop strings). These are common after other passes eliminate conditional branches, leaving behind jumps that fall through naturally.
**Nop prefix:** `_nop_dj_`
## Pass Composition
All passes run in sequence in `optimize_function`:
```
infer_param_types → returns param_types map
infer_slot_write_types → returns write_types map
eliminate_type_checks → uses param_types + write_types
simplify_algebra
simplify_booleans
eliminate_moves
eliminate_unreachable
eliminate_dead_jumps
```
Each pass is independent and can be commented out for testing or benchmarking.
## Intrinsic Inlining
Before streamlining, `mcode.cm` recognizes calls to built-in intrinsic functions and emits direct opcodes instead of the generic frame/setarg/invoke call sequence. This reduces a 6-instruction call pattern to a single instruction:
| Call | Emitted opcode |
|------|---------------|
| `is_array(x)` | `is_array dest, src` |
| `is_function(x)` | `is_func dest, src` |
| `is_object(x)` | `is_record dest, src` |
| `is_stone(x)` | `is_stone dest, src` |
| `is_integer(x)` | `is_int dest, src` |
| `is_text(x)` | `is_text dest, src` |
| `is_number(x)` | `is_num dest, src` |
| `is_logical(x)` | `is_bool dest, src` |
| `is_null(x)` | `is_null dest, src` |
| `length(x)` | `length dest, src` |
| `push(arr, val)` | `push arr, val` |
These inlined opcodes have corresponding Mach VM implementations in `mach.c`.
## Unified Arithmetic
Arithmetic operations use generic opcodes: `add`, `subtract`, `multiply`, `divide`, `modulo`, `pow`, `negate`. There are no type-dispatched variants (e.g., no `add_int`/`add_float`).
The Mach VM handles arithmetic inline with a two-tier fast path. Since mcode's type guard dispatch guarantees both operands are numbers by the time arithmetic executes, the VM does not need polymorphic dispatch:
1. **Int-int fast path**: `JS_VALUE_IS_BOTH_INT` → native integer arithmetic with overflow check. If the result fits int32, returns int32; otherwise promotes to float64.
2. **Float fallback**: `JS_ToFloat64` both operands → native floating-point arithmetic. Non-finite results (infinity, NaN) produce null.
Division and modulo additionally check for zero divisor (→ null). Power uses `pow()` with non-finite handling.
The legacy `reg_vm_binop()` function remains available for comparison operators and any non-mcode bytecode paths, but arithmetic ops no longer call it.
Bitwise operations (`shl`, `shr`, `ushr`, `bitand`, `bitor`, `bitxor`, `bitnot`) remain integer-only and disrupt if operands are not integers.
The QBE/native backend maps generic arithmetic to helper calls (`qbe.add`, `qbe.sub`, etc.). The vision for the native path is that with sufficient type inference, the backend can unbox proven-numeric values to raw registers, operate directly, and only rebox at boundaries (returns, calls, stores).
## Known-Number Add Shortcut
The `+` operator is the only arithmetic op that is polymorphic at the mcode level — `emit_add_decomposed` in `mcode.cm` emits a guard dispatch that checks for text (→ `concat`) before numeric (→ `add`). This dual dispatch means the temp slot is written by both `concat` (T_TEXT) and `add` (T_NUM), producing T_UNKNOWN in write-type analysis.
When either operand is a known number literal (e.g., `i + 1`, `x + 0.5`), `emit_add_decomposed` skips the text dispatch entirely and emits `emit_numeric_binop("add")` — a single `is_num` guard + `add` with no `concat` path. This is safe because text concatenation requires both operands to be text; a known number can never participate in concat.
This optimization eliminates 6-8 instructions from the add block (two `is_text` checks, two conditional jumps, `concat`, `jump`) and produces a clean single-type write path that works with write-type analysis.
Other arithmetic ops (`subtract`, `multiply`, etc.) always use `emit_numeric_binop` and never have this problem.
## Target Slot Propagation
For simple local variable assignments (`i = expr`), the mcode compiler passes the variable's register slot as a `target` to the expression compiler. Binary operations that use `emit_numeric_binop` (subtract, multiply, divide, modulo, pow) can write directly to the target slot instead of allocating a temp and emitting a `move`:
```
// Before: i = i - 1
subtract 7, 2, 6 // temp = i - 1
move 2, 7 // i = temp
// After: i = i - 1
subtract 2, 2, 6 // i = i - 1 (direct)
```
The `+` operator is excluded from target slot propagation when it would use the full text+num dispatch (i.e., when neither operand is a known number), because writing both `concat` and `add` to the variable's slot would pollute its write type. When the known-number shortcut applies, `+` uses `emit_numeric_binop` and would be safe for target propagation, but this is not currently implemented — the exclusion is by operator kind, not by dispatch path.
## Debugging Tools
Three dump tools inspect the IR at different stages:
- **`dump_mcode.cm`** — prints the raw Mcode IR after `mcode.cm`, before streamlining
- **`dump_stream.cm`** — prints the IR after streamlining, with before/after instruction counts
- **`dump_types.cm`** — prints the streamlined IR with type annotations on each instruction
Usage:
```
./cell --core . dump_mcode.cm <file.ce|file.cm>
./cell --core . dump_stream.cm <file.ce|file.cm>
./cell --core . dump_types.cm <file.ce|file.cm>
```
## Tail Call Marking
When a function's return expression is a call (`stmt.tail == true` from the parser) and the function has no disruption handler, mcode.cm renames the final `invoke` instruction to `tail_invoke`. This is semantically identical to `invoke` in the current Mach VM, but marks the call site for future tail call optimization.
The disruption handler restriction exists because TCO would discard the current frame, but the handler must remain on the stack to catch disruptions from the callee.
`tail_invoke` is handled by the same passes as `invoke` in streamline (type tracking, algebraic simplification) and executes identically in the VM.
## Type Propagation Architecture
Type information flows through three compilation stages, each building on the previous:
### Stage 1: Parse-time type tags (parse.cm)
The parser assigns `type_tag` strings to scope variable entries when the type is syntactically obvious:
- **From initializers**: `def a = []` → `type_tag: "array"`, `def n = 42` → `type_tag: "integer"`, `def r = {}` → `type_tag: "record"`
- **From usage patterns** (def only): `def x = null; x[] = v` infers `type_tag: "array"` from the push. `def x = null; x.foo = v` infers `type_tag: "record"` from property access.
- **Type error detection** (def only): When a `def` variable has a known type_tag, provably wrong operations are compile errors:
  - Property access (`.`) on array
  - Push (`[]`) on non-array
  - Text key on array
  - Integer key on record
Only `def` (constant) variables participate in type inference and error detection. `var` variables can be reassigned, making their initializer type unreliable.
### Stage 2: Fold-time type propagation (fold.cm)
The fold pass extends type information through the AST:
- **Intrinsic folding**: `is_array(known_array)` folds to `true`. `length(known_array)` gets `hint: "array_length"`.
- **Purity analysis**: Expressions involving only `is_*` intrinsic calls with pure arguments are considered pure. This enables dead code elimination for unused `var`/`def` bindings with pure initializers, and elimination of standalone pure call statements.
- **Dead code**: Unused pure `var`/`def` declarations are removed. Standalone calls to pure intrinsics (where the result is discarded) are removed. Unreachable branches with constant conditions are removed.
The `pure_intrinsics` set currently contains only `is_*` sensory functions (`is_array`, `is_text`, `is_number`, `is_integer`, `is_function`, `is_logical`, `is_null`, `is_object`, `is_stone`). Other intrinsics like `text`, `number`, and `length` can disrupt on wrong argument types, so they are excluded — removing a call that would disrupt changes observable behavior.
### Stage 3: Streamline-time type tracking (streamline.cm)
The streamline optimizer uses a numeric type lattice (`T_INT`, `T_FLOAT`, `T_TEXT`, etc.) for fine-grained per-instruction tracking:
- **Backward inference** (pass 1): Scans typed operators to infer parameter types. Since parameters are `def` (immutable), inferred types persist across label boundaries.
- **Write-type invariance** (pass 2): Scans all instructions to find local slots where every write produces the same type. These invariant types persist across label boundaries alongside parameter types.
- **Forward tracking** (pass 3): `track_types` follows instruction execution order, tracking the type of each slot. Known-type operations set their destination type (e.g., `concat` → T_TEXT, `length` → T_INT). Generic arithmetic produces T_UNKNOWN. Type checks on unknown slots narrow the type on fallthrough.
- **Type check elimination** (pass 3): When a slot's type is already known, `is_<type>` + conditional jump pairs are eliminated or converted to unconditional jumps.
- **Dynamic access narrowing** (pass 3): `load_dynamic`/`store_dynamic` are narrowed to `load_field`/`store_field` or `load_index`/`store_index` when the key type is known.
Type information resets at label join points (since control flow merges could bring different types), except for parameter types from backward inference and write-invariant types from slot write-type analysis.
## Future Work
### Copy Propagation
A basic-block-local copy propagation pass would replace uses of a copied variable with its source, enabling further move elimination. An implementation was attempted but encountered an unsolved bug where 2-position instruction operand replacement produces incorrect code during self-hosting (the replacement logic for 3-position instructions works correctly). The root cause is not yet understood. See the project memory files for detailed notes.
### Expanded Purity Analysis
The current purity set is conservative (only `is_*`). It could be expanded by:
- **Argument-type-aware purity**: If all arguments to an intrinsic are known to be the correct types (via type_tag or slot_types), the call cannot disrupt and is safe to eliminate. For example, `length(known_array)` is pure but `length(unknown)` is not.
- **User function purity**: Analyze user-defined function bodies during pre_scan. A function is pure if its body contains only pure expressions and calls to known-pure functions. This requires fixpoint iteration for mutual recursion.
- **Callback-aware purity**: Intrinsics like `filter`, `find`, `reduce`, `some`, `every` are pure if their callback argument is pure.
### Move Type Resolution in Write-Type Analysis
Currently, `move` instructions produce T_UNKNOWN in write-type analysis. This prevents type propagation through moves — e.g., a slot written by `access 0` (T_INT) and `move` from an `add` result (T_NUM) merges to T_UNKNOWN instead of T_NUM.
A two-pass approach would fix this: first compute write types for all non-move instructions, then resolve moves by looking up the source slot's computed type. If the source has a known type, merge it into the destination; if unknown, skip the move (don't poison the destination with T_UNKNOWN).
This was implemented and tested but causes a bootstrap failure during self-hosting convergence. The root cause is not yet understood — the optimizer modifies its own bytecode, and the move resolution changes the type landscape enough to produce different code on each pass, preventing convergence. Further investigation is needed; the fix is correct in isolation but interacts badly with the self-hosting fixed-point iteration.
### Target Slot Propagation for Add with Known Numbers
When the known-number add shortcut applies (one operand is a literal number), the generated code uses `emit_numeric_binop` which has a single write path. Target slot propagation should be safe in this case, but is currently blocked by the blanket `kind != "+"` exclusion. Refining the exclusion to check whether the shortcut will apply (by testing `is_known_number` on either operand) would enable direct writes for patterns like `i = i + 1`.
### Forward Type Narrowing from Typed Operations
With unified arithmetic (generic `add`/`subtract`/`multiply`/`divide`/`modulo`/`negate` instead of typed variants), this approach is no longer applicable. Typed comparisons (`eq_int`, `lt_float`, etc.) still exist and their operands have known types, but these are already handled by backward inference.
### Guard Hoisting for Parameters
When a type check on a parameter passes (falls through), the parameter's type could be promoted to `param_types` so it persists across label boundaries. This would allow the first type check on a parameter to prove its type for the entire function. However, this is unsound for polymorphic parameters — if a function is called with different argument types, the first check would wrongly eliminate checks for subsequent types.
A safe version would require proving that a parameter is monomorphic (called with only one type across all call sites), which requires interprocedural analysis.
**Note:** For local variables (non-parameters), the write-type invariance analysis (pass 2) achieves a similar effect safely — if every write to a slot produces the same type, that type persists across labels without needing to hoist any guard.
### Tail Call Optimization
`tail_invoke` instructions are currently marked but execute identically to `invoke`. Actual TCO would reuse the current call frame instead of creating a new one. This requires:
- Ensuring argument count matches (or the frame can be resized)
- No live locals needed after the call (guaranteed by tail position)
- No disruption handler on the current function (already enforced by the marking)
- VM support in mach.c to rewrite the frame in place
### Interprocedural Type Inference
Currently all type inference is intraprocedural (within a single function). Cross-function analysis could:
- Infer return types from function bodies
- Propagate argument types from call sites to callees
- Specialize functions for known argument types (cloning)
### Strength Reduction
Common patterns that could be lowered to cheaper operations when operand types are known:
- `multiply x, 2` with proven-int operands → shift left
- `divide x, 2` with proven-int → arithmetic shift right
- `modulo x, power_of_2` with proven-int → bitwise and
### Numeric Unboxing (QBE/native path)
With unified arithmetic and backward type inference, the native backend can identify regions where numeric values remain in registers without boxing/unboxing:
1. **Guard once**: When backward inference proves a parameter is T_NUM, emit a single type guard at function entry.
2. **Unbox**: Convert the tagged JSValue to a raw double register.
3. **Operate**: Use native FP/int instructions directly (no function calls, no tag checks).
4. **Rebox**: Convert back to tagged JSValue only at rebox points (function returns, calls, stores to arrays/records).
This requires inserting `unbox`/`rebox` IR annotations (no-ops in the Mach VM, meaningful only to QBE).
### Loop-Invariant Code Motion
Type checks that are invariant across loop iterations (checking a variable that doesn't change in the loop body) could be hoisted above the loop. This would require identifying loop boundaries and proving invariance.
### Algebraic Identity Optimization
With unified arithmetic, algebraic identities (x+0→x, x*1→x, x*0→0, x/1→x) require knowing operand values at compile time. Since generic `add`/`multiply` operate on any numeric type, the constant-tracking logic in `simplify_algebra` could be extended to handle these for known-constant slots.
## Nop Convention
Eliminated instructions are replaced with strings matching `_nop_<prefix>_<counter>`. The prefix identifies which pass created the nop. Nop strings are:
- Skipped during interpretation (the VM ignores them)
- Skipped during QBE emission
- Not counted in instruction statistics
- Preserved in the instruction array to maintain positional stability for jump targets

96
docs/spec/values.md Normal file
View File

@@ -0,0 +1,96 @@
---
title: "Value Representation"
description: "JSValue tagging and encoding"
---
## Overview
Every value in ƿit is a 64-bit word called a JSValue. The runtime uses LSB (least significant bit) tagging to pack type information directly into the value, avoiding heap allocation for common types.
## Tag Encoding
The lowest bits of a JSValue determine its type:
| LSB Pattern | Type | Payload |
|-------------|------|---------|
| `xxxxxxx0` | Integer | 31-bit signed integer in upper bits |
| `xxxxx001` | Pointer | 61-bit aligned heap pointer |
| `xxxxx101` | Short float | 8-bit exponent + 52-bit mantissa |
| `xxxxxx11` | Special | 5-bit tag selects subtype |
### Integers
If the least significant bit is 0, the value is an immediate 31-bit signed integer. The integer is stored in the upper bits, extracted via `v >> 1`.
```
[integer: 31 bits][0]
```
Range: -1073741824 to 1073741823. Numbers outside this range are stored as short floats or heap-allocated.
### Pointers
If the lowest 3 bits are `001`, the value is a pointer to a heap object. The pointer is 8-byte aligned, so the low 3 bits are available for the tag. The actual address is extracted by clearing the low 3 bits.
```
[pointer: 61 bits][001]
```
All heap objects (arrays, records, blobs, text, functions, etc.) are referenced through pointer-tagged JSValues.
### Short Floats
If the lowest 3 bits are `101`, the value encodes a floating-point number directly. The format uses an 8-bit exponent (bias 127) and 52-bit mantissa, similar to IEEE 754 but with reduced range.
```
[sign: 1][exponent: 8][mantissa: 52][101]
```
Range: approximately ±3.4 * 10^38. Numbers outside this range fall back to null. Zero is always positive zero.
### Specials
If the lowest 2 bits are `11`, the next 3 bits select a special type:
| 5-bit Tag | Value |
|-----------|-------|
| `00011` | Boolean (true/false in upper bits) |
| `00111` | Null |
| `01111` | Exception marker |
| `10111` | Uninitialized |
| `11011` | Immediate string |
| `11111` | Catch offset |
## Immediate Strings
Short ASCII strings (up to 7 characters) are packed directly into the JSValue without heap allocation:
```
[char6][char5][char4][char3][char2][char1][char0][length: 3][11011]
```
Each character occupies 8 bits. The length (0-7) is stored in bits 5-7. Only ASCII characters (0-127) qualify — any non-ASCII character forces heap allocation.
```javascript
var s = "hello" // 5 chars, fits in immediate string
var t = "" // immediate (length 0)
var u = "longtext" // 8 chars, heap-allocated
```
## Null
Null is encoded as a special-tagged value with tag `00111`. There is no `undefined` in ƿit — only null.
```javascript
var x = null // special tag null
var y = 1 / 0 // also null (division by zero)
var z = {}.missing // null (missing field)
```
## Boolean
True and false are encoded as specials with tag `00011`, distinguished by a bit in the upper payload.
## Summary
The tagging scheme ensures that the most common values — small integers, booleans, null, and short strings — require zero heap allocation. This significantly reduces GC pressure and improves cache locality.

170
docs/testing.md Normal file
View File

@@ -0,0 +1,170 @@
---
title: "Testing"
description: "Writing and running tests in ƿit"
weight: 45
type: "docs"
---
ƿit has built-in support for writing and running tests. Tests live in the `tests/` directory of a package and are `.cm` modules that return a record of test functions.
## Writing Tests
A test file returns a record where each key starting with `test_` is a test function. A test passes if it returns `null` (or nothing). It fails if it returns a text string describing the failure.
```javascript
// tests/math.cm
return {
test_addition: function() {
if (1 + 2 != 3) return "expected 3"
},
test_division: function() {
if (10 / 3 != 3.333333333333333333) return "unexpected result"
}
}
```
Test functions take no arguments. Use early returns with a failure message to report errors:
```javascript
test_array_push: function() {
var a = [1, 2]
a[] = 3
if (length(a) != 3) return "expected length 3, got " + text(length(a))
if (a[2] != 3) return "expected a[2] to be 3"
}
```
## Running Tests
```bash
pit test # run all tests in current package
pit test suite # run a specific test file (tests/suite.cm)
pit test tests/math # same, with explicit path
pit test all # run all tests in current package
pit test package <name> # run all tests in a named package
pit test package <name> <test> # run a specific test in a named package
pit test package all # run tests from all installed packages
```
### Flags
```bash
pit test suite -g # run GC after each test (useful for detecting leaks)
pit test suite --verify # enable IR verification during compilation
pit test suite --diff # run each test optimized and unoptimized, compare results
```
`--verify` and `--diff` can be combined:
```bash
pit test suite --verify --diff
```
## IR Verification
The `--verify` flag enables structural validation of the compiler's intermediate representation after each optimizer pass. This catches bugs like invalid slot references, broken jump targets, and malformed instructions.
When verification fails, errors are printed with the pass name that introduced them:
```
[verify_ir] slot_bounds: slot 12 out of range 0..9 in instruction add_int
[verify_ir] 1 errors after dead_code_elimination
```
IR verification adds overhead and is intended for development, not production use.
## Differential Testing
Differential testing runs each test through two paths — with the optimizer enabled and with it disabled — and compares results. Any mismatch between the two indicates an optimizer bug.
### Inline Mode
The `--diff` flag on `pit test` runs each test module through both paths during a normal test run:
```bash
pit test suite --diff
```
Output includes a mismatch count at the end:
```
Tests: 493 passed, 0 failed, 493 total
Diff mismatches: 0
```
### Standalone Mode
`pit diff` is a dedicated differential testing tool with detailed mismatch reporting:
```bash
pit diff # diff all test files in current package
pit diff suite # diff a specific test file
pit diff tests/math # same, with explicit path
```
For each test function, it reports whether the optimized and unoptimized results match:
```
tests/suite.cm: 493 passed, 0 failed
----------------------------------------
Diff: 493 passed, 0 failed, 493 total
```
When a mismatch is found:
```
tests/suite.cm: 492 passed, 1 failed
MISMATCH: test_foo: result mismatch opt=42 noopt=43
```
## Fuzz Testing
The fuzzer generates random self-checking programs, compiles them, and runs them through both optimized and unoptimized paths. Each generated program contains test functions that validate their own expected results, so failures catch both correctness bugs and optimizer mismatches.
```bash
pit fuzz # 100 iterations, random seed
pit fuzz 500 # 500 iterations, random seed
pit fuzz --seed 42 # 100 iterations, deterministic seed
pit fuzz 1000 --seed 42 # 1000 iterations, deterministic seed
```
The fuzzer generates programs that exercise:
- Integer and float arithmetic with known expected results
- Control flow (if/else, while loops)
- Closures and captured variable mutation
- Records and property access
- Arrays and iteration
- Higher-order functions
- Disruption handling
- Text concatenation
On failure, the generated source is saved to `tests/fuzz_failures/` for reproduction:
```
Fuzzing: 1000 iterations, starting seed=42
FAIL seed=57: diff fuzz_3: opt=10 noopt=11
saved to tests/fuzz_failures/seed_57.cm
----------------------------------------
Fuzz: 999 passed, 1 failed, 1000 total
Failures saved to tests/fuzz_failures/
```
Saved failure files are valid `.cm` modules that can be run directly or added to the test suite.
## Test File Organization
Tests live in the `tests/` directory of a package:
```
mypackage/
├── pit.toml
├── math.cm
└── tests/
├── suite.cm # main test suite
├── math.cm # math-specific tests
└── disrupt.cm # disruption tests
```
All `.cm` files under `tests/` are discovered automatically by `pit test`.

119
docs/wota.md Normal file
View File

@@ -0,0 +1,119 @@
---
title: "Wota Format"
description: "Word Object Transfer Arrangement"
weight: 86
type: "docs"
---
Wota is a binary message format for local inter-process communication. It is similar to Nota but works at word granularity (64-bit words) rather than byte granularity. Wota arrangements are less compact than Nota but faster to arrange and consume.
Wota stands for Word Object Transfer Arrangement.
## Type Summary
| Byte | Type |
|------|------|
| `00` | Integer |
| `01` | Floating Point |
| `02` | Array |
| `03` | Record |
| `04` | Blob |
| `05` | Text |
| `07` | Symbol |
## Preambles
Every Wota value starts with a preamble word. The least significant byte contains the type. The remaining 56 bits contain type-specific data.
## Blob
A blob is a string of bits. The remaining field contains the number of bits. The number of words that follow: `floor((number_of_bits + 63) / 64)`. The first bit of the blob goes into the most significant bit of the first word. The final word is padded with 0.
Example: A blob containing 25 bits `1111000011100011001000001`:
```
0000000000001904 # preamble: 25 bits, type blob
F0E3208000000000 # data (padded to 64 bits)
```
## Text
The text is a string of UTF-32 characters packed 2 per word. The remaining field contains the number of characters. The number of words that follow: `floor((number_of_characters + 1) / 2)`. The final word is padded with 0.
Example: `"cat"`:
```
0000000000000305 # preamble: 3 characters, type text
0000006300000061 # 'c' and 'a'
0000007400000000 # 't' and padding
```
## Array
An array is an ordered sequence of values. The remaining field contains the number of elements. Following the preamble are the elements, each beginning with its own preamble. Nesting is encouraged. Cyclic structures are not allowed.
Example: `["duck", "dragon"]`:
```
0000000000000202 # preamble: 2 elements, type array
0000000000000405 # text "duck": 4 chars
0000006400000075 # 'd' 'u'
000000630000006B # 'c' 'k'
0000000000000605 # text "dragon": 6 chars
0000006400000072 # 'd' 'r'
0000006100000067 # 'a' 'g'
0000006F0000006E # 'o' 'n'
```
## Record
A record is a set of key/value pairs. Keys must be text. The remaining field contains the number of pairs.
Example: `{"ox": ["O", "X"]}`:
```
0000000000000103 # preamble: 1 pair, type record
0000000000000205 # key "ox": 2 chars
0000006F00000078 # 'o' 'x'
0000000000000202 # value: array of 2
0000000000000105 # "O": 1 char
0000004F00000000 # 'O'
0000000000000105 # "X": 1 char
0000005800000000 # 'X'
```
## Number
Numbers are represented as DEC64. To arrange an integer, shift the integer up 8 bits. The number is incorporated directly into the preamble.
Example: `7`:
```
0000000000000700 # integer 7 as DEC64
```
To arrange a floating point number, place the number in the word following the floating point preamble.
Example: `4.25`:
```
0000000000000001 # preamble: type floating point
000000000001A9FE # DEC64 encoding of 4.25
```
Care must be taken when decoding that the least significant byte of the number is not `80` (the null exponent).
## Symbol
The remaining field contains the symbol.
Example: `[null, false, true, private, system]`:
```
0000000000000502 # array of 5
0000000000000007 # null
0000000000000207 # false
0000000000000307 # true
0000000000000807 # private
0000000000000907 # system
```

16
dump_ast.cm Normal file
View File

@@ -0,0 +1,16 @@
// dump_ast.cm — pretty-print the folded AST as JSON
//
// Usage: ./cell --core . dump_ast.cm <file.ce|file.cm>
//
// Pipeline: read the file, tokenize, parse, fold, then print the folded AST
// encoded as JSON.
var fd = use("fd")
var json = use("json")
var tokenize = use("tokenize")
var parse = use("parse")
var fold = use("fold")
// Print a usage line and stop when no input file is given, like the other
// dump_*.cm tools do, instead of continuing with a missing filename.
if (length(args) < 1) {
  print("usage: cell --core . dump_ast.cm <file>")
  return
}
var filename = args[0]
var src = text(fd.slurp(filename))
var tok = tokenize(src, filename)
// parse receives the token list plus the source, the filename and the
// tokenizer itself.
var ast = parse(tok.tokens, src, filename, tokenize)
var folded = fold(ast)
print(json.encode(folded))

22
dump_ir.ce Normal file
View File

@@ -0,0 +1,22 @@
// dump_ir.ce — print the streamlined instructions of the main function,
// one per line, as "<index>: <json>".
var tokenize = use('tokenize')
var parse_mod = use('parse')
var fold = use('fold')
var mcode_mod = use('mcode')
var streamline_mod = use('streamline')
var json = use('json')
var fd = use('fd')
// Front end: source text -> tokens -> AST -> folded AST.
var path = args[0]
var source = text(fd.slurp(path))
var lexed = tokenize(source, path)
var tree = fold(parse_mod(lexed.tokens, source, path, tokenize))
// Back end: mcode IR, then the streamline pass.
var ir = streamline_mod(mcode_mod(tree))
var main_instrs = ir.main.instructions
var count = length(main_instrs)
var idx = 0
var line = null
while (idx < count) {
  line = text(idx) + ': ' + json.encode(main_instrs[idx])
  print(line)
  idx = idx + 1
}

117
dump_mcode.cm Normal file
View File

@@ -0,0 +1,117 @@
// dump_mcode.cm — pretty-print mcode IR (before streamlining)
//
// Usage: ./cell --core . dump_mcode.cm <file.ce|file.cm>
//
// This preamble runs tokenize -> parse -> fold -> mcode on the given file and
// leaves the result in `compiled` for the dump code further down.
var fd = use("fd")
var json = use("json")
var tokenize = use("tokenize")
var parse = use("parse")
var fold = use("fold")
var mcode = use("mcode")
// The input path is required as the first argument; print usage and stop otherwise.
if (length(args) < 1) {
print("usage: cell --core . dump_mcode.cm <file>")
return
}
var filename = args[0]
// Source text handed to the tokenizer.
var src = text(fd.slurp(filename))
var tok = tokenize(src, filename)
// parse receives the token list plus the source, the filename and the tokenizer itself.
var ast = parse(tok.tokens, src, filename, tokenize)
var folded = fold(ast)
// mcode IR for the file; no streamline pass is applied in this tool.
var compiled = mcode(folded)
// Append spaces to `s` until length(s) reaches `w`. Text that is already at
// least `w` long is returned unchanged.
var pad_right = function(s, w) {
  var padded = s
  while (w > length(padded)) {
    padded = padded + " "
  }
  return padded
}
// Render one instruction operand as display text:
//   null -> "null", numbers -> text(v), text -> wrapped in double quotes,
//   objects -> JSON, logicals -> "true"/"false", anything else -> text(v).
var fmt_val = function(v) {
  var shown = null
  if (is_null(v)) {
    shown = "null"
  } else if (is_number(v)) {
    shown = text(v)
  } else if (is_text(v)) {
    shown = `"${v}"`
  } else if (is_object(v)) {
    shown = json.encode(v)
  } else if (is_logical(v)) {
    shown = v ? "true" : "false"
  } else {
    shown = text(v)
  }
  return shown
}
// Print a header for `func` followed by a listing of its instructions.
//
// Text entries in the instruction list are printed as "label:" unless they
// start with "_nop_", in which case they are skipped. Array entries are
// printed as "<pc> <op> <operands>"; the last two elements of each array are
// not shown. `pc` advances only for array entries. Always returns null.
var dump_function = function(func, name) {
  var arg_count = func.nr_args != null ? func.nr_args : 0
  var slot_count = func.nr_slots != null ? func.nr_slots : 0
  var close_count = func.nr_close_slots != null ? func.nr_close_slots : 0
  var instrs = func.instructions
  var idx = 0
  var pc = 0
  var entry = null
  var shown = null
  var last = 0
  var k = 0
  print(`\n=== ${name} (args=${text(arg_count)}, slots=${text(slot_count)}, closures=${text(close_count)}) ===`)
  if (instrs == null || length(instrs) == 0) {
    print(" (empty)")
    return null
  }
  while (idx < length(instrs)) {
    entry = instrs[idx]
    idx = idx + 1
    if (is_text(entry)) {
      // Label line; nop markers are hidden in this listing.
      if (!starts_with(entry, "_nop_")) {
        print(`${entry}:`)
      }
      continue
    }
    if (!is_array(entry)) {
      continue
    }
    // Operands are elements 1 .. length-3; the trailing pair is omitted.
    shown = []
    last = length(entry) - 2
    k = 1
    while (k < last) {
      push(shown, fmt_val(entry[k]))
      k = k + 1
    }
    print(` ${pad_right(text(pc), 5)} ${pad_right(entry[0], 14)} ${text(shown, ", ")}`)
    pc = pc + 1
  }
  return null
}
var subs = compiled.functions
var sub_idx = 0
var sub = null
var label = null
// Dump main, titled with the compiled unit's name when it has one.
if (compiled.main != null) {
  dump_function(compiled.main, compiled.name != null ? compiled.name : "<main>")
}
// Dump sub-functions, each titled "[index] name"; unnamed ones get a
// "<func_N>" placeholder.
if (subs != null) {
  while (sub_idx < length(subs)) {
    sub = subs[sub_idx]
    label = sub.name != null ? sub.name : `<func_${text(sub_idx)}>`
    dump_function(sub, `[${text(sub_idx)}] ${label}`)
    sub_idx = sub_idx + 1
  }
}
}

166
dump_stream.cm Normal file
View File

@@ -0,0 +1,166 @@
// dump_stream.cm — show mcode IR before and after streamlining
//
// Usage: ./cell --core . dump_stream.cm <file.ce|file.cm>
//
// This preamble compiles the given file to mcode IR, snapshots it, and then
// runs the streamline pass, leaving `before` and `optimized` for the
// reporting code further down.
var fd = use("fd")
var json = use("json")
var tokenize = use("tokenize")
var parse = use("parse")
var fold = use("fold")
var mcode = use("mcode")
var streamline = use("streamline")
// The input path is required as the first argument; print usage and stop otherwise.
if (length(args) < 1) {
print("usage: cell --core . dump_stream.cm <file>")
return
}
var filename = args[0]
// Source text handed to the tokenizer.
var src = text(fd.slurp(filename))
var tok = tokenize(src, filename)
// parse receives the token list plus the source, the filename and the tokenizer itself.
var ast = parse(tok.tokens, src, filename, tokenize)
var folded = fold(ast)
var compiled = mcode(folded)
// Deep copy IR for before snapshot
// The copy is a JSON round trip taken before streamline sees `compiled`
// (presumably because streamline may rewrite its input in place — confirm).
var before = json.decode(json.encode(compiled))
var optimized = streamline(compiled)
// Append spaces to `s` until length(s) reaches `w`. Text that is already at
// least `w` long is returned unchanged.
var pad_right = function(s, w) {
  var padded = s
  while (w > length(padded)) {
    padded = padded + " "
  }
  return padded
}
// Render one instruction operand as display text:
//   null -> "null", numbers -> text(v), text -> wrapped in double quotes,
//   objects -> JSON, logicals -> "true"/"false", anything else -> text(v).
var fmt_val = function(v) {
  var shown = null
  if (is_null(v)) {
    shown = "null"
  } else if (is_number(v)) {
    shown = text(v)
  } else if (is_text(v)) {
    shown = `"${v}"`
  } else if (is_object(v)) {
    shown = json.encode(v)
  } else if (is_logical(v)) {
    shown = v ? "true" : "false"
  } else {
    shown = text(v)
  }
  return shown
}
// Count the entries of a function's instruction list.
//
// Text entries are labels; a label starting with "_nop_" is counted in
// `nops`. Array entries are instructions: each one is counted in `total`,
// and those whose opcode is "invoke" are also counted in `calls`.
//
// Returns {total, nops, real, calls}. `real` is the number of instructions
// still present. Nop markers are text entries and are never included in
// `total`, so `real` equals `total`; subtracting `nops` from it would remove
// the eliminated instructions a second time and under-report the result.
// A missing instruction list yields all zeros.
var count_stats = function(func) {
  var instrs = func.instructions
  var total = 0
  var nops = 0
  var calls = 0
  var i = 0
  var instr = null
  if (instrs == null) {
    return {total: 0, nops: 0, real: 0, calls: 0}
  }
  while (i < length(instrs)) {
    instr = instrs[i]
    if (is_text(instr)) {
      if (starts_with(instr, "_nop_")) {
        nops = nops + 1
      }
    } else if (is_array(instr)) {
      total = total + 1
      if (instr[0] == "invoke") {
        calls = calls + 1
      }
    }
    i = i + 1
  }
  return {total: total, nops: nops, real: total, calls: calls}
}
// Print a listing of the instructions of `func`.
//
// Text entries are printed as "label:", except entries starting with
// "_nop_": those are printed as a "--- nop ---" row (which also advances pc)
// only when `show_nops` is set, and are otherwise skipped. Array entries are
// printed as "<pc> <op> <operands>"; the last two elements of each array are
// not shown. Prints nothing for a missing or empty list. Always returns null.
var dump_function = function(func, show_nops) {
  var instrs = func.instructions
  var idx = 0
  var pc = 0
  var entry = null
  var shown = null
  var last = 0
  var k = 0
  if (instrs == null || length(instrs) == 0) {
    return null
  }
  while (idx < length(instrs)) {
    entry = instrs[idx]
    idx = idx + 1
    if (is_text(entry)) {
      if (!starts_with(entry, "_nop_")) {
        print(`${entry}:`)
      } else if (show_nops) {
        print(` ${pad_right(text(pc), 5)} --- nop ---`)
        pc = pc + 1
      }
      continue
    }
    if (!is_array(entry)) {
      continue
    }
    // Operands are elements 1 .. length-3; the trailing pair is omitted.
    shown = []
    last = length(entry) - 2
    k = 1
    while (k < last) {
      push(shown, fmt_val(entry[k]))
      k = k + 1
    }
    print(` ${pad_right(text(pc), 5)} ${pad_right(entry[0], 14)} ${text(shown, ", ")}`)
    pc = pc + 1
  }
  return null
}
// Report one function: a header, the instruction/invoke counts from the
// pre-streamline snapshot (`before_func`) and from the streamlined function
// (`after_func`), then the streamlined listing with nop rows hidden.
// Always returns null.
var dump_pair = function(before_func, after_func, name) {
  var arg_count = after_func.nr_args != null ? after_func.nr_args : 0
  var slot_count = after_func.nr_slots != null ? after_func.nr_slots : 0
  var before_stats = count_stats(before_func)
  var after_stats = count_stats(after_func)
  print(`\n=== ${name} (args=${text(arg_count)}, slots=${text(slot_count)}) ===`)
  print(` before: ${text(before_stats.total)} instructions, ${text(before_stats.calls)} invokes`)
  // The nop count of the streamlined function is shown as "eliminated".
  print(` after: ${text(after_stats.real)} instructions (${text(after_stats.nops)} eliminated), ${text(after_stats.calls)} invokes`)
  print("\n -- streamlined --")
  dump_function(after_func, false)
  return null
}
var after_subs = optimized.functions
var before_subs = before.functions
var sub_idx = 0
var sub = null
var label = null
// Dump main; needs both the snapshot and the streamlined version.
if (optimized.main != null && before.main != null) {
  dump_pair(before.main, optimized.main, optimized.name != null ? optimized.name : "<main>")
}
// Dump sub-functions, pairing each streamlined function with the snapshot
// entry at the same index.
if (after_subs != null && before_subs != null) {
  while (sub_idx < length(after_subs)) {
    sub = after_subs[sub_idx]
    label = sub.name != null ? sub.name : `<func_${text(sub_idx)}>`
    dump_pair(before_subs[sub_idx], sub, `[${text(sub_idx)}] ${label}`)
    sub_idx = sub_idx + 1
  }
}

237
dump_types.cm Normal file
View File

@@ -0,0 +1,237 @@
// dump_types.cm — show streamlined IR with type annotations
//
// Usage: ./cell --core . dump_types.cm <file.ce|file.cm>
//
// This preamble runs tokenize -> parse -> fold -> mcode -> streamline on the
// given file and leaves the result in `optimized` for the dump code further down.
var fd = use("fd")
var json = use("json")
var tokenize = use("tokenize")
var parse = use("parse")
var fold = use("fold")
var mcode = use("mcode")
var streamline = use("streamline")
// The input path is required as the first argument; print usage and stop otherwise.
if (length(args) < 1) {
print("usage: cell --core . dump_types.cm <file>")
return
}
var filename = args[0]
// Source text handed to the tokenizer.
var src = text(fd.slurp(filename))
var tok = tokenize(src, filename)
// parse receives the token list plus the source, the filename and the tokenizer itself.
var ast = parse(tok.tokens, src, filename, tokenize)
var folded = fold(ast)
var compiled = mcode(folded)
var optimized = streamline(compiled)
// Type constants
// Names stored in the per-slot type map and printed in the annotations.
// T_UNKNOWN entries are tracked but never printed.
def T_UNKNOWN = "unknown"
def T_INT = "int"
def T_FLOAT = "float"
def T_NUM = "num"
def T_TEXT = "text"
def T_BOOL = "bool"
def T_NULL = "null"
def T_ARRAY = "array"
def T_RECORD = "record"
def T_FUNCTION = "function"
// Opcodes whose destination slot (operand 1) is recorded as T_INT.
def int_result_ops = {
bitnot: true, bitand: true, bitor: true,
bitxor: true, shl: true, shr: true, ushr: true
}
// Opcodes whose destination slot (operand 1) is recorded as T_BOOL:
// typed comparisons, logical operators and the is_* checks.
def bool_result_ops = {
eq_int: true, ne_int: true, lt_int: true, gt_int: true,
le_int: true, ge_int: true,
eq_float: true, ne_float: true, lt_float: true, gt_float: true,
le_float: true, ge_float: true,
eq_text: true, ne_text: true, lt_text: true, gt_text: true,
le_text: true, ge_text: true,
eq_bool: true, ne_bool: true,
not: true, and: true, or: true,
is_int: true, is_text: true, is_num: true,
is_bool: true, is_null: true, is_identical: true,
is_array: true, is_func: true, is_record: true, is_stone: true
}
// Classify the value operand of an "access" instruction: text -> T_TEXT,
// integer -> T_INT, any other number -> T_FLOAT, everything else -> T_UNKNOWN.
var access_value_type = function(val) {
  var kind = T_UNKNOWN
  if (is_text(val)) {
    kind = T_TEXT
  } else if (is_number(val)) {
    kind = is_integer(val) ? T_INT : T_FLOAT
  }
  return kind
}
// Update `slot_types` (keyed by slot number as text) with the type written
// by one instruction.
//
// The destination is operand 1, except for invoke/tail_invoke where it is
// operand 2. Opcodes not listed here leave the map untouched.
// Always returns null.
var track_types = function(slot_types, instr) {
  var op = instr[0]
  var dest = instr[1]
  var kind = null
  var moved = null
  if (op == "access") {
    kind = access_value_type(instr[2])
  } else if (op == "move") {
    // Copy the source slot's type; an untracked source counts as unknown.
    moved = slot_types[text(instr[2])]
    kind = moved != null ? moved : T_UNKNOWN
  } else if (op == "invoke" || op == "tail_invoke") {
    dest = instr[2]
    kind = T_UNKNOWN
  } else if (op == "int" || op == "length" || int_result_ops[op] == true) {
    kind = T_INT
  } else if (op == "true" || op == "false" || bool_result_ops[op] == true) {
    kind = T_BOOL
  } else if (op == "concat" || op == "typeof") {
    kind = T_TEXT
  } else if (op == "null") {
    kind = T_NULL
  } else if (op == "array") {
    kind = T_ARRAY
  } else if (op == "record") {
    kind = T_RECORD
  } else if (op == "function") {
    kind = T_FUNCTION
  } else if (op == "load_field" || op == "load_index" || op == "load_dynamic" ||
    op == "pop" || op == "get" ||
    op == "add" || op == "subtract" || op == "multiply" ||
    op == "divide" || op == "modulo" || op == "pow" || op == "negate") {
    // Loads and generic arithmetic: the result type is not tracked.
    kind = T_UNKNOWN
  }
  if (kind != null) {
    slot_types[text(dest)] = kind
  }
  return null
}
// Append spaces to `s` until length(s) reaches `w`. Text that is already at
// least `w` long is returned unchanged.
var pad_right = function(s, w) {
  var padded = s
  while (w > length(padded)) {
    padded = padded + " "
  }
  return padded
}
// Render one instruction operand as display text:
//   null -> "null", numbers -> text(v), text -> wrapped in double quotes,
//   objects -> JSON, logicals -> "true"/"false", anything else -> text(v).
var fmt_val = function(v) {
  var shown = null
  if (is_null(v)) {
    shown = "null"
  } else if (is_number(v)) {
    shown = text(v)
  } else if (is_text(v)) {
    shown = `"${v}"`
  } else if (is_object(v)) {
    shown = json.encode(v)
  } else if (is_logical(v)) {
    shown = v ? "true" : "false"
  } else {
    shown = text(v)
  }
  return shown
}
// Build the type annotation text for one instruction.
//
// Every numeric operand (elements 1 .. length-3) is looked up in
// `slot_types` as a slot number; operands with a known type other than
// T_UNKNOWN contribute "s<slot>:<type>". The pieces are joined with spaces;
// the result is "" when nothing is known.
var type_annotation = function(slot_types, instr) {
  var notes = []
  var last = length(instr) - 2
  var k = 1
  var operand = null
  var known = null
  while (k < last) {
    operand = instr[k]
    k = k + 1
    if (!is_number(operand)) {
      continue
    }
    known = slot_types[text(operand)]
    if (known == null || known == T_UNKNOWN) {
      continue
    }
    push(notes, `s${text(operand)}:${known}`)
  }
  return length(notes) == 0 ? "" : text(notes, " ")
}
// Print a header for `func` followed by its instruction listing, each row
// optionally followed by "; <annotation>" giving the tracked types of the
// slots it mentions.
//
// Types are tracked in program order: a row is annotated with what is known
// before the instruction, then the instruction's own effect is recorded.
// Every printed label resets the tracked types; "_nop_" entries are skipped
// without resetting. Always returns null.
var dump_function_typed = function(func, name) {
  var arg_count = func.nr_args != null ? func.nr_args : 0
  var slot_count = func.nr_slots != null ? func.nr_slots : 0
  var instrs = func.instructions
  var slot_types = {}
  var idx = 0
  var pc = 0
  var entry = null
  var note = null
  var shown = null
  var last = 0
  var k = 0
  var line = null
  print(`\n=== ${name} (args=${text(arg_count)}, slots=${text(slot_count)}) ===`)
  if (instrs == null || length(instrs) == 0) {
    print(" (empty)")
    return null
  }
  while (idx < length(instrs)) {
    entry = instrs[idx]
    idx = idx + 1
    if (is_text(entry)) {
      if (!starts_with(entry, "_nop_")) {
        // Start over with an empty type map at each label.
        slot_types = {}
        print(`${entry}:`)
      }
      continue
    }
    if (!is_array(entry)) {
      continue
    }
    note = type_annotation(slot_types, entry)
    // Operands are elements 1 .. length-3; the trailing pair is omitted.
    shown = []
    last = length(entry) - 2
    k = 1
    while (k < last) {
      push(shown, fmt_val(entry[k]))
      k = k + 1
    }
    line = pad_right(` ${pad_right(text(pc), 5)} ${pad_right(entry[0], 14)} ${text(shown, ", ")}`, 50)
    if (length(note) > 0) {
      line = `${line} ; ${note}`
    }
    print(line)
    track_types(slot_types, entry)
    pc = pc + 1
  }
  return null
}
var subs = optimized.functions
var sub_idx = 0
var sub = null
var label = null
// Dump main, titled with the compiled unit's name when it has one.
if (optimized.main != null) {
  dump_function_typed(optimized.main, optimized.name != null ? optimized.name : "<main>")
}
// Dump sub-functions, each titled "[index] name"; unnamed ones get a
// "<func_N>" placeholder.
if (subs != null) {
  while (sub_idx < length(subs)) {
    sub = subs[sub_idx]
    label = sub.name != null ? sub.name : `<func_${text(sub_idx)}>`
    dump_function_typed(sub, `[${text(sub_idx)}] ${label}`)
    sub_idx = sub_idx + 1
  }
}

251
editors/ai/pit-context.md Normal file
View File

@@ -0,0 +1,251 @@
# ƿit Language — AI Context
ƿit (pronounced "pit") is a safe, actor-based programming language. Its syntax resembles JavaScript but with significant differences. Scripts use `.ce` (actors) and `.cm` (modules) file extensions.
## Key Differences from JavaScript
- **`var` / `def`** — `var` is mutable, `def` is constant. No `let` or `const`.
- **`==` is strict** — No `===` or `!==`. `==` and `!=` are always strict comparison.
- **No `undefined`** — Only `null`. Division by zero produces `null`, not `Infinity`.
- **No classes** — Use `meme()`, `proto()`, `isa()` for prototype chains.
- **No `for...in`, `for...of`, spread, rest, or default params.**
- **Variables declared at function body level only** — Not inside `if`/`while`/`for` blocks.
- **All variables must be initialized** — `var x` alone is an error; use `var x = null`.
- **`disrupt` / `disruption`** — No `try`/`catch`/`throw`. Error handling uses:
```javascript
var fn = function() {
disrupt // raise an error (bare keyword, no value)
} disruption {
// handle the error
}
```
- **No arraybuffers** — Use `blob` (works with bits; `stone(blob)` before reading).
- **Identifiers can contain `?` and `!`** — e.g., `nil?`, `set!`, `is?valid`.
- **4-parameter limit** — Functions take at most 4 named parameters.
- **Everything lowercase** — Convention is all-lowercase identifiers with underscores.
## Variable Declaration
```javascript
var count = 0 // mutable
def MAX = 100 // constant (cannot be reassigned)
var x = null // must initialize (var x alone is an error)
```
## Functions
```javascript
var greet = function(name) {
print(`hello ${name}`)
}
// Arrow functions
var double = x => x * 2
var add = (a, b) => a + b
```
## Push / Pop Syntax
```javascript
var a = [1, 2]
a[] = 3 // push: a is now [1, 2, 3]
var v = a[] // pop: v is 3, a is [1, 2]
```
## Control Flow
```javascript
if (x > 0) {
print("positive")
} else {
print("non-positive")
}
while (i < 10) {
i = i + 1
}
for (var i = 0; i < 10; i = i + 1) {
print(i)
}
// do-while
do {
i = i + 1
} while (i < 10)
```
## Error Handling
```javascript
var safe_divide = function(a, b) {
if (b == 0) {
disrupt
}
return a / b
} disruption {
return null
}
```
## Creator Functions (Polymorphic)
These examine argument types to decide behavior:
### array()
- `array(5)` — `[null, null, null, null, null]`
- `array(3, 0)` — `[0, 0, 0]`
- `array(5, i => i * 2)` — `[0, 2, 4, 6, 8]`
- `array([1,2])` — copy
- `array([1,2,3], x => x * 10)` — map: `[10, 20, 30]`
- `array([1,2], [3,4])` — concat: `[1, 2, 3, 4]`
- `array([1,2,3,4,5], 1, 4)` — slice: `[2, 3, 4]`
- `array({a: 1, b: 2})` — keys: `["a", "b"]`
- `array("hello")` — characters: `["h", "e", "l", "l", "o"]`
- `array("a,b,c", ",")` — split: `["a", "b", "c"]`
### text()
- `text([1, 2, 3], ", ")` — join: `"1, 2, 3"`
- `text(255, 16)` — radix: `"ff"`
- `text("hello", 0, 3)` — substring: `"hel"`
### number()
- `number("42")` — parse: `42`
- `number("ff", 16)` — radix: `255`
- `number(true)` — `1`
### record()
- `record({a: 1})` — copy
- `record({a: 1}, {b: 2})` — merge: `{a: 1, b: 2}`
- `record(["x", "y"])` — from keys: `{x: true, y: true}`
## All Intrinsic Functions
**Constants:** `false`, `true`, `null`, `pi`
**Type checks:** `is_array`, `is_blob`, `is_character`, `is_data`, `is_digit`, `is_false`, `is_fit`, `is_function`, `is_integer`, `is_letter`, `is_logical`, `is_lower`, `is_null`, `is_number`, `is_object`, `is_pattern`, `is_stone`, `is_text`, `is_true`, `is_upper`, `is_whitespace`
**Creators:** `array`, `logical`, `number`, `record`, `text`
**Math:** `abs`, `ceiling`, `floor`, `fraction`, `max`, `min`, `modulo`, `neg`, `remainder`, `round`, `sign`, `trunc`, `whole`
**Text:** `character`, `codepoint`, `ends_with`, `extract`, `format`, `lower`, `normalize`, `replace`, `search`, `starts_with`, `trim`, `upper`
**Array:** `every`, `filter`, `find`, `for`, `length`, `reduce`, `reverse`, `some`, `sort`
**Objects:** `meme`, `proto`, `isa`, `stone`
**Functions:** `apply`, `splat`
**I/O:** `print`
**Async:** `fallback`, `parallel`, `race`, `sequence`
**Misc:** `logical`, `not`, `use`
## Variable Scoping
Variables are scoped to the function body in which they are declared. There is no block scoping. All declarations must be at the top level of a function body (not nested inside `if`/`while`/`for`).
```javascript
var outer = function() {
var x = 10
var inner = function() {
// x is visible here via closure
print(x)
}
inner()
}
```
## Modules (.cm files)
Modules return a value (typically a record of exports). They are loaded with `use()`, cached, and frozen.
```javascript
// math_utils.cm
var square = x => x * x
var cube = x => x * x * x
return {square: square, cube: cube}
// main.ce
var utils = use('math_utils')
print(utils.square(5)) // 25
```
## Standard Library (loaded with use())
- `blob` — binary data (works with bits, not bytes)
- `time` — time constants and conversions
- `math` — trig, logarithms, roots (sub-modules: `math/radians`, `math/turns`)
- `json` — JSON encoding/decoding (`json.encode`, `json.decode`)
- `random` — random number generation
- `fd` — file descriptor operations (`fd.read`, `fd.write`, `fd.slurp`, `fd.stat`)
## Actor Model (.ce files)
Actors are independent execution units that never share memory. They communicate via message passing.
```javascript
// greeter.ce
$receiver(function(msg) {
$send(msg.from, {greeting: `hello ${msg.name}`})
})
```
### Actor Intrinsics ($ prefix)
- `$me` — this actor's address
- `$send(address, message)` — send a message
- `$start(script, env)` — start a new actor
- `$stop()` — stop this actor
- `$delay(ms)` — delay processing
- `$receiver(fn)` — set message handler
- `$clock(interval, message)` — periodic self-message
- `$portal(name)` — create named portal
- `$contact(name)` — connect to portal
- `$couple(address)` — lifecycle coupling
- `$unneeded(fn)` — cleanup callback
- `$connection(address)` — establish connection
- `$time_limit(ms)` — execution time limit
## Common Patterns
### Iteration
```javascript
// Preferred: use for() intrinsic
for([1, 2, 3], function(item, index) {
print(`${text(index)}: ${text(item)}`)
})
// C-style for loop
for (var i = 0; i < length(items); i = i + 1) {
print(items[i])
}
```
### String Building
```javascript
// Use backtick interpolation
var msg = `hello ${name}, you are ${text(age)} years old`
// Join array
var csv = text(values, ",")
```
### Record Manipulation
```javascript
var obj = {name: "alice", age: 30}
var keys = array(obj) // ["name", "age"]
var copy = record(obj) // mutable copy
var merged = record(obj, {role: "admin"})
```
### Error-Safe Operations
```javascript
var safe_parse = function(input) {
return number(input)
} disruption {
return null
}
```

View File

@@ -0,0 +1,30 @@
{
"comments": {
"lineComment": "//",
"blockComment": ["/*", "*/"]
},
"brackets": [
["{", "}"],
["[", "]"],
["(", ")"]
],
"autoClosingPairs": [
{ "open": "{", "close": "}" },
{ "open": "[", "close": "]" },
{ "open": "(", "close": ")" },
{ "open": "\"", "close": "\"", "notIn": ["string"] },
{ "open": "`", "close": "`", "notIn": ["string"] }
],
"surroundingPairs": [
["{", "}"],
["[", "]"],
["(", ")"],
["\"", "\""],
["`", "`"]
],
"indentationRules": {
"increaseIndentPattern": "^.*\\{[^}\"'`]*$",
"decreaseIndentPattern": "^\\s*\\}"
},
"wordPattern": "[a-zA-Z_$][a-zA-Z0-9_$?!]*"
}

View File

@@ -0,0 +1,113 @@
// Document analysis module.
// Call make(tokenize_mod, parse_mod) to get an analysis object.
var json = use('json')
// Create an analysis module bound to the tokenize and parse functions.
//
// tokenize_mod(src, uri) is expected to return a record with a `tokens`
// array; parse_mod(tokens, src, uri, tokenize_mod) returns the AST, which may
// carry an `errors` array.
// Returns a record {update, remove, diagnostics, token_at}.
var make = function(tokenize_mod, parse_mod) {
  // Tokenize and parse a document, storing the results.
  //
  // `docs` is the document store (keyed by uri); `params` supplies `src` and
  // `version`. The new document record is stored in docs[uri] and returned.
  // Its `errors` list holds the parser's errors when an AST was produced, or
  // a single generic entry when tokenizing or parsing disrupted.
  var update = function(docs, uri, params) {
    var src = params.src
    var version = params.version
    var tok_result = null
    var ast = null
    var errors = []
    var doc = null
    var do_tokenize = function() {
      tok_result = tokenize_mod(src, uri)
    } disruption {
      errors = [{message: "Tokenize failed", line: 1, column: 1}]
    }
    var do_parse = function() {
      ast = parse_mod(tok_result.tokens, src, uri, tokenize_mod)
    } disruption {
      // When parse_mod disrupts, the assignment above never completes, so
      // `ast` is still null and there is no ast.errors to read. Record a
      // generic diagnostic rather than presenting the document as clean.
      errors = [{message: "Parse failed", line: 1, column: 1}]
    }
    do_tokenize()
    if (tok_result != null) {
      do_parse()
      // Errors the parser attached to a returned AST take precedence.
      if (ast != null && ast.errors != null) {
        errors = ast.errors
      }
    }
    doc = {
      uri: uri,
      text: src,
      version: version,
      tokens: (tok_result != null) ? tok_result.tokens : [],
      ast: ast,
      errors: errors
    }
    docs[uri] = doc
    return doc
  }
  // Remove a document from the store.
  var remove = function(docs, uri) {
    delete docs[uri]
  }
  // Convert parse errors to LSP diagnostics.
  //
  // Error positions are 1-based and are converted to 0-based; a missing line
  // or column becomes 0. Each diagnostic covers a single character and has
  // severity 1.
  var diagnostics = function(doc) {
    var result = []
    var _i = 0
    var e = null
    var line = null
    var col = null
    while (_i < length(doc.errors)) {
      e = doc.errors[_i]
      line = (e.line != null) ? e.line - 1 : 0
      col = (e.column != null) ? e.column - 1 : 0
      result[] = {
        range: {
          start: {line: line, character: col},
          end: {line: line, character: col + 1}
        },
        severity: 1,
        source: "pit",
        message: e.message
      }
      _i = _i + 1
    }
    return result
  }
  // Find the token at a given line/column (0-based).
  // Returns the first token in document order whose span contains the
  // position, or null when there is none.
  var token_at = function(doc, line, col) {
    var tokens = doc.tokens
    var _i = 0
    var tok = null
    while (_i < length(tokens)) {
      tok = tokens[_i]
      // Token starts on this line and its column range contains col.
      if (tok.from_row == line && tok.from_column <= col && tok.to_column >= col) {
        return tok
      }
      // Multi-line token: position is on one of its interior lines.
      if (tok.from_row < line && tok.to_row > line) {
        return tok
      }
      // Multi-line token: position is on its last line, at or before its end.
      if (tok.from_row < line && tok.to_row == line && tok.to_column >= col) {
        return tok
      }
      // Multi-line token: position is on its first line, at or after its start.
      if (tok.from_row == line && tok.to_row > line && tok.from_column <= col) {
        return tok
      }
      _i = _i + 1
    }
    return null
  }
  return {
    update: update,
    remove: remove,
    diagnostics: diagnostics,
    token_at: token_at
  }
}
return make

View File

@@ -0,0 +1,133 @@
// Completion provider for the ƿit LSP.
// CompletionItemKind constants (LSP spec)
// Only the kinds this provider emits are defined.
def KIND_FUNCTION = 3
def KIND_VARIABLE = 6
def KIND_KEYWORD = 14
def KIND_CONSTANT = 21
// All intrinsic function names
// Offered for every document, with kind KIND_FUNCTION and detail "intrinsic".
def intrinsic_functions = [
"abs", "apply", "array", "ceiling", "character", "codepoint",
"ends_with", "every", "extract", "fallback", "filter", "find",
"floor", "format", "fraction",
"is_array", "is_blob", "is_character", "is_data", "is_digit",
"is_false", "is_fit", "is_function", "is_integer", "is_letter",
"is_logical", "is_lower", "is_null", "is_number", "is_object",
"is_pattern", "is_stone", "is_text", "is_true", "is_upper",
"is_whitespace",
"length", "logical", "lower", "max", "min", "modulo",
"neg", "normalize", "not", "number",
"parallel", "print", "race", "record", "reduce", "remainder",
"replace", "reverse", "round",
"search", "sequence", "sign", "some", "sort", "starts_with",
"stone", "text", "trim", "trunc", "upper", "whole",
"meme", "proto", "isa", "splat", "use"
]
// Keywords that can be completed
// Offered for every document, with kind KIND_KEYWORD and detail "keyword".
def keywords = [
"var", "def", "if", "else", "for", "while", "do",
"function", "return", "go", "break", "continue",
"disrupt", "disruption", "delete", "in", "this",
"null", "true", "false"
]
// Actor intrinsics (only in .ce files)
// Offered only when the document uri ends with ".ce".
def actor_intrinsics = [
"$me", "$send", "$start", "$stop", "$delay",
"$receiver", "$clock", "$portal", "$contact",
"$couple", "$unneeded", "$connection", "$time_limit"
]
// Collect completion items for the variables declared in a document.
//
// Simplified: `line` and `col` are not used yet, so every named variable of
// every scope in doc.ast.scopes is returned. `def` declarations
// (is_const == true) become KIND_CONSTANT with detail "def"; the rest become
// KIND_VARIABLE with detail "var". Returns an empty array when the document
// has no AST or no scope list.
var collect_scope_vars = function(doc, line, col) {
  var found = []
  var tree = doc.ast
  var scope_idx = 0
  var var_idx = 0
  var decls = null
  var decl = null
  var is_def = false
  if (tree == null || tree.scopes == null) {
    return found
  }
  while (scope_idx < length(tree.scopes)) {
    decls = tree.scopes[scope_idx].vars
    scope_idx = scope_idx + 1
    if (decls == null) {
      continue
    }
    var_idx = 0
    while (var_idx < length(decls)) {
      decl = decls[var_idx]
      var_idx = var_idx + 1
      if (decl.name == null) {
        continue
      }
      is_def = decl.is_const == true
      found[] = {
        label: decl.name,
        kind: is_def ? KIND_CONSTANT : KIND_VARIABLE,
        detail: is_def ? "def" : "var"
      }
    }
  }
  return found
}
// Provide completions for a document at a position.
//
// The result lists, in order: all intrinsic functions, all keywords, the
// actor intrinsics (only when the document uri ends with ".ce"), and the
// variables reported by collect_scope_vars.
var complete = function(doc, line, col) {
  var items = []
  var is_actor = ends_with(doc.uri, ".ce")
  var scope_vars = null
  var k = 0
  // Append one item per label, all sharing the same kind and detail.
  var add_labels = function(labels, kind, detail) {
    var n = 0
    while (n < length(labels)) {
      items[] = {
        label: labels[n],
        kind: kind,
        detail: detail
      }
      n = n + 1
    }
  }
  add_labels(intrinsic_functions, KIND_FUNCTION, "intrinsic")
  add_labels(keywords, KIND_KEYWORD, "keyword")
  if (is_actor) {
    add_labels(actor_intrinsics, KIND_FUNCTION, "actor intrinsic")
  }
  // Variables from scope analysis are already complete items.
  scope_vars = collect_scope_vars(doc, line, col)
  while (k < length(scope_vars)) {
    items[] = scope_vars[k]
    k = k + 1
  }
  return items
}
return {
  complete: complete
}

461
editors/vscode/lsp/hover.cm Normal file
View File

@@ -0,0 +1,461 @@
// Hover provider for the ƿit LSP.
// Shows documentation for intrinsic functions and variable info.
// Intrinsic function documentation database.
// Keyed by intrinsic name; hover() looks a name token's value up here directly.
// Each entry: {signature, description}
//   signature:   call form, rendered in bold by hover()
//   description: explanation rendered beneath the signature
// `pi` is listed here as well; its signature carries no argument list.
def intrinsic_docs = {
abs: {
signature: "abs(number)",
description: "Absolute value. Returns null for non-numbers."
},
apply: {
signature: "apply(function, array)",
description: "Execute the function, passing array elements as input values."
},
array: {
signature: "array(value, ...)",
description: "Create arrays. Polymorphic: array(number) creates sized array, array(array) copies, array(array, fn) maps, array(text) splits into characters, array(text, sep) splits by separator."
},
ceiling: {
signature: "ceiling(number, place)",
description: "Round up. If place is 0 or null, round to smallest integer >= number."
},
character: {
signature: "character(value)",
description: "If text, returns the first character. If a non-negative integer, returns the character from that codepoint."
},
codepoint: {
signature: "codepoint(text)",
description: "Returns the codepoint number of the first character."
},
ends_with: {
signature: "ends_with(text, suffix)",
description: "Returns true if the text ends with the given suffix."
},
every: {
signature: "every(array, function)",
description: "Returns true if every element satisfies the predicate."
},
extract: {
signature: "extract(text, pattern, from, to)",
description: "Match text to pattern. Returns a record of saved fields, or null if no match."
},
fallback: {
signature: "fallback(requestor_array)",
description: "Returns a requestor that tries each requestor in order until one succeeds."
},
filter: {
signature: "filter(array, function)",
description: "Returns a new array containing elements for which function returns true."
},
find: {
signature: "find(array, function, reverse, from)",
description: "Returns the element number where function returns true, or null if not found. If second arg is not a function, compares directly."
},
floor: {
signature: "floor(number, place)",
description: "Round down. If place is 0 or null, round to greatest integer <= number."
},
format: {
signature: "format(text, collection, transformer)",
description: "Substitute {key} placeholders in text with values from a collection (array or record)."
},
fraction: {
signature: "fraction(number)",
description: "Returns the fractional part of a number."
},
is_array: {
signature: "is_array(value)",
description: "Returns true if the value is an array."
},
is_blob: {
signature: "is_blob(value)",
description: "Returns true if the value is a blob."
},
is_character: {
signature: "is_character(value)",
description: "Returns true if the value is a single character."
},
is_data: {
signature: "is_data(value)",
description: "Returns true if the value is data (not a function)."
},
is_digit: {
signature: "is_digit(value)",
description: "Returns true if the value is a digit character."
},
is_false: {
signature: "is_false(value)",
description: "Returns true if the value is false."
},
is_fit: {
signature: "is_fit(value)",
description: "Returns true if the value is a fit integer."
},
is_function: {
signature: "is_function(value)",
description: "Returns true if the value is a function."
},
is_integer: {
signature: "is_integer(value)",
description: "Returns true if the value is an integer."
},
is_letter: {
signature: "is_letter(value)",
description: "Returns true if the value is a letter character."
},
is_logical: {
signature: "is_logical(value)",
description: "Returns true if the value is a logical (boolean)."
},
is_lower: {
signature: "is_lower(value)",
description: "Returns true if the value is a lowercase character."
},
is_null: {
signature: "is_null(value)",
description: "Returns true if the value is null."
},
is_number: {
signature: "is_number(value)",
description: "Returns true if the value is a number."
},
is_object: {
signature: "is_object(value)",
description: "Returns true if the value is an object (record)."
},
is_pattern: {
signature: "is_pattern(value)",
description: "Returns true if the value is a pattern (regex)."
},
is_stone: {
signature: "is_stone(value)",
description: "Returns true if the value is frozen (stoned)."
},
is_text: {
signature: "is_text(value)",
description: "Returns true if the value is text."
},
is_true: {
signature: "is_true(value)",
description: "Returns true if the value is true."
},
is_upper: {
signature: "is_upper(value)",
description: "Returns true if the value is an uppercase character."
},
is_whitespace: {
signature: "is_whitespace(value)",
description: "Returns true if the value is a whitespace character."
},
length: {
signature: "length(value)",
description: "Array: number of elements. Text: number of codepoints. Function: arity. Blob: number of bits. Record: record.length()."
},
logical: {
signature: "logical(value)",
description: "Convert to logical. 0/false/null/\"false\" produce false; 1/true/\"true\" produce true."
},
lower: {
signature: "lower(text)",
description: "Returns text with all uppercase characters converted to lowercase."
},
max: {
signature: "max(number, number)",
description: "Returns the larger of two numbers."
},
min: {
signature: "min(number, number)",
description: "Returns the smaller of two numbers."
},
modulo: {
signature: "modulo(dividend, divisor)",
description: "Result has the sign of the divisor."
},
neg: {
signature: "neg(number)",
description: "Negate. Reverse the sign of a number."
},
normalize: {
signature: "normalize(text)",
description: "Unicode normalize."
},
not: {
signature: "not(logical)",
description: "Returns the opposite logical. Returns null for non-logicals."
},
number: {
signature: "number(value, radix_or_format)",
description: "Convert to number. Polymorphic: number(logical), number(text), number(text, radix), number(text, format)."
},
parallel: {
signature: "parallel(requestor_array, throttle, need)",
description: "Start all requestors concurrently. Optional throttle limits concurrency; optional need specifies minimum successes."
},
print: {
signature: "print(value)",
description: "Print a value to standard output."
},
race: {
signature: "race(requestor_array, throttle, need)",
description: "Like parallel but returns as soon as needed results are obtained. Default need is 1."
},
record: {
signature: "record(value, ...)",
description: "Create records. Polymorphic: record(record) copies, record(record, record) merges, record(array) creates from keys."
},
reduce: {
signature: "reduce(array, function, initial, reverse)",
description: "Reduce an array to a single value by applying a function to pairs of elements."
},
remainder: {
signature: "remainder(dividend, divisor)",
description: "For fit integers: dividend - ((dividend // divisor) * divisor)."
},
replace: {
signature: "replace(text, target, replacement, limit)",
description: "Return text with target replaced. Target can be text or pattern. Replacement can be text or function."
},
reverse: {
signature: "reverse(array)",
description: "Returns a new array with elements in the opposite order."
},
round: {
signature: "round(number, place)",
description: "Round to nearest."
},
search: {
signature: "search(text, target, from)",
description: "Search text for target. Returns character position or null."
},
sequence: {
signature: "sequence(requestor_array)",
description: "Process requestors in order. Each result becomes input to the next."
},
sign: {
signature: "sign(number)",
description: "Returns -1, 0, or 1."
},
some: {
signature: "some(array, function)",
description: "Returns true if any element satisfies the predicate."
},
sort: {
signature: "sort(array, select)",
description: "Returns a new sorted array. Sort keys must be all numbers or all texts. Ascending and stable."
},
starts_with: {
signature: "starts_with(text, prefix)",
description: "Returns true if the text starts with the given prefix."
},
stone: {
signature: "stone(value)",
description: "Petrify the value, making it permanently immutable. Deep freeze."
},
text: {
signature: "text(value, ...)",
description: "Convert to text. Polymorphic: text(array, sep) joins, text(number, radix/format) formats, text(text, from, to) substrings."
},
trim: {
signature: "trim(text, reject)",
description: "Remove characters from both ends. Default removes whitespace."
},
trunc: {
signature: "trunc(number, place)",
description: "Truncate toward zero."
},
upper: {
signature: "upper(text)",
description: "Returns text with all lowercase characters converted to uppercase."
},
whole: {
signature: "whole(number)",
description: "Returns the whole part of a number."
},
meme: {
signature: "meme()",
description: "Create a new meme (prototype chain marker)."
},
proto: {
signature: "proto(object, meme)",
description: "Set the prototype meme of an object."
},
isa: {
signature: "isa(object, meme)",
description: "Returns true if the object has the given meme in its prototype chain."
},
splat: {
signature: "splat(function, array)",
description: "Call function with array elements as separate arguments."
},
use: {
signature: "use(path)",
description: "Load a module. Returns the module's exported value. Modules are cached and frozen."
},
pi: {
signature: "pi",
description: "An approximation of circumference / diameter: 3.1415926535897932."
}
}
// Actor intrinsic documentation.
// Keyed by the full "$"-prefixed name, which is how hover() looks entries up
// (it indexes this record with the raw token value).
// Each entry: {signature, description}.
def actor_docs = {
"$me": {
signature: "$me",
description: "The address of this actor."
},
"$send": {
signature: "$send(address, message)",
description: "Send a message to another actor."
},
"$start": {
signature: "$start(script, env)",
description: "Start a new actor from a script path."
},
"$stop": {
signature: "$stop()",
description: "Stop this actor."
},
"$delay": {
signature: "$delay(milliseconds)",
description: "Delay processing for a number of milliseconds."
},
"$receiver": {
signature: "$receiver(function)",
description: "Set the message receiver function for this actor."
},
"$clock": {
signature: "$clock(interval, message)",
description: "Send a message to self at regular intervals."
},
"$portal": {
signature: "$portal(name)",
description: "Create a named portal for inter-actor communication."
},
"$contact": {
signature: "$contact(portal_name)",
description: "Connect to a named portal."
},
"$couple": {
signature: "$couple(address)",
description: "Couple with another actor for lifecycle management."
},
"$unneeded": {
signature: "$unneeded(function)",
description: "Set a function to be called when this actor is no longer needed."
},
"$connection": {
signature: "$connection(address)",
description: "Establish a connection with another actor."
},
"$time_limit": {
signature: "$time_limit(milliseconds)",
description: "Set a time limit for this actor's execution."
}
}
// Produce hover content for the token under the cursor.
//   doc:       document entry; doc.ast.scopes is consulted for user variables.
//   line, col: cursor position, passed straight to token_at.
//   token_at:  function(doc, line, col) returning the token there, or null.
// Returns {contents: {kind: "markdown", value}} or null when nothing applies.
// Lookup order: intrinsic docs, actor intrinsic docs, the var/def/disrupt/
// disruption keywords, then the first declared variable with a matching name.
var hover = function(doc, line, col, token_at) {
  var tok = token_at(doc, line, col)
  var entry = null
  var scopes = null
  var declared = null
  var si = 0
  var vi = 0
  // Wrap markdown text in the hover result shape.
  var markdown = function(value) {
    return {
      contents: {
        kind: "markdown",
        value: value
      }
    }
  }
  if (tok == null) {
    return null
  }
  // Intrinsic functions
  if (tok.kind == "name" && tok.value != null) {
    entry = intrinsic_docs[tok.value]
    if (entry != null) {
      return markdown(`**${entry.signature}**\n\n${entry.description}`)
    }
  }
  // Actor intrinsics ($name)
  if (tok.value != null && starts_with(tok.value, "$")) {
    entry = actor_docs[tok.value]
    if (entry != null) {
      return markdown(`**${entry.signature}**\n\n${entry.description}`)
    }
  }
  // Keywords
  if (tok.kind == "var") {
    return markdown("**var** — Declare a mutable variable.")
  }
  if (tok.kind == "def") {
    return markdown("**def** — Declare a constant.")
  }
  if (tok.kind == "disrupt") {
    return markdown("**disrupt** — Raise an error. Use with **disruption** block to handle errors.")
  }
  if (tok.kind == "disruption") {
    return markdown("**disruption** — Error handling block. Catches errors raised by **disrupt**.")
  }
  // User variable: only name tokens in a document with scope info qualify.
  if (tok.kind != "name" || tok.value == null || doc.ast == null || doc.ast.scopes == null) {
    return null
  }
  scopes = doc.ast.scopes
  while (si < length(scopes)) {
    declared = scopes[si].vars
    if (declared != null) {
      vi = 0
      while (vi < length(declared)) {
        if (declared[vi].name == tok.value) {
          if (declared[vi].is_const == true) {
            return markdown(`**def** ${declared[vi].name}`)
          }
          return markdown(`**var** ${declared[vi].name}`)
        }
        vi = vi + 1
      }
    }
    si = si + 1
  }
  return null
}
// Module value: the hover entry point plus both documentation tables.
return {
hover: hover,
intrinsic_docs: intrinsic_docs,
actor_docs: actor_docs
}

209
editors/vscode/lsp/lsp.ce Normal file
View File

@@ -0,0 +1,209 @@
// ƿit Language Server Protocol (LSP) main loop.
// Communicates via JSON-RPC over stdin/stdout.
var fd = use('fd')
var json_mod = use('json')
var protocol = use('protocol')
var analysis_make = use('analysis')
var completions = use('completions')
var hover_mod = use('hover')
var symbols = use('symbols')
// Get tokenize_mod and parse_mod from the environment.
// These are the same functions the compiler uses internally.
var tokenize_mod = use('tokenize')
var parse_mod = use('parse')
// Create analysis module bound to tokenize/parse
var analysis = analysis_make(tokenize_mod, parse_mod)
// Document store: URI -> {text, version, ast, tokens, errors}
var docs = {}
// Write a "[pit-lsp]"-prefixed debug line to file descriptor 2 (stderr),
// keeping it out of the protocol stream on stdout.
var log = function(msg) {
  var line = `[pit-lsp] ${msg}\n`
  fd.write(2, line)
}
// Compute the diagnostics for doc and push them to the client as a
// textDocument/publishDiagnostics notification for uri.
var publish_diagnostics = function(uri, doc) {
  var payload = {
    uri: uri,
    diagnostics: analysis.diagnostics(doc)
  }
  protocol.notify("textDocument/publishDiagnostics", payload)
}
// Re-analyse a document from its source text, store the result in docs
// via analysis.update, and publish the resulting diagnostics.
var parse_and_notify = function(uri, src, version) {
  var update = {src: src, version: version}
  publish_diagnostics(uri, analysis.update(docs, uri, update))
}
// Answer the initialize request with the server's capabilities and identity.
// params is accepted for signature symmetry and is not inspected.
var handle_initialize = function(id, params) {
  // change: 1 requests full-text synchronisation.
  var sync = {
    openClose: true,
    change: 1,
    save: {includeText: true}
  }
  var capabilities = {
    textDocumentSync: sync,
    completionProvider: {
      triggerCharacters: [".", "$"]
    },
    hoverProvider: true,
    definitionProvider: true,
    documentSymbolProvider: true
  }
  var server_info = {
    name: "pit-lsp",
    version: "0.1.0"
  }
  protocol.respond(id, {
    capabilities: capabilities,
    serverInfo: server_info
  })
}
// textDocument/didOpen: analyse the newly opened document and publish
// its diagnostics.
var handle_did_open = function(params) {
  var opened = params.textDocument
  parse_and_notify(opened.uri, opened.text, opened.version)
}
// Handle textDocument/didChange notification (full text sync).
// With full synchronisation every entry of contentChanges carries the whole
// document, and entries are ordered oldest to newest, so only the LAST entry
// reflects the current text. A missing or empty change list is ignored.
var handle_did_change = function(params) {
  var td = params.textDocument
  var changes = params.contentChanges
  var count = 0
  if (changes != null) {
    count = length(changes)
  }
  if (count > 0) {
    parse_and_notify(td.uri, changes[count - 1].text, td.version)
  }
}
// textDocument/didClose: drop the document from the store, then publish an
// empty diagnostics list so the client clears what it was showing.
var handle_did_close = function(params) {
  var closed_uri = params.textDocument.uri
  var cleared = {
    uri: closed_uri,
    diagnostics: []
  }
  analysis.remove(docs, closed_uri)
  protocol.notify("textDocument/publishDiagnostics", cleared)
}
// Handle textDocument/didSave notification.
// The saved text is only present when the client honours includeText; without
// it there is nothing new to analyse. The didSave textDocument is a plain
// identifier that normally carries no version, so when it is absent the
// version already stored for the document is kept instead of being
// overwritten with null.
var handle_did_save = function(params) {
  var td = params.textDocument
  var known = docs[td.uri]
  var version = td.version
  if (version == null && known != null) {
    version = known.version
  }
  if (params.text != null) {
    parse_and_notify(td.uri, params.text, version)
  }
}
// textDocument/completion: reply with completion items for the position,
// or with an empty list when the document is not in the store.
var handle_completion = function(id, params) {
  var uri = params.textDocument.uri
  var pos = params.position
  var doc = docs[uri]
  var items = (doc == null) ? [] : completions.complete(doc, pos.line, pos.character)
  protocol.respond(id, items)
}
// textDocument/hover: reply with hover content for the position, or with
// null when the document is not in the store.
var handle_hover = function(id, params) {
  var uri = params.textDocument.uri
  var pos = params.position
  var doc = docs[uri]
  var result = (doc == null) ? null : hover_mod.hover(doc, pos.line, pos.character, analysis.token_at)
  protocol.respond(id, result)
}
// textDocument/definition: reply with the declaration location for the
// name at the position, or with null when the document is not in the store.
var handle_definition = function(id, params) {
  var uri = params.textDocument.uri
  var pos = params.position
  var doc = docs[uri]
  var result = (doc == null) ? null : symbols.definition(doc, pos.line, pos.character, analysis.token_at)
  protocol.respond(id, result)
}
// textDocument/documentSymbol: reply with the document's symbol list, or
// with an empty list when the document is not in the store.
var handle_document_symbol = function(id, params) {
  var doc = docs[params.textDocument.uri]
  var result = (doc == null) ? [] : symbols.document_symbols(doc)
  protocol.respond(id, result)
}
// Dispatch a single message. Wrapped in a function for disruption handling.
//   msg: decoded JSON-RPC message; msg.method selects the handler, msg.id is
//        forwarded to request handlers and msg.params to all handlers.
// Returns "shutdown" or "exit" for those two methods and null otherwise.
// The main loop only acts on "exit".
var dispatch_message = function(msg) {
var method = msg.method
if (method == "initialize") {
handle_initialize(msg.id, msg.params)
} else if (method == "initialized") {
// no-op
} else if (method == "textDocument/didOpen") {
handle_did_open(msg.params)
} else if (method == "textDocument/didChange") {
handle_did_change(msg.params)
} else if (method == "textDocument/didClose") {
handle_did_close(msg.params)
} else if (method == "textDocument/didSave") {
handle_did_save(msg.params)
} else if (method == "textDocument/completion") {
handle_completion(msg.id, msg.params)
} else if (method == "textDocument/hover") {
handle_hover(msg.id, msg.params)
} else if (method == "textDocument/definition") {
handle_definition(msg.id, msg.params)
} else if (method == "textDocument/documentSymbol") {
handle_document_symbol(msg.id, msg.params)
} else if (method == "shutdown") {
// Acknowledge with a null result; the process keeps running until "exit".
protocol.respond(msg.id, null)
return "shutdown"
} else if (method == "exit") {
return "exit"
} else {
// Unknown method: only requests (messages with an id) get a reply;
// -32601 is the JSON-RPC "method not found" code.
if (msg.id != null) {
protocol.respond_error(msg.id, -32601, `Method not found: ${method}`)
}
}
return null
} disruption {
// Presumably entered when a handler above disrupts (confirm against the
// language reference). The failure is logged and, for requests, answered
// with -32603 (JSON-RPC "internal error").
log(`error handling ${msg.method}`)
if (msg.id != null) {
protocol.respond_error(msg.id, -32603, `Internal error handling ${msg.method}`)
}
return null
}
// Main loop: read and dispatch messages until the input ends (read_message
// returns null) or a message dispatches to "exit".
log("starting")
var running = true
var msg = null
var result = null
while (running) {
  msg = protocol.read_message()
  if (msg != null) {
    result = dispatch_message(msg)
    running = (result != "exit")
  } else {
    running = false
  }
}
log("stopped")

View File

@@ -0,0 +1,102 @@
// JSON-RPC protocol helpers for LSP communication over stdin/stdout.
// Reads Content-Length framed messages from stdin, writes to stdout.
var fd = use('fd')
var json = use('json')
// Number of bytes needed to encode a text as UTF-8.
// Content-Length counts bytes, while length(text) counts codepoints, so the
// two differ as soon as a message contains a non-ASCII character.
var utf8_byte_length = function(str) {
  var chars = array(str)
  var bytes = 0
  var cp = 0
  var k = 0
  while (k < length(chars)) {
    cp = codepoint(chars[k])
    if (cp < 128) {
      bytes = bytes + 1
    } else if (cp < 2048) {
      bytes = bytes + 2
    } else if (cp < 65536) {
      bytes = bytes + 3
    } else {
      bytes = bytes + 4
    }
    k = k + 1
  }
  return bytes
}
// Read a single JSON-RPC message from stdin.
// Protocol: "Content-Length: N\r\n\r\n" followed by N bytes of JSON.
// Returns the decoded message, or null when the input ends or the header
// carries no usable Content-Length.
var read_message = function() {
  var header = ""
  var ch = null
  var content_length = null
  var body = ""
  var total = 0
  var chunk = null
  var chunk_text = null
  var lines = null
  var _i = 0
  // Read header byte by byte until we hit \r\n\r\n
  while (true) {
    ch = fd.read(0, 1)
    // A zero-length read is treated as end of input, same as null, so the
    // loop cannot spin forever once stdin is closed.
    if (ch == null || length(ch) == 0) {
      return null
    }
    header = header + text(ch)
    if (ends_with(header, "\r\n\r\n")) {
      break
    }
  }
  // Parse Content-Length from header. The field name is matched
  // case-insensitively, and the value starts right after the colon (offset
  // 15); trim removes the optional space, so "Content-Length:42" and
  // "Content-Length: 42" both parse.
  lines = array(header, "\r\n")
  while (_i < length(lines)) {
    if (starts_with(lower(lines[_i]), "content-length:")) {
      content_length = number(trim(text(lines[_i], 15)))
    }
    _i = _i + 1
  }
  if (content_length == null) {
    return null
  }
  // Read exactly content_length BYTES.
  // NOTE(review): each chunk is decoded on its own, so a read that ends in
  // the middle of a multi-byte character may still decode imperfectly.
  while (total < content_length) {
    chunk = fd.read(0, content_length - total)
    if (chunk == null || length(chunk) == 0) {
      return null
    }
    chunk_text = text(chunk)
    body = body + chunk_text
    if (is_blob(chunk)) {
      // Blob length is measured in bits.
      total = total + length(chunk) / 8
    } else {
      total = total + utf8_byte_length(chunk_text)
    }
  }
  return json.decode(body)
}
// Send a JSON-RPC message to stdout, framed with a Content-Length header
// that states the body size in UTF-8 bytes.
var send_message = function(msg) {
  var body = json.encode(msg)
  var header = `Content-Length: ${text(utf8_byte_length(body))}\r\n\r\n`
  fd.write(1, header + body)
}
// Reply to the request identified by id with a successful result.
var respond = function(id, result) {
  var reply = {
    jsonrpc: "2.0",
    id: id,
    result: result
  }
  send_message(reply)
}
// Reply to the request identified by id with an error object carrying the
// given numeric code and message.
var respond_error = function(id, code, message) {
  var failure = {
    code: code,
    message: message
  }
  send_message({
    jsonrpc: "2.0",
    id: id,
    error: failure
  })
}
// Send a notification: a message with a method and params but no id, so
// no reply is expected.
var notify = function(method, params) {
  var notification = {
    jsonrpc: "2.0",
    method: method,
    params: params
  }
  send_message(notification)
}
// Module value: the message reader plus the framing and reply helpers.
return {
read_message: read_message,
send_message: send_message,
respond: respond,
respond_error: respond_error,
notify: notify
}

View File

@@ -0,0 +1,238 @@
// Document symbols and go-to-definition provider for the ƿit LSP.
// SymbolKind constants (LSP spec): Function = 12, Variable = 13, Constant = 14.
// Used as the `kind` of the entries built by document_symbols.
def KIND_FUNCTION = 12
def KIND_VARIABLE = 13
def KIND_CONSTANT = 14
// List the top-level declarations of a document as document symbols.
//   doc: document entry; only doc.ast.statements is read.
// Handles single var/def statements and every entry of a var_list.
// A declaration is reported as a function when its initialiser is a function
// or arrow function, as a constant when declared with def, otherwise as a
// variable. Declarations without a named left side are skipped.
// Returns an array of {name, kind, range, selectionRange}.
var document_symbols = function(doc) {
  var symbols = []
  var ast = doc.ast
  var si = 0
  var di = 0
  var stmt = null
  // Source span of an AST node in LSP range form.
  var span_of = function(node) {
    return {
      start: {line: node.from_row, character: node.from_column},
      end: {line: node.to_row, character: node.to_column}
    }
  }
  // Append the symbol for one declaration node, if it has a name.
  var add_declaration = function(decl) {
    var kind = KIND_VARIABLE
    if (decl.left == null || decl.left.name == null) {
      return null
    }
    if (decl.kind == "def") {
      kind = KIND_CONSTANT
    }
    if (decl.right != null && (decl.right.kind == "function" || decl.right.kind == "arrow function")) {
      kind = KIND_FUNCTION
    }
    symbols[] = {
      name: decl.left.name,
      kind: kind,
      range: span_of(decl),
      selectionRange: span_of(decl.left)
    }
    return null
  }
  if (ast == null || ast.statements == null) {
    return symbols
  }
  while (si < length(ast.statements)) {
    stmt = ast.statements[si]
    if (stmt.kind == "var" || stmt.kind == "def") {
      add_declaration(stmt)
    } else if (stmt.kind == "var_list" && stmt.list != null) {
      di = 0
      while (di < length(stmt.list)) {
        add_declaration(stmt.list[di])
        di = di + 1
      }
    }
    si = si + 1
  }
  return symbols
}
// Find the declaration location of the name at a given position.
//   doc:       document entry (doc.ast.statements is searched, doc.uri is
//              echoed in the result).
//   line, col: cursor position, passed straight to token_at.
//   token_at:  function(doc, line, col) returning the token there, or null.
// Returns {uri, range} spanning the declaring statement, or null when the
// position is not on a name token, the document has no AST, or no var/def
// declaration of that name is found.
// The statement tree is walked exactly once: scope entries carry no location
// of their own, so consulting them first could only repeat this same search.
var definition = function(doc, line, col, token_at) {
  var tok = token_at(doc, line, col)
  var ast = doc.ast
  var decl = null
  if (tok == null || tok.kind != "name" || tok.value == null) {
    return null
  }
  if (ast == null) {
    return null
  }
  decl = find_declaration(ast.statements, tok.value)
  if (decl == null) {
    return null
  }
  return {
    uri: doc.uri,
    range: {
      start: {line: decl.from_row, character: decl.from_column},
      end: {line: decl.to_row, character: decl.to_column}
    }
  }
}
// Recursively search statements for a var/def declaration of a given name.
//   statements: array of statement nodes, or null.
//   name:       identifier to look for.
// Returns the declaring node (a var/def statement or a var_list entry), or
// null when there is none. Search is depth-first in statement order and
// descends into nested statement lists, if/else branches and the bodies of
// function initialisers. Each body is visited once: a statement's own
// `statements` list covers function statements as well, so they need no
// separate pass.
var find_declaration = function(statements, name) {
  var _i = 0
  var _j = 0
  var stmt = null
  var item = null
  var result = null
  if (statements == null) {
    return null
  }
  while (_i < length(statements)) {
    stmt = statements[_i]
    // Direct var/def
    if ((stmt.kind == "var" || stmt.kind == "def")
      && stmt.left != null && stmt.left.name == name) {
      return stmt
    }
    // var_list: names first, then the bodies of function initialisers
    if (stmt.kind == "var_list" && stmt.list != null) {
      _j = 0
      while (_j < length(stmt.list)) {
        item = stmt.list[_j]
        if (item.left != null && item.left.name == name) {
          return item
        }
        _j = _j + 1
      }
      _j = 0
      while (_j < length(stmt.list)) {
        item = stmt.list[_j]
        if (item.right != null && (item.right.kind == "function" || item.right.kind == "arrow function")) {
          result = find_declaration(item.right.statements, name)
          if (result != null) {
            return result
          }
        }
        _j = _j + 1
      }
    }
    // Nested statement list (blocks and function statements alike)
    if (stmt.statements != null) {
      result = find_declaration(stmt.statements, name)
      if (result != null) {
        return result
      }
    }
    // if/else
    if (stmt.kind == "if") {
      if (stmt.then != null) {
        result = find_declaration(stmt.then.statements, name)
        if (result != null) {
          return result
        }
      }
      if (stmt.else != null) {
        result = find_declaration(stmt.else.statements, name)
        if (result != null) {
          return result
        }
      }
    }
    // var/def with function right side
    if ((stmt.kind == "var" || stmt.kind == "def") && stmt.right != null
      && (stmt.right.kind == "function" || stmt.right.kind == "arrow function")) {
      result = find_declaration(stmt.right.statements, name)
      if (result != null) {
        return result
      }
    }
    _i = _i + 1
  }
  return null
}
// Module value: the two providers; find_declaration stays internal.
return {
document_symbols: document_symbols,
definition: definition
}

View File

@@ -0,0 +1,62 @@
{
"name": "pit-language",
"displayName": "ƿit Language",
"description": "Language support for ƿit (.ce/.cm) — syntax highlighting, diagnostics, completions, hover, and go-to-definition",
"version": "0.1.0",
"publisher": "pit-lang",
"engines": {
"vscode": "^1.75.0"
},
"categories": [
"Programming Languages"
],
"activationEvents": [
"onLanguage:pit"
],
"main": "./out/extension.js",
"contributes": {
"languages": [
{
"id": "pit",
"aliases": [
"ƿit",
"pit"
],
"extensions": [
".ce",
".cm"
],
"configuration": "./language-configuration.json"
}
],
"grammars": [
{
"language": "pit",
"scopeName": "source.pit",
"path": "./syntaxes/pit.tmLanguage.json"
}
],
"configuration": {
"title": "ƿit",
"properties": {
"pit.cellPath": {
"type": "string",
"default": "cell",
"description": "Path to the cell executable"
}
}
}
},
"scripts": {
"compile": "tsc -p ./",
"watch": "tsc -watch -p ./"
},
"dependencies": {
"vscode-languageclient": "^9.0.0",
"vscode-languageserver-protocol": "^3.17.0"
},
"devDependencies": {
"@types/vscode": "^1.75.0",
"typescript": "^5.0.0"
}
}

View File

@@ -0,0 +1,44 @@
import * as path from "path";
import { workspace, ExtensionContext } from "vscode";
import {
LanguageClient,
LanguageClientOptions,
ServerOptions,
} from "vscode-languageclient/node";
let client: LanguageClient;
/**
 * Extension entry point: launches the ƿit language server and connects a
 * language client to it.
 *
 * The server is the executable named by the `pit.cellPath` setting
 * (default "cell"), run with the argument "lsp/lsp" from the extension's
 * bundled `lsp` directory.
 *
 * @param context Extension context; only `extensionPath` is used.
 */
export function activate(context: ExtensionContext) {
  const config = workspace.getConfiguration("pit");
  const cellPath = config.get<string>("cellPath", "cell");
  const lspDir = path.join(context.extensionPath, "lsp");

  const serverOptions: ServerOptions = {
    command: cellPath,
    args: ["lsp/lsp"],
    options: { cwd: lspDir },
  };

  const clientOptions: LanguageClientOptions = {
    documentSelector: [{ scheme: "file", language: "pit" }],
    synchronize: {
      fileEvents: workspace.createFileSystemWatcher("**/*.{ce,cm}"),
    },
  };

  client = new LanguageClient(
    "pitLanguageServer",
    "ƿit Language Server",
    serverOptions,
    clientOptions
  );

  // start() returns a promise; report a failed launch (for example a missing
  // `cell` executable) instead of leaving an unhandled rejection.
  client.start().catch((err: unknown) => {
    console.error("Failed to start the ƿit language server:", err);
  });
}
/**
 * Extension teardown: stops the language client if one was created.
 *
 * @returns The client's stop promise, or `undefined` when no client exists.
 */
export function deactivate(): Thenable<void> | undefined {
  return client ? client.stop() : undefined;
}

View File

@@ -0,0 +1,160 @@
{
"$schema": "https://raw.githubusercontent.com/martinring/tmlanguage/master/tmlanguage.json",
"name": "pit",
"scopeName": "source.pit",
"patterns": [
{ "include": "#comment-line" },
{ "include": "#comment-block" },
{ "include": "#string-template" },
{ "include": "#string-double" },
{ "include": "#regexp" },
{ "include": "#keyword-control" },
{ "include": "#keyword-error" },
{ "include": "#storage-type" },
{ "include": "#constant-language" },
{ "include": "#variable-language" },
{ "include": "#actor-intrinsic" },
{ "include": "#keyword-operator" },
{ "include": "#arrow-function" },
{ "include": "#support-function" },
{ "include": "#constant-numeric-hex" },
{ "include": "#constant-numeric-binary" },
{ "include": "#constant-numeric-octal" },
{ "include": "#constant-numeric" },
{ "include": "#punctuation" }
],
"repository": {
"comment-line": {
"name": "comment.line.double-slash.pit",
"match": "//.*$"
},
"comment-block": {
"name": "comment.block.pit",
"begin": "/\\*",
"end": "\\*/",
"beginCaptures": { "0": { "name": "punctuation.definition.comment.begin.pit" } },
"endCaptures": { "0": { "name": "punctuation.definition.comment.end.pit" } }
},
"string-double": {
"name": "string.quoted.double.pit",
"begin": "\"",
"end": "\"",
"beginCaptures": { "0": { "name": "punctuation.definition.string.begin.pit" } },
"endCaptures": { "0": { "name": "punctuation.definition.string.end.pit" } },
"patterns": [
{
"name": "constant.character.escape.pit",
"match": "\\\\(?:[\"\\\\bfnrt/]|u[0-9a-fA-F]{4})"
}
]
},
"string-template": {
"name": "string.template.pit",
"begin": "`",
"end": "`",
"beginCaptures": { "0": { "name": "punctuation.definition.string.template.begin.pit" } },
"endCaptures": { "0": { "name": "punctuation.definition.string.template.end.pit" } },
"patterns": [
{
"name": "constant.character.escape.pit",
"match": "\\\\(?:[`\\\\bfnrt/$]|u[0-9a-fA-F]{4})"
},
{
"name": "meta.template.expression.pit",
"begin": "\\$\\{",
"end": "\\}",
"beginCaptures": { "0": { "name": "punctuation.definition.template-expression.begin.pit" } },
"endCaptures": { "0": { "name": "punctuation.definition.template-expression.end.pit" } },
"patterns": [
{ "include": "source.pit" }
]
}
]
},
"regexp": {
"name": "string.regexp.pit",
"begin": "(?<=[=(:,;!&|?~^>]|^|return|disrupt)\\s*(/(?![/*]))",
"end": "/([gimsuvy]*)",
"beginCaptures": { "1": { "name": "punctuation.definition.string.begin.pit" } },
"endCaptures": { "1": { "name": "keyword.other.pit" } },
"patterns": [
{
"name": "constant.character.escape.pit",
"match": "\\\\."
}
]
},
"keyword-control": {
"name": "keyword.control.pit",
"match": "\\b(if|else|for|while|do|break|continue|return|go)\\b"
},
"keyword-error": {
"name": "keyword.control.error.pit",
"match": "\\b(disrupt|disruption)\\b"
},
"storage-type": {
"patterns": [
{
"name": "storage.type.pit",
"match": "\\b(var|def)\\b"
},
{
"name": "storage.type.function.pit",
"match": "\\bfunction\\b"
}
]
},
"constant-language": {
"name": "constant.language.pit",
"match": "\\b(null|true|false)\\b"
},
"variable-language": {
"name": "variable.language.this.pit",
"match": "\\bthis\\b"
},
"actor-intrinsic": {
"name": "variable.language.actor.pit",
"match": "\\$[a-zA-Z_][a-zA-Z0-9_]*"
},
"keyword-operator": {
"name": "keyword.operator.pit",
"match": "\\b(delete|in|typeof)\\b"
},
"arrow-function": {
"name": "storage.type.function.arrow.pit",
"match": "=>"
},
"support-function": {
"name": "support.function.pit",
"match": "\\b(abs|apply|array|ceiling|character|codepoint|ends_with|every|extract|fallback|filter|find|floor|for|format|fraction|is_array|is_blob|is_character|is_data|is_digit|is_false|is_fit|is_function|is_integer|is_letter|is_logical|is_lower|is_null|is_number|is_object|is_pattern|is_stone|is_text|is_true|is_upper|is_whitespace|length|logical|lower|max|min|modulo|neg|normalize|not|number|parallel|print|race|record|reduce|remainder|replace|reverse|round|search|sequence|sign|some|sort|starts_with|stone|text|trim|trunc|upper|whole|meme|proto|isa|splat|use)(?=\\s*\\()"
},
"constant-numeric-hex": {
"name": "constant.numeric.hex.pit",
"match": "\\b0[xX][0-9a-fA-F]+\\b"
},
"constant-numeric-binary": {
"name": "constant.numeric.binary.pit",
"match": "\\b0[bB][01]+\\b"
},
"constant-numeric-octal": {
"name": "constant.numeric.octal.pit",
"match": "\\b0[oO][0-7]+\\b"
},
"constant-numeric": {
"name": "constant.numeric.pit",
"match": "\\b[0-9]+(\\.[0-9]+)?([eE][+-]?[0-9]+)?\\b"
},
"punctuation": {
"patterns": [
{
"name": "punctuation.separator.comma.pit",
"match": ","
},
{
"name": "punctuation.terminator.statement.pit",
"match": ";"
}
]
}
}
}

View File

@@ -0,0 +1,13 @@
{
"compilerOptions": {
"module": "commonjs",
"target": "ES2020",
"outDir": "out",
"lib": ["ES2020"],
"sourceMap": true,
"rootDir": "src",
"strict": true
},
"include": ["src"],
"exclude": ["node_modules", "out"]
}

21
fd.cm
View File

@@ -1,4 +1,4 @@
var fd = this
var fd = use('internal/fd')
var wildstar = use('wildstar')
function last_pos(str, sep) {
@@ -12,11 +12,11 @@ function last_pos(str, sep) {
// Helper to join paths
function join_paths(base, rel) {
base = replace(base, /\/+$/, "")
rel = replace(rel, /^\/+/, "")
if (!base) return rel
if (!rel) return base
return base + "/" + rel
var b = replace(base, /\/+$/, "")
var r = replace(rel, /^\/+/, "")
if (!b) return r
if (!r) return b
return b + "/" + r
}
fd.join_paths = join_paths
@@ -39,7 +39,8 @@ fd.stem = function stem(path) {
}
fd.globfs = function(globs, dir) {
if (dir == null) dir = "."
var _dir = dir
if (_dir == null) _dir = "."
var results = []
function check_neg(path) {
@@ -88,12 +89,12 @@ fd.globfs = function(globs, dir) {
});
}
var st = fd.stat(dir)
var st = fd.stat(_dir)
if (st && st.isDirectory) {
visit(dir, "")
visit(_dir, "")
}
return results
}
return fd
return fd

View File

@@ -324,7 +324,7 @@ static void listfiles_cb(const char *path, void *userdata) {
// Playdate listfiles returns just the name, but sometimes with slash for dir?
// Docs say "names of files".
JS_SetPropertyUint32(ctx->js, ctx->array, ctx->index++, JS_NewString(ctx->js, path));
JS_SetPropertyNumber(ctx->js, ctx->array, ctx->index++, JS_NewString(ctx->js, path));
}
JSC_SCALL(fd_readdir,
@@ -427,7 +427,7 @@ static void enum_cb(const char *name, void *userdata) {
strcpy(item_rel, name);
}
JS_SetPropertyUint32(ctx->js, ctx->results, (*ctx->count)++, JS_NewString(ctx->js, item_rel));
JS_SetPropertyNumber(ctx->js, ctx->results, (*ctx->count)++, JS_NewString(ctx->js, item_rel));
if (ctx->recurse) {
// Check if directory

Some files were not shown because too many files have changed in this diff Show More