From 545e7ccd6c20e020afbda752ecdb2b7b94a1d45b Mon Sep 17 00:00:00 2001 From: John Alanbrook Date: Wed, 15 Jan 2025 23:53:17 -0600 Subject: [PATCH] faster sprite render path --- scripts/components.js | 3 +- scripts/render.js | 4 + source/jsffi.c | 167 +++++++++++++++++++++++++++++------------- 3 files changed, 122 insertions(+), 52 deletions(-) diff --git a/scripts/components.js b/scripts/components.js index 5388c006..9b0b4aa1 100644 --- a/scripts/components.js +++ b/scripts/components.js @@ -22,6 +22,7 @@ frog = { */ globalThis.sprite_qt = os.make_rtree(); +//globalThis.sprite_qt = os.make_quadtree({x:-2000,y:2000,w:4000,h:4000}); var sprite = { image: undefined, @@ -243,7 +244,7 @@ component.sprite = function (obj) { sp.transform = os.make_transform(); sp.transform.parent = obj.transform; sp.guid = prosperon.guid(); - allsprites.push(sp); + allsprites.push(msp); sp.transform.change_hook = function() { sprite_qt.remove(msp); msp.rect = sp.transform.torect(); diff --git a/scripts/render.js b/scripts/render.js index 70700eaa..c34d8b6f 100644 --- a/scripts/render.js +++ b/scripts/render.js @@ -1348,9 +1348,13 @@ try { var waittime = 1/240; var last_frame_time = 0; // Ran once per frame +var fpses = []; prosperon.process = function process() { var now = profile.now(); var dt = now - last_frame_time; + fpses.push(dt); + if (fpses.length > 300) fpses.shift(); + console.log(1/(vector.sum(fpses)/fpses.length)) if (dt < waittime) os.sleep(waittime-dt); last_frame_time = profile.now(); diff --git a/source/jsffi.c b/source/jsffi.c index 9486b156..268f4552 100644 --- a/source/jsffi.c +++ b/source/jsffi.c @@ -1694,6 +1694,44 @@ JSValue make_quad_indices_buffer(JSContext *js, int quads) return JS_DupValue(js,idx_buffer); } +struct quad_buffers { + HMM_Vec2 *pos; + HMM_Vec2 *uv; + HMM_Vec4 *color; + int verts; +}; + +struct quad_buffers quad_buffers_new(int verts) +{ + struct quad_buffers b; + b.verts = verts; + b.pos = malloc(sizeof(HMM_Vec2)*verts); + b.uv = malloc(sizeof(HMM_Vec2)*verts); + b.color = malloc(sizeof(HMM_Vec4)*verts); + return b; +} + +JSValue quadbuffers_to_mesh(JSContext *js, struct quad_buffers buffers) +{ + JSValue jspos = make_gpu_buffer(js, buffers.pos, sizeof(HMM_Vec2)*buffers.verts, JS_TYPED_ARRAY_FLOAT32, 2, 0, 0); + JSValue jsuv = make_gpu_buffer(js, buffers.uv, sizeof(HMM_Vec2)*buffers.verts, JS_TYPED_ARRAY_FLOAT32, 2,0,0); + JSValue jscolor = make_gpu_buffer(js, buffers.color, sizeof(HMM_Vec4)*buffers.verts, JS_TYPED_ARRAY_FLOAT32, 4,0,0); + + size_t quads = buffers.verts/4; + size_t count = buffers.verts/2*3; + JSValue jsidx = make_quad_indices_buffer(js, quads); + + JSValue ret = JS_NewObject(js); + JS_SetProperty(js, ret, pos_atom, jspos); + JS_SetProperty(js, ret, uv_atom, jsuv); + JS_SetProperty(js, ret, color_atom, jscolor); + JS_SetProperty(js, ret, indices_atom, jsidx); + JS_SetProperty(js, ret, vertices_atom, number2js(js, buffers.verts)); + JS_SetProperty(js,ret,num_indices_atom, number2js(js,count)); + + return ret; +} + JSValue quads_to_mesh(JSContext *js, text_vert *buffer) { size_t verts = arrlen(buffer); @@ -4063,64 +4101,70 @@ JSC_CCALL(gpu_sort_sprite, ) JSC_CCALL(gpu_make_sprite_queue, - size_t quads = js_arrlen(js, argv[0]); sprite *sprites = NULL; - arrsetcap(sprites, quads); + size_t quads = 0; + int needfree = 1; + + // test for fastest + size_t size; + sprites = JS_GetArrayBuffer(js,&size,argv[0]); + if (sprites) { + quads = size/sizeof(*sprites); + needfree = 0; + for (int i = 0; i < quads; i++) + JS_DupValue(js,sprites[i].image); + } else { + quads = js_arrlen(js, argv[0]); + arrsetcap(sprites, quads); + + for (int i = 0; i < quads; i++) { + JSValue sub = JS_GetPropertyUint32(js, argv[0], i); + sprite *jsp = js2sprite(js, sub); + if (jsp) { + arrput(sprites, *jsp); + JS_DupValue(js,jsp->image); + } + else { + sprite sp = {0}; + JS_GETATOM(js,sp.affine, sub, rect_atom, rect) + JS_GETATOM(js,sp.color,sub,color_atom,color) + JS_GETATOM(js,sp.layer,sub,layer_atom,number) + JS_GETATOM(js,sp.uv,sub,src_atom,rect) + sp.image = JS_GetProperty(js,sub,image_atom); + arrput(sprites,sp); + } + JS_FreeValue(js, sub); + } + } + + qsort(sprites, quads, sizeof(sprite), sort_sprite); + + struct quad_buffers buffers = quad_buffers_new(quads*4); for (int i = 0; i < quads; i++) { - JSValue sub = JS_GetPropertyUint32(js, argv[0], i); - sprite *jsp = js2sprite(js, sub); - if (jsp) { - arrput(sprites, *jsp); - JS_DupValue(js,jsp->image); - } - else { - sprite sp = {0}; - JS_GETATOM(js,sp.affine, sub, rect_atom, rect) - JS_GETATOM(js,sp.color,sub,color_atom,color) - JS_GETATOM(js,sp.layer,sub,layer_atom,number) - JS_GETATOM(js,sp.uv,sub,src_atom,rect) - sp.image = JS_GetProperty(js,sub,image_atom); - arrput(sprites,sp); - } - JS_FreeValue(js, sub); - } - - qsort(sprites, arrlen(sprites), sizeof(sprite), sort_sprite); - - text_vert *buffer = NULL; - arrsetcap(buffer, arrlen(sprites)*4); - - for (int i = 0; i < arrlen(sprites); i++) { rect pr = sprites[i].affine; rect uv = sprites[i].uv; HMM_Vec4 c = sprites[i].color; - text_vert v[4]; - v[0].pos = (HMM_Vec2){ pr.x, pr.y }; - v[1].pos = (HMM_Vec2){ pr.x+pr.w, pr.y }; - v[2].pos = (HMM_Vec2){ pr.x, pr.y+pr.h }; - v[3].pos = (HMM_Vec2){ pr.x+pr.w, pr.y+pr.h }; + int idx = i * 4; - v[0].uv = (HMM_Vec2){ uv.x, uv.y+uv.h }; - v[1].uv = (HMM_Vec2){ uv.x+uv.w, uv.y+uv.h }; - v[2].uv = (HMM_Vec2){ uv.x, uv.y }; - v[3].uv = (HMM_Vec2){ uv.x+uv.w, uv.y }; + buffers.pos[idx + 0] = (HMM_Vec2){ pr.x, pr.y }; + buffers.pos[idx + 1] = (HMM_Vec2){ pr.x+pr.w, pr.y }; + buffers.pos[idx + 2] = (HMM_Vec2){ pr.x, pr.y+pr.h }; + buffers.pos[idx + 3] = (HMM_Vec2){ pr.x+pr.w, pr.y+pr.h }; - v[0].color = c; - v[1].color = c; - v[2].color = c; - v[3].color = c; + buffers.uv[idx + 0] = (HMM_Vec2){ uv.x, uv.y+uv.h }; + buffers.uv[idx + 1] = (HMM_Vec2){ uv.x+uv.w, uv.y+uv.h }; + buffers.uv[idx + 2] = (HMM_Vec2){ uv.x, uv.y }; + buffers.uv[idx + 3] = (HMM_Vec2){ uv.x+uv.w, uv.y }; - arrput(buffer, v[0]); - arrput(buffer, v[1]); - arrput(buffer, v[2]); - arrput(buffer, v[3]); + buffers.color[idx + 0] = c; + buffers.color[idx + 1] = c; + buffers.color[idx + 2] = c; + buffers.color[idx + 3] = c; } - - JSValue mesh = quads_to_mesh(js, buffer); - - arrfree(buffer); + + JSValue mesh = quadbuffers_to_mesh(js, buffers); ret = JS_NewArray(js); int first_index = 0; @@ -4128,7 +4172,7 @@ JSC_CCALL(gpu_make_sprite_queue, int n = 0; JSValue img = JS_UNDEFINED; - for (int i = 0; i < arrlen(sprites); i++) { + for (int i = 0; i < quads; i++) { if (!JS_SameValue(js, sprites[i].image, img)) { if (count > 0) { JSValue q = JS_NewObject(js); @@ -4158,7 +4202,9 @@ JSC_CCALL(gpu_make_sprite_queue, JS_SetPropertyUint32(js, ret, n++, q); } - arrfree(sprites); + if (needfree) + arrfree(sprites); + JS_FreeValue(js, mesh); ) @@ -4383,6 +4429,7 @@ JSC_CCALL(gpu_acquire_cmd_buffer, 2: an optional transfer buffer to use; if undefined a temporary one is used */ JSC_CCALL(gpu_upload, + Uint64 ss = SDL_GetTicksNS(); JSValue js_cmd = argv[0]; JSValue js_buffers = argv[1]; JSValue js_transfer = argv[2]; @@ -4463,15 +4510,17 @@ JSC_CCALL(gpu_upload, return JS_ThrowReferenceError(js, "Failed to map transfer buffer: %s", SDL_GetError()); } + Uint64 sy = SDL_GetTicksNS(); // Copy all data into the mapped transfer buffer size_t current_offset = 0; for (size_t i = 0; i < len; i++) { memcpy(mapped_data + current_offset, items[i].data, items[i].size); current_offset += items[i].size; } - + printf("copy and unmap took %u\n", SDL_GetTicksNS()-sy); SDL_UnmapGPUTransferBuffer(gpu, transfer); + // Issue uploads for each item current_offset = 0; for (size_t i = 0; i < len; i++) { @@ -4493,6 +4542,8 @@ JSC_CCALL(gpu_upload, SDL_EndGPUCopyPass(copy_pass); free(items); + + printf("UPLOAD TOOK %u ns\n", SDL_GetTicksNS()-ss); ) JSC_CCALL(gpu_wait_for_fences, @@ -7266,6 +7317,13 @@ bool rtree_iter(const NUMTYPE *min, const NUMTYPE *max, const JSValue *data, str return 1; } +bool rtree_array_iter(const NUMTYPE *min, const NUMTYPE *max, const JSValue *data, sprite **arr) +{ + sprite *sp = js2sprite(global_js, *data); + arrput(*arr, *sp); + return 1; +} + JSC_CCALL(rtree_query, rtree *tree = js2rtree(js,self); rect r = js2rect(js,argv[0]); @@ -7275,12 +7333,19 @@ JSC_CCALL(rtree_query, min[1] = r.y; max[0] = r.x+r.w; max[1] = r.y+r.h; - struct rtree_iter_data data = {0}; + +/* struct rtree_iter_data data = {0}; data.js = js; data.arr = JS_NewArray(js); data.n = 0; rtree_search(tree, min, max, rtree_iter, &data); ret = data.arr; +*/ + + sprite *arr = NULL; + rtree_search(tree, min, max, rtree_array_iter, &arr); + ret = JS_NewArrayBufferCopy(js,arr,arrlen(arr)*sizeof(*arr)); + arrfree(arr); ) JSC_CCALL(rtree_count,