faster sprite render path

This commit is contained in:
2025-01-15 23:53:17 -06:00
parent 96096adbc7
commit 545e7ccd6c
3 changed files with 122 additions and 52 deletions

View File

@@ -1694,6 +1694,44 @@ JSValue make_quad_indices_buffer(JSContext *js, int quads)
return JS_DupValue(js,idx_buffer);
}
/* Per-vertex attribute arrays for a batch of quads, stored as three
   separate arrays rather than one interleaved vertex array. Each array
   is sized for `verts` elements. */
struct quad_buffers {
/* One 2D position per vertex. */
HMM_Vec2 *pos;
/* One 2D texture coordinate per vertex. */
HMM_Vec2 *uv;
/* One 4-component color per vertex. */
HMM_Vec4 *color;
/* Number of vertices each of the arrays above was allocated for. */
int verts;
};
struct quad_buffers quad_buffers_new(int verts)
{
struct quad_buffers b;
b.verts = verts;
b.pos = malloc(sizeof(HMM_Vec2)*verts);
b.uv = malloc(sizeof(HMM_Vec2)*verts);
b.color = malloc(sizeof(HMM_Vec4)*verts);
return b;
}
/*
 * Build a JS mesh object from a set of per-vertex attribute arrays.
 *
 * Each attribute array is handed to make_gpu_buffer as float32 data
 * (2 components for pos and uv, 4 for color), and a quad index buffer is
 * requested for verts/4 quads. The resulting object carries the pos, uv,
 * color and indices buffers plus the vertex count and the index count
 * (verts/2*3, i.e. six indices per four-vertex quad).
 *
 * NOTE(review): the trailing 0,0 arguments to make_gpu_buffer are passed
 * through unchanged; whether the arrays are copied or adopted by the
 * returned buffers is decided there - confirm before freeing `buffers`.
 */
JSValue quadbuffers_to_mesh(JSContext *js, struct quad_buffers buffers)
{
  const int nverts = buffers.verts;
  const size_t vec2_bytes = sizeof(HMM_Vec2)*nverts;
  const size_t vec4_bytes = sizeof(HMM_Vec4)*nverts;

  /* Attribute buffers, created in pos / uv / color order. */
  JSValue pos_buf = make_gpu_buffer(js, buffers.pos, vec2_bytes, JS_TYPED_ARRAY_FLOAT32, 2, 0, 0);
  JSValue uv_buf = make_gpu_buffer(js, buffers.uv, vec2_bytes, JS_TYPED_ARRAY_FLOAT32, 2, 0, 0);
  JSValue color_buf = make_gpu_buffer(js, buffers.color, vec4_bytes, JS_TYPED_ARRAY_FLOAT32, 4, 0, 0);

  /* Integer arithmetic on the int vertex count, widened afterwards. */
  size_t quad_count = nverts/4;
  size_t index_count = nverts/2*3;
  JSValue idx_buf = make_quad_indices_buffer(js, quad_count);

  JSValue mesh = JS_NewObject(js);
  JS_SetProperty(js, mesh, pos_atom, pos_buf);
  JS_SetProperty(js, mesh, uv_atom, uv_buf);
  JS_SetProperty(js, mesh, color_atom, color_buf);
  JS_SetProperty(js, mesh, indices_atom, idx_buf);
  JS_SetProperty(js, mesh, vertices_atom, number2js(js, nverts));
  JS_SetProperty(js, mesh, num_indices_atom, number2js(js, index_count));

  return mesh;
}
JSValue quads_to_mesh(JSContext *js, text_vert *buffer)
{
size_t verts = arrlen(buffer);
@@ -4063,64 +4101,70 @@ JSC_CCALL(gpu_sort_sprite,
)
JSC_CCALL(gpu_make_sprite_queue,
size_t quads = js_arrlen(js, argv[0]);
sprite *sprites = NULL;
arrsetcap(sprites, quads);
size_t quads = 0;
int needfree = 1;
// test for fastest
size_t size;
sprites = JS_GetArrayBuffer(js,&size,argv[0]);
if (sprites) {
quads = size/sizeof(*sprites);
needfree = 0;
for (int i = 0; i < quads; i++)
JS_DupValue(js,sprites[i].image);
} else {
quads = js_arrlen(js, argv[0]);
arrsetcap(sprites, quads);
for (int i = 0; i < quads; i++) {
JSValue sub = JS_GetPropertyUint32(js, argv[0], i);
sprite *jsp = js2sprite(js, sub);
if (jsp) {
arrput(sprites, *jsp);
JS_DupValue(js,jsp->image);
}
else {
sprite sp = {0};
JS_GETATOM(js,sp.affine, sub, rect_atom, rect)
JS_GETATOM(js,sp.color,sub,color_atom,color)
JS_GETATOM(js,sp.layer,sub,layer_atom,number)
JS_GETATOM(js,sp.uv,sub,src_atom,rect)
sp.image = JS_GetProperty(js,sub,image_atom);
arrput(sprites,sp);
}
JS_FreeValue(js, sub);
}
}
qsort(sprites, quads, sizeof(sprite), sort_sprite);
struct quad_buffers buffers = quad_buffers_new(quads*4);
for (int i = 0; i < quads; i++) {
JSValue sub = JS_GetPropertyUint32(js, argv[0], i);
sprite *jsp = js2sprite(js, sub);
if (jsp) {
arrput(sprites, *jsp);
JS_DupValue(js,jsp->image);
}
else {
sprite sp = {0};
JS_GETATOM(js,sp.affine, sub, rect_atom, rect)
JS_GETATOM(js,sp.color,sub,color_atom,color)
JS_GETATOM(js,sp.layer,sub,layer_atom,number)
JS_GETATOM(js,sp.uv,sub,src_atom,rect)
sp.image = JS_GetProperty(js,sub,image_atom);
arrput(sprites,sp);
}
JS_FreeValue(js, sub);
}
qsort(sprites, arrlen(sprites), sizeof(sprite), sort_sprite);
text_vert *buffer = NULL;
arrsetcap(buffer, arrlen(sprites)*4);
for (int i = 0; i < arrlen(sprites); i++) {
rect pr = sprites[i].affine;
rect uv = sprites[i].uv;
HMM_Vec4 c = sprites[i].color;
text_vert v[4];
v[0].pos = (HMM_Vec2){ pr.x, pr.y };
v[1].pos = (HMM_Vec2){ pr.x+pr.w, pr.y };
v[2].pos = (HMM_Vec2){ pr.x, pr.y+pr.h };
v[3].pos = (HMM_Vec2){ pr.x+pr.w, pr.y+pr.h };
int idx = i * 4;
v[0].uv = (HMM_Vec2){ uv.x, uv.y+uv.h };
v[1].uv = (HMM_Vec2){ uv.x+uv.w, uv.y+uv.h };
v[2].uv = (HMM_Vec2){ uv.x, uv.y };
v[3].uv = (HMM_Vec2){ uv.x+uv.w, uv.y };
buffers.pos[idx + 0] = (HMM_Vec2){ pr.x, pr.y };
buffers.pos[idx + 1] = (HMM_Vec2){ pr.x+pr.w, pr.y };
buffers.pos[idx + 2] = (HMM_Vec2){ pr.x, pr.y+pr.h };
buffers.pos[idx + 3] = (HMM_Vec2){ pr.x+pr.w, pr.y+pr.h };
v[0].color = c;
v[1].color = c;
v[2].color = c;
v[3].color = c;
buffers.uv[idx + 0] = (HMM_Vec2){ uv.x, uv.y+uv.h };
buffers.uv[idx + 1] = (HMM_Vec2){ uv.x+uv.w, uv.y+uv.h };
buffers.uv[idx + 2] = (HMM_Vec2){ uv.x, uv.y };
buffers.uv[idx + 3] = (HMM_Vec2){ uv.x+uv.w, uv.y };
arrput(buffer, v[0]);
arrput(buffer, v[1]);
arrput(buffer, v[2]);
arrput(buffer, v[3]);
buffers.color[idx + 0] = c;
buffers.color[idx + 1] = c;
buffers.color[idx + 2] = c;
buffers.color[idx + 3] = c;
}
JSValue mesh = quads_to_mesh(js, buffer);
arrfree(buffer);
JSValue mesh = quadbuffers_to_mesh(js, buffers);
ret = JS_NewArray(js);
int first_index = 0;
@@ -4128,7 +4172,7 @@ JSC_CCALL(gpu_make_sprite_queue,
int n = 0;
JSValue img = JS_UNDEFINED;
for (int i = 0; i < arrlen(sprites); i++) {
for (int i = 0; i < quads; i++) {
if (!JS_SameValue(js, sprites[i].image, img)) {
if (count > 0) {
JSValue q = JS_NewObject(js);
@@ -4158,7 +4202,9 @@ JSC_CCALL(gpu_make_sprite_queue,
JS_SetPropertyUint32(js, ret, n++, q);
}
arrfree(sprites);
if (needfree)
arrfree(sprites);
JS_FreeValue(js, mesh);
)
@@ -4383,6 +4429,7 @@ JSC_CCALL(gpu_acquire_cmd_buffer,
2: an optional transfer buffer to use; if undefined a temporary one is used
*/
JSC_CCALL(gpu_upload,
Uint64 ss = SDL_GetTicksNS();
JSValue js_cmd = argv[0];
JSValue js_buffers = argv[1];
JSValue js_transfer = argv[2];
@@ -4463,15 +4510,17 @@ JSC_CCALL(gpu_upload,
return JS_ThrowReferenceError(js, "Failed to map transfer buffer: %s", SDL_GetError());
}
Uint64 sy = SDL_GetTicksNS();
// Copy all data into the mapped transfer buffer
size_t current_offset = 0;
for (size_t i = 0; i < len; i++) {
memcpy(mapped_data + current_offset, items[i].data, items[i].size);
current_offset += items[i].size;
}
printf("copy and unmap took %u\n", SDL_GetTicksNS()-sy);
SDL_UnmapGPUTransferBuffer(gpu, transfer);
// Issue uploads for each item
current_offset = 0;
for (size_t i = 0; i < len; i++) {
@@ -4493,6 +4542,8 @@ JSC_CCALL(gpu_upload,
SDL_EndGPUCopyPass(copy_pass);
free(items);
printf("UPLOAD TOOK %u ns\n", SDL_GetTicksNS()-ss);
)
JSC_CCALL(gpu_wait_for_fences,
@@ -7266,6 +7317,13 @@ bool rtree_iter(const NUMTYPE *min, const NUMTYPE *max, const JSValue *data, str
return 1;
}
/*
 * rtree_search callback that collects matching sprites by value.
 *
 * min, max: bounds supplied by the search for the visited entry (unused).
 * data:     the JS value stored in the tree for this entry.
 * arr:      address of a dynamic sprite array; a copy of the sprite is
 *           appended to it.
 *
 * js2sprite yields NULL when the value is not a sprite, so such entries
 * are skipped instead of being dereferenced. Always returns 1, matching
 * the sibling rtree_iter callback (presumably "keep iterating" - confirm
 * against the rtree library).
 *
 * NOTE(review): the sprite is copied bitwise, including its image JSValue,
 * without taking a new reference here.
 */
bool rtree_array_iter(const NUMTYPE *min, const NUMTYPE *max, const JSValue *data, sprite **arr)
{
  (void)min;
  (void)max;

  sprite *sp = js2sprite(global_js, *data);
  if (sp)
    arrput(*arr, *sp);

  return 1;
}
JSC_CCALL(rtree_query,
rtree *tree = js2rtree(js,self);
rect r = js2rect(js,argv[0]);
@@ -7275,12 +7333,19 @@ JSC_CCALL(rtree_query,
min[1] = r.y;
max[0] = r.x+r.w;
max[1] = r.y+r.h;
struct rtree_iter_data data = {0};
/* struct rtree_iter_data data = {0};
data.js = js;
data.arr = JS_NewArray(js);
data.n = 0;
rtree_search(tree, min, max, rtree_iter, &data);
ret = data.arr;
*/
sprite *arr = NULL;
rtree_search(tree, min, max, rtree_array_iter, &arr);
ret = JS_NewArrayBufferCopy(js,arr,arrlen(arr)*sizeof(*arr));
arrfree(arr);
)
JSC_CCALL(rtree_count,