diff --git a/meson.build b/meson.build index 5f0226ab..9d3a1b30 100644 --- a/meson.build +++ b/meson.build @@ -1,13 +1,14 @@ -project('prosperon', ['c', 'cpp'], default_options : [ 'cpp_std=c++11']) +project('prosperon', ['c', 'cpp'], + version: '0.9.2', + meson_version: '>=1.4', + default_options : [ 'cpp_std=c++11']) libtype = get_option('default_library') link = [] src = [] -if not get_option('editor') -# add_project_arguments('-DNEDITOR', language:'c') -endif +add_project_arguments('-pedantic', language: ['c']) git_tag_cmd = run_command('git', 'describe', '--tags', '--abbrev=0', check: false) prosperon_version = 'unknown' @@ -46,12 +47,13 @@ endif cmake = import('cmake') -sdl3_cmake_options = [ - '-DSDL_STATIC=ON', - '-DSDL_SHARED=OFF', - '-DSDL_TEST=OFF', - '-DCMAKE_BUILD_TYPE=Release', -] +sdl3_opts = cmake.subproject_options() +sdl3_opts.add_cmake_defines({ + 'SDL_STATIC': 'ON', + 'SDL_SHARED': 'OFF', + 'SDL_TEST': 'OFF', + 'CMAKE_BUILD_TYPE': 'Release' +}) cc = meson.get_compiler('c') @@ -75,7 +77,7 @@ if host_machine.system() == 'windows' deps += cc.find_library('imm32') deps += cc.find_library('version') deps += cc.find_library('cfgmgr32') - sdl3_cmake_options += '-DHAVE_ISINF=1' # TODO: A hack to get this to compile on MSYS2; otherwise it doesn't link correctly + sdl3_opts.add_cmake_defines({'HAVE_ISINF': '1'}) # TODO: A hack to get this to compile on MSYS2; otherwise it doesn't link correctly link += '-static' endif @@ -83,7 +85,7 @@ if host_machine.system() == 'emscripten' link += '-sUSE_WEBGPU' endif -sdl3_proj = cmake.subproject('sdl3', cmake_options: sdl3_cmake_options) +sdl3_proj = cmake.subproject('sdl3', options: sdl3_opts) deps += sdl3_proj.dependency('SDL3-static') @@ -125,7 +127,7 @@ if get_option('enet') endif sources = [] -src += ['anim.c', 'config.c', 'datastream.c','font.c','gameobject.c','HandmadeMath.c','jsffi.c','model.c','render.c','script.c','simplex.c','spline.c', 'timer.c', 'transform.c','prosperon.c', 'wildmatch.c', 'sprite.c', 'quadtree.c', 'aabb.c', 'rtree.c'] +src += ['anim.c', 'config.c', 'datastream.c','font.c','HandmadeMath.c','jsffi.c','model.c','render.c','script.c','simplex.c','spline.c', 'timer.c', 'transform.c','prosperon.c', 'wildmatch.c', 'sprite.c', 'rtree.c'] imsrc = ['GraphEditor.cpp','ImCurveEdit.cpp','ImGradient.cpp','imgui_draw.cpp','imgui_tables.cpp','imgui_widgets.cpp','imgui.cpp','ImGuizmo.cpp','imnodes.cpp','implot_items.cpp','implot.cpp', 'imgui_impl_sdlrenderer3.cpp', 'imgui_impl_sdl3.cpp', 'imgui_impl_sdlgpu3.cpp'] @@ -167,7 +169,7 @@ core = custom_target('core.zip', ' && echo "Rebuilding core.zip" && rm -f ' + meson.current_build_dir() + '/core.zip && ' + 'zip -r ' + meson.current_build_dir() + '/core.zip scripts fonts icons shaders' ], - build_always: true, + build_always_stale: true, build_by_default: true ) @@ -196,7 +198,7 @@ prosperon = custom_target('prosperon', '@INPUT1@', '@OUTPUT@' ], - build_always: true, + build_always_stale: true, build_by_default: true ) @@ -209,10 +211,10 @@ copy_tests = custom_target( output: 'tests', command: [ 'cp', '-rf', - join_paths(meson.source_root(), 'tests'), - meson.build_root() + join_paths(meson.project_source_root(), 'tests'), + meson.project_build_root() ], - build_always: true, + build_always_stale: true, build_by_default: true ) diff --git a/scripts/modules/imgui.js b/scripts/modules/imgui.js index 6431fc7c..3a2318a3 100644 --- a/scripts/modules/imgui.js +++ b/scripts/modules/imgui.js @@ -322,16 +322,6 @@ imgui.barplot[prosperon.DOC] = `Plot a bar chart in the current ImPlot. :return: None `; -imgui.pieplot[prosperon.DOC] = `Plot a pie chart in the current ImPlot. - -:param labels: An array of label strings for each slice. -:param values: An array of numeric values corresponding to each slice. -:param x: The x position of the pie’s center. -:param y: The y position of the pie’s center. -:param radius: The radius of the pie chart. -:return: None -`; - imgui.textplot[prosperon.DOC] = `Render text at the specified coordinates in plot space. :param text: The string to render. diff --git a/scripts/modules/io.js b/scripts/modules/io.js index 084e2faf..644387fe 100644 --- a/scripts/modules/io.js +++ b/scripts/modules/io.js @@ -103,8 +103,10 @@ io.basedir[prosperon.DOC] = `Return the application's base directory (where the :return: A string with the base directory path. ` -io.userdir[prosperon.DOC] = `Return the user's directory, often used for saving data. +io.prefdir[prosperon.DOC] = `Get the user-and-app-specific path where files can be written. +:param org: The name of your organization. +:param app: The name of your application. :return: A string with the user's directory path. ` diff --git a/scripts/modules/render.js b/scripts/modules/render.js index 522e5584..9450b3d6 100644 --- a/scripts/modules/render.js +++ b/scripts/modules/render.js @@ -361,7 +361,6 @@ function make_shader(sh_file) { num_uniform_buffers: refl.ubos ? refl.ubos.length : 0, entrypoint: shader_type === "msl" ? "main0" : "main" } - console.log(`making shader ${sh_file} of format ${shader_type}`) shader.gpu = render._main.make_shader(shader) shader.reflection = refl; diff --git a/source/aabb.c b/source/aabb.c deleted file mode 100644 index 0b09b377..00000000 --- a/source/aabb.c +++ /dev/null @@ -1,34 +0,0 @@ -#include -#include - -#include "aabb.h" - -aabb* -aabb_new(float x, float y, float hW, float hH) { - aabb* a = malloc(sizeof(aabb)); - a->center.x = x; - a->center.y = y; - a->dims.w = hW; - a->dims.h = hH; - return a; -} - -void -aabb_free(aabb *a) { - free(a); -} - -int -aabb_contains(aabb *a, float x, float y) { - return (x >= a->center.x-a->dims.w && - x <= a->center.x+a->dims.w) && - (y >= a->center.y-a->dims.h && - y <= a->center.y+a->dims.h); -} - -int -aabb_intersects(aabb *a, aabb *b) { - return (abs(a->center.x - b->center.x) < (a->dims.w + b->dims.w)) && - (abs(a->center.y - b->center.y) < (a->dims.h + b->dims.h)); -} - diff --git a/source/aabb.h b/source/aabb.h deleted file mode 100644 index 963222b0..00000000 --- a/source/aabb.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - aabb.h - 2014 JSK (kutani@projectkutani.com) - - Simple (2D) axis-aligned bounding box implementation. Part of the Panic - Panic project. - - Released to the public domain. See LICENSE for details. -*/ -#ifndef _AABB_H - #define _AABB_H - -/** \brief axis-aligned bounding box - - Simple struct of four floats, divided into two sub-structs. - - center {x, y} - The center point of the bounding box - dims {w, h} - The half-width and half-height of the box -*/ -typedef struct aabb { - struct { - float x; - float y; - } center; - struct { - float w; - float h; - } dims; -} aabb; - -/// Malloc's a new aabb struct -/*! - Mallocs a new aabb struct and sets center and dims to the passed - x, y, hW, and hH values. -*/ -aabb* aabb_new(float x, float y, float hW, float hH); - - -/// Frees the passed aabb. -void aabb_free(aabb *a); - -/// Checks if the point x,y lies within the passed aabb -int aabb_contains(aabb *a, float x, float y); - -/// Checks if the two passed aabb's intersect -int aabb_intersects(aabb *a, aabb *b); - -#endif diff --git a/source/anim.c b/source/anim.c index b80a17dc..5c106094 100644 --- a/source/anim.c +++ b/source/anim.c @@ -16,7 +16,10 @@ void animation_run(struct animation *anim, float now) HMM_Vec4 sample_cubicspline(sampler *sampler, float t, int prev, int next) { - return (HMM_Vec4)HMM_SLerp(HMM_QV4(sampler->data[prev]), t, HMM_QV4(sampler->data[next])); + HMM_Vec4 ret; + HMM_Quat qv = HMM_SLerp(HMM_QV4(sampler->data[prev]), t, HMM_QV4(sampler->data[next])); + memcpy(ret.e, qv.e, sizeof(ret.e)); + return ret; } HMM_Vec4 sample_sampler(sampler *sampler, float time) @@ -37,6 +40,9 @@ HMM_Vec4 sample_sampler(sampler *sampler, float time) float td = sampler->times[next_time]-sampler->times[previous_time]; float t = (time - sampler->times[previous_time])/td; + HMM_Vec4 ret; + HMM_Quat qv; + switch(sampler->type) { case LINEAR: return HMM_LerpV4(sampler->data[previous_time],time,sampler->data[next_time]); @@ -48,7 +54,9 @@ HMM_Vec4 sample_sampler(sampler *sampler, float time) return sample_cubicspline(sampler,t, previous_time, next_time); break; case SLERP: - return (HMM_Vec4)HMM_SLerp(sampler->data[previous_time].quat, time, sampler->data[next_time].quat); + qv = HMM_SLerp(sampler->data[previous_time].quat, time, sampler->data[next_time].quat); + memcpy(ret.e,qv.e,sizeof(ret.e)); + return ret; break; } return sample_cubicspline(sampler,t, previous_time, next_time); diff --git a/source/config.c b/source/config.c index 3451d647..5ae39455 100644 --- a/source/config.c +++ b/source/config.c @@ -24,11 +24,12 @@ #define STBI_NO_STDIO #include "stb_image.h" +#define STBIR_DEFAULT_FILTER_DOWNSAMPLE STBIR_FILTER_BOX + #define STB_IMAGE_RESIZE_IMPLEMENTATION #include "stb_image_resize2.h" #define STB_IMAGE_WRITE_IMPLEMENTATION -#define STBIR_DEFAULT_FILTER_DOWNSAMPLE STBIR_FILTER_BOX #include "stb_image_write.h" #define PL_MPEG_IMPLEMENTATION diff --git a/source/cute_aseprite.h b/source/cute_aseprite.h index 63ca7cae..08e582a1 100644 --- a/source/cute_aseprite.h +++ b/source/cute_aseprite.h @@ -306,24 +306,7 @@ struct ase_t void* mem_ctx; }; -#define ASEPRITE_ERROR_MAX 256 -static char aseprite_error[ASEPRITE_ERROR_MAX] = {0}; - -static const char *aseprite_GetError() { - return aseprite_error; -} - -static void aseprite_clear_error() { - aseprite_error[0] = 0; -} - -static void aseprite_set_error(const char *msg) { - if (msg) { - strncpy(aseprite_error, msg, ASEPRITE_ERROR_MAX-1); - aseprite_error[ASEPRITE_ERROR_MAX-1] = 0; - } else - aseprite_error[0] = 0; -} +const char *aseprite_GetError(); #endif // CUTE_ASEPRITE_H @@ -331,6 +314,24 @@ static void aseprite_set_error(const char *msg) { #ifndef CUTE_ASEPRITE_IMPLEMENTATION_ONCE #define CUTE_ASEPRITE_IMPLEMENTATION_ONCE +#define ASEPRITE_ERROR_MAX 256 +char aseprite_error[ASEPRITE_ERROR_MAX] = {0}; + +const char *aseprite_GetError() { + return aseprite_error; +} + +void aseprite_clear_error() { + aseprite_error[0] = 0; +} + +void aseprite_set_error(const char *msg) { + if (msg) { + strncpy(aseprite_error, msg, ASEPRITE_ERROR_MAX-1); + aseprite_error[ASEPRITE_ERROR_MAX-1] = 0; + } else + aseprite_error[0] = 0; +} #ifndef _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_WARNINGS diff --git a/source/datastream.c b/source/datastream.c index 1f2b1acf..1ef4bd5a 100644 --- a/source/datastream.c +++ b/source/datastream.c @@ -15,11 +15,6 @@ void datastream_free(JSRuntime *rt,datastream *ds) free(ds); } -static void render_audio(plm_t *mpeg, plm_samples_t *samples, struct datastream *ds) { -// for (int i = 0; i < samples->count * CHANNELS; i++) -// ringpush(ds->ring, samples->interleaved[i]); -} - struct datastream *ds_openvideo(void *raw, size_t rawlen) { struct datastream *ds = malloc(sizeof(*ds)); diff --git a/source/font.c b/source/font.c index 686af696..0391c8c3 100644 --- a/source/font.c +++ b/source/font.c @@ -21,31 +21,6 @@ void font_free(JSRuntime *rt, font *f) free(f); } -struct sFont *MakeSDFFont(const char *fontfile, int height) -{ - int packsize = 1024; - struct sFont *newfont = calloc(1, sizeof(struct sFont)); - newfont->height = height; - - char fontpath[256]; - snprintf(fontpath, 256, "fonts/%s", fontfile); - -// unsigned char *ttf_buffer = slurp_file(fontpath, NULL); - unsigned char *bitmap = malloc(packsize * packsize); - - stbtt_fontinfo fontinfo; -// if (!stbtt_InitFont(&fontinfo, ttf_buffer, stbtt_GetFontOffsetForIndex(ttf_buffer, 0))) { -// YughError("Failed to make font %s", fontfile); -// } - - for (int i = 32; i < 95; i++) { - int w, h, xoff, yoff; -// unsigned char *stbtt_GetGlyphSDF(&fontinfo, height, i, 1, 0, 1, &w, &h, &xoff, &yoff); - } - - return newfont; -} - struct sFont *MakeFont(void *ttf_buffer, size_t len, int height) { if (!ttf_buffer) return NULL; @@ -111,29 +86,6 @@ struct sFont *MakeFont(void *ttf_buffer, size_t len, int height) { return newfont; } -int text_flush() { -/* if (arrlen(text_buffer) == 0) return 0; - - sg_range verts; - verts.ptr = text_buffer; - verts.size = sizeof(struct text_vert) * arrlen(text_buffer); - if (sg_query_buffer_will_overflow(*buf, verts.size)) { - sg_destroy_buffer(*buf); - *buf = sg_make_buffer(&(sg_buffer_desc){ - .size = verts.size, - .type = SG_BUFFERTYPE_STORAGEBUFFER, - .usage = SG_USAGE_STREAM, - .label = "text buffer" - }); - } - - sg_append_buffer(*buf, &verts); - int n = arrlen(text_buffer); - arrsetlen(text_buffer, 0); - return n; -*/ -} - void sdrawCharacter(struct text_vert **buffer, stbtt_packedchar c, HMM_Vec2 cursor, float scale, struct rgba color) { struct text_vert vert; @@ -185,7 +137,7 @@ const char *esc_color(const char *c, struct rgba *color, struct rgba defc) { struct rgba d; if (!color) color = &d; - if (*c != '\e') return c; + if (*c != '\033') return c; c++; if (*c != '[') return c; c++; @@ -235,7 +187,7 @@ HMM_Vec2 measure_text(const char *text, font *f, float size, float letterSpacing continue; } - float charWidth = f->Characters[*c].advance + letterSpacing; + float charWidth = f->Characters[(unsigned char)*c].advance + letterSpacing; // Handle wrapping if (wrap > 0 && lineWidth + charWidth > wrap) { @@ -283,15 +235,13 @@ HMM_Vec2 measure_text(const char *text, font *f, float size, float letterSpacing } /* pos given in screen coordinates */ struct text_vert *renderText(const char *text, HMM_Vec2 pos, font *f, float scale, colorf color, float wrap) { - int wrapAtWord = 1; text_vert *buffer = NULL; - int len = strlen(text); HMM_Vec2 cursor = pos; float lineHeight = f->ascent - f->descent; float lineWidth = 0; - for (char *c = text; *c != 0; c++) { + for (const char *c = text; *c != 0; c++) { if (*c == '\n') { cursor.x = pos.x; cursor.y -= lineHeight + f->linegap; @@ -299,7 +249,7 @@ struct text_vert *renderText(const char *text, HMM_Vec2 pos, font *f, float scal continue; } - struct character chara = f->Characters[*c]; + struct character chara = f->Characters[(unsigned char)*c]; if (wrap > 0 && lineWidth + chara.advance > wrap) { cursor.x = pos.x; diff --git a/source/font.h b/source/font.h index f4e36289..fe8fc757 100644 --- a/source/font.h +++ b/source/font.h @@ -60,7 +60,4 @@ struct sFont *MakeFont(void *data, size_t len, int height); struct text_vert *renderText(const char *text, HMM_Vec2 pos, font *f, float scale, colorf color, float wrap); HMM_Vec2 measure_text(const char *text, font *f, float scale, float letterSpacing, float wrap); -// Flushes all letters from renderText calls into the provided buffer -int text_flush(); - #endif diff --git a/source/gameobject.c b/source/gameobject.c deleted file mode 100644 index ebb1fc96..00000000 --- a/source/gameobject.c +++ /dev/null @@ -1,33 +0,0 @@ -#include "gameobject.h" - -#include "math.h" -#include - -#include "stb_ds.h" - -static void velocityFn(cpBody *body, cpVect gravity, cpFloat damping, cpFloat dt) -{ -/* gameobject *go = body2go(body); - gameobject_apply(go); - cpVect pos = cpBodyGetPosition(body); - HMM_Vec2 g = warp_force((HMM_Vec3){pos.x, pos.y, 0}, go->warp_mask).xy; - if (!go) { - cpBodyUpdateVelocity(body,g.cp,damping,dt); - return; - } - -// cpFloat d = isfinite(go->damping) ? go->damping : damping; - cpFloat d = damping; - - cpBodyUpdateVelocity(body,g.cp,d,dt*go->timescale); - - if (isfinite(go->maxvelocity)) - cpBodySetVelocity(body, cpvclamp(cpBodyGetVelocity(body), go->maxvelocity)); - - if (isfinite(go->maxangularvelocity)) { - float av = cpBodyGetAngularVelocity(body); - if (fabs(av) > go->maxangularvelocity) - cpBodySetAngularVelocity(body, copysignf(go->maxangularvelocity, av)); - } -*/ -} diff --git a/source/gameobject.h b/source/gameobject.h deleted file mode 100644 index 668ecb22..00000000 --- a/source/gameobject.h +++ /dev/null @@ -1,49 +0,0 @@ -#ifndef GAMEOBJECT_H -#define GAMEOBJECT_H - -#define dag_rm(p,c) do{\ - for (int i = arrlen(p->children)-1; i--; i >=0) {\ - if (p->children[i] == c) { \ - arrdelswap(p->children,i);\ - c->parent=NULL;\ - break;\ -}}}while(0) - -#define dag_set(p,c) do{\ - arrpush(p->children,c);\ - if(c->parent) dag_rm(c->parent,c);\ - c->parent=p;\ -}while(0) - -#define dag_clip(p) do{\ - if (p->parent)\ - dag_rm(p->parent,p);\ -}while(0) - -struct gameobject { - float damping; - float timescale; - float maxvelocity; - float maxangularvelocity; - unsigned int layer; - unsigned int warp_mask; -}; - -/* - Friction uses coulomb model. When shapes collide, their friction is multiplied. Some example values: - Steel on steel: 0.0005 - Wood on steel: 0.0012 - Wood on wood: 0.0015 - => steel = 0.025 - => wood = 0.04 - => hardrubber = 0.31 - => concrete = 0.05 - => rubber = 0.5 - Hardrubber on steel: 0.0077 - Hardrubber on concrete: 0.015 - Rubber on concrete: 0.025 -*/ - -typedef struct gameobject gameobject; - -#endif diff --git a/source/jsffi.c b/source/jsffi.c index 88b3776e..7dcf9bdf 100644 --- a/source/jsffi.c +++ b/source/jsffi.c @@ -62,8 +62,6 @@ typedef struct rtree rtree; //#include #endif - - #define STATE_VECTOR_LENGTH 624 #define STATE_VECTOR_M 397 /* changes to STATE_VECTOR_LENGTH also require changes to this */ @@ -1038,7 +1036,7 @@ static const char *vals_SDL_GPUTextureFormat[] = { "astc 12x12 float" }; -JS2ENUM(SDL_GPUTextureFormat, rets_SDL_GPUTextureFormat, vals_SDL_GPUTextureFormat); +JS2ENUM(SDL_GPUTextureFormat, rets_SDL_GPUTextureFormat, vals_SDL_GPUTextureFormat) SDL_GPUColorTargetBlendState js2SDL_GPUColorTargetBlendState(JSContext *js, JSValue v) { @@ -1606,7 +1604,7 @@ int point2segindex(HMM_Vec2 p, HMM_Vec2 *segs, double slop) { return best; } -static JSValue idx_buffer = JS_UNDEFINED; +static JSValue idx_buffer; static int idx_count = 0; JSValue make_quad_indices_buffer(JSContext *js, int quads) @@ -1828,7 +1826,7 @@ JSValue js_math_dot(JSContext *js, JSValue self, int argc, JSValue *argv) { free(a); free(b); return number2js(js,dot); -}; +} JSValue js_math_project(JSContext *js, JSValue self, int argc, JSValue *argv) { size_t alen, blen; @@ -2572,10 +2570,10 @@ JSC_CCALL(os_engine_start, JS_SDL_PROP(js, p, SDL_PROP_APP_METADATA_URL_STRING, url) JS_SDL_PROP(js, p, SDL_PROP_APP_METADATA_TYPE_STRING, type) - if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_CAMERA) < 0) - return JS_ThrowReferenceError(js, "Couldn't initialize SDL: %s\n", SDL_GetError()); + if (!SDL_Init(SDL_INIT_VIDEO | SDL_INIT_CAMERA)) + return JS_ThrowReferenceError(js, "Couldn't initialize SDL: %s\n", SDL_GetError()); - char *title; + const char *title; JS_GETPROP(js,title,argv[0],title,cstring) SDL_Window *new = SDL_CreateWindow(title, js2number(js, js_getproperty(js,argv[0], width_atom)), js2number(js,js_getproperty(js,argv[0], height_atom)), SDL_WINDOW_RESIZABLE); @@ -3533,7 +3531,6 @@ JSC_CCALL(renderer_make_sprite_mesh, for (int i = 0; i < quads; i++) { JSValue sub = JS_GetPropertyUint32(js,sprites,i); JSValue jstransform = JS_GetProperty(js,sub,transform_atom); - transform *tr = js2transform(js,jstransform); JSValue jssrc = JS_GetProperty(js,sub,src_atom); JSValue jscolor = JS_GetProperty(js,sub,color_atom); @@ -3552,24 +3549,12 @@ JSC_CCALL(renderer_make_sprite_mesh, // Calculate the base index for the current quad size_t base = i * 4; - -// HMM_Mat3 trmat = transform2mat3_global(tr); - - HMM_Vec3 base_quad[4] = { - {0.0,0.0,1.0}, - {1.0,0.0,1.0}, - {0.0,1.0,1.0}, - {1.0,1.0,1.0} - }; - -// for (int j = 0; j < 4; j++) -// posdata[base+j] = HMM_MulM3V3(trmat, base_quad[j]).xy; // Define the UV coordinates based on the source rectangle - uvdata[base + 0] = (HMM_Vec2){ src.x, src.y + src.h }; - uvdata[base + 1] = (HMM_Vec2){ src.x + src.w, src.y + src.h }; - uvdata[base + 2] = (HMM_Vec2){ src.x, src.y }; - uvdata[base + 3] = (HMM_Vec2){ src.x + src.w, src.y }; + uvdata[base + 0] = (HMM_Vec2){ src.x, src.y + src.h }; + uvdata[base + 1] = (HMM_Vec2){ src.x + src.w, src.y + src.h }; + uvdata[base + 2] = (HMM_Vec2){ src.x, src.y }; + uvdata[base + 3] = (HMM_Vec2){ src.x + src.w, src.y }; colordata[base] = color; colordata[base+1] = color; @@ -4156,18 +4141,12 @@ JSC_CCALL(gpu_texture, return jstex; ) -static HMM_Vec3 base_quad[4] = { - {0.0,0.0,1.0}, - {1.0,0.0,1.0}, - {0.0,1.0,1.0}, - {1.0,1.0,1.0} - }; - static HMM_Vec4 base_quad_4[4] = { - { 0.0,0.0, 1.0f, 1.0f }, - { 1,0,0.0, 1.0f, 1.0f }, - { 0.0,1.0, 1.0f, 1.0f }, - { 1.0,1.0, 1.0f, 1.0f } - }; +static HMM_Vec3 base_quad[4] = { + {0.0,0.0,1.0}, + {1.0,0.0,1.0}, + {0.0,1.0,1.0}, + {1.0,1.0,1.0} +}; static inline void add_quad(text_vert **verts, rect *restrict src, rect *restrict dst) { @@ -4644,7 +4623,6 @@ JSC_CCALL(gpu_acquire_cmd_buffer, 2: an optional transfer buffer to use; if undefined a temporary one is used */ JSC_CCALL(gpu_upload, - Uint64 ss = SDL_GetTicksNS(); JSValue js_cmd = argv[0]; JSValue js_buffers = argv[1]; JSValue js_transfer = argv[2]; @@ -4724,16 +4702,14 @@ JSC_CCALL(gpu_upload, return JS_ThrowReferenceError(js, "Failed to map transfer buffer: %s", SDL_GetError()); } - Uint64 sy = SDL_GetTicksNS(); // Copy all data into the mapped transfer buffer size_t current_offset = 0; for (size_t i = 0; i < len; i++) { - memcpy(mapped_data + current_offset, items[i].data, items[i].size); + memcpy((char*)mapped_data + current_offset, items[i].data, items[i].size); current_offset += items[i].size; } SDL_UnmapGPUTransferBuffer(gpu, transfer); - // Issue uploads for each item current_offset = 0; for (size_t i = 0; i < len; i++) { @@ -5120,14 +5096,6 @@ JSC_CCALL(gpu_tile, arrfree(verts); ) -static const JSCFunctionListEntry js_SDL_GPUCopyPass_funcs[] = {}; -static const JSCFunctionListEntry js_SDL_GPUFence_funcs[] = {}; -static const JSCFunctionListEntry js_SDL_GPUTransferBuffer_funcs[] = {}; -static const JSCFunctionListEntry js_SDL_GPUShader_funcs[] = {}; -static const JSCFunctionListEntry js_SDL_GPUSampler_funcs[] = {}; -static const JSCFunctionListEntry js_SDL_GPUGraphicsPipeline_funcs[] = {}; -static const JSCFunctionListEntry js_SDL_GPUComputePipeline_funcs[] = {}; - static const JSCFunctionListEntry js_SDL_GPUDevice_funcs[] = { MIST_FUNC_DEF(gpu, claim_window, 1), MIST_FUNC_DEF(gpu, make_pipeline, 1), // loads pipeline state into an object @@ -5707,8 +5675,6 @@ static const JSCFunctionListEntry js_SDL_Camera_funcs[] = MIST_FUNC_DEF(camera, release_frame, 1), }; -static const JSCFunctionListEntry js_SDL_Cursor_funcs[] = {}; - JSC_CCALL(texture_mode, SDL_Texture *tex = js2SDL_Texture(js,self); SDL_SetTextureScaleMode(tex,js2number(js,argv[0])); @@ -5832,42 +5798,14 @@ static const JSCFunctionListEntry js_console_funcs[] = { MIST_FUNC_DEF(console,print,1), }; -JSC_CCALL(profile_gather_rate, - JS_SetInterruptRate(js2number(js,argv[0])); -) - -JSC_CCALL(profile_gather_stop, - JS_SetInterruptHandler(JS_GetRuntime(js),NULL,NULL); -) - -JSC_CCALL(profile_best_t, - char* result[50]; - double seconds = js2number(js,argv[0]); - if (seconds < 1e-6) - snprintf(result, 50, "%.2f ns", seconds * 1e9); - else if (seconds < 1e-3) - snprintf(result, 50, "%.2f µs", seconds * 1e6); - else if (seconds < 1) - snprintf(result, 50, "%.2f ms", seconds * 1e3); - else - snprintf(result, 50, "%.2f s", seconds); - - return JS_NewString(js,result); -) - -static const JSCFunctionListEntry js_profile_funcs[] = { - MIST_FUNC_DEF(profile,best_t, 1), - MIST_FUNC_DEF(profile,gather_rate,1), - MIST_FUNC_DEF(profile,gather_stop,0), -}; - JSC_CCALL(debug_stack_depth, return number2js(js,js_debugger_stack_depth(js))) JSC_CCALL(debug_build_backtrace, return js_debugger_build_backtrace(js,NULL)) JSC_CCALL(debug_closure_vars, return js_debugger_closure_variables(js,argv[0])) JSC_CCALL(debug_local_vars, return js_debugger_local_variables(js, js2number(js,argv[0]))) -JSC_CCALL(debug_fn_info, return js_debugger_fn_info(js, argv[0])); -JSC_CCALL(debug_backtrace_fns, return js_debugger_backtrace_fns(js,NULL)); -JSC_CCALL(debug_dump_obj, return js_dump_value(js, argv[0])); +JSC_CCALL(debug_fn_info, return js_debugger_fn_info(js, argv[0])) +JSC_CCALL(debug_backtrace_fns, return js_debugger_backtrace_fns(js,NULL)) +JSC_CCALL(debug_dump_obj, return js_dump_value(js, argv[0])) + static const JSCFunctionListEntry js_debug_funcs[] = { MIST_FUNC_DEF(debug, stack_depth, 0), MIST_FUNC_DEF(debug, build_backtrace, 0), @@ -5932,22 +5870,33 @@ JSC_SCALL(io_slurp, END: ) +size_t js_physfs_write(JSContext *js, PHYSFS_File *f, JSValue val) +{ + size_t len; + size_t wrote; + if (JS_IsString(val)) { + const char *data = JS_ToCStringLen(js,&len,val); + wrote = PHYSFS_writeBytes(f,data,len); + JS_FreeCString(js,data); + } else { + unsigned char *data = JS_GetArrayBuffer(js,&len,val); + wrote = PHYSFS_writeBytes(f,data,len); + } + + if (wrote < len) wrote = -1; + return wrote; +} + JSC_SCALL(io_slurpwrite, PHYSFS_File *f = PHYSFS_openWrite(str); if (!f) { ret = JS_ThrowReferenceError(js,"could not write to %s: %s", str, PHYSFS_getErrorByCode(PHYSFS_getLastErrorCode())); goto END; } - size_t len; - unsigned char *data; - if (JS_IsString(argv[1])) - data = JS_ToCStringLen(js,&len,argv[1]); - else - data = JS_GetArrayBuffer(js,&len, argv[1]); + size_t wrote = js_physfs_write(js,f,argv[1]); - size_t wrote = PHYSFS_writeBytes(f,data, len); PHYSFS_close(f); - if (wrote == -1 || wrote < len) + if (wrote == -1) ret = JS_ThrowReferenceError(js,"%s", PHYSFS_getErrorByCode(PHYSFS_getLastErrorCode())); END: @@ -5989,10 +5938,11 @@ int globfs_cb(struct globdata *data, char *dir, char *file) } char **glob = data->globs; + while (*glob != NULL) { if (wildmatch(*glob, path, WM_WILDSTAR) == WM_MATCH) goto END; - *glob++; + glob++; } PHYSFS_Stat stat; @@ -6023,18 +5973,18 @@ JSC_CCALL(io_globfs, data.arr = ret; data.idx = 0; int globs_len = js_arrlen(js,argv[0]); - char *globs[globs_len+1]; + const char *globs[globs_len+1]; for (int i = 0; i < globs_len; i++) { JSValue g = JS_GetPropertyUint32(js,argv[0],i); globs[i] = JS_ToCString(js,g); JS_FreeValue(js,g); } + globs[globs_len] = NULL; data.globs = globs; const char *path = NULL; if (!JS_IsUndefined(argv[1])) path = JS_ToCString(js,argv[1]); - printf("LOOKING INTO %s\n", path); PHYSFS_enumerate(path, globfs_cb, &data); for (int i = 0; i < globs_len; i++) @@ -6100,7 +6050,7 @@ JSC_SCALL(io_enumerate, ) JSC_CCALL(io_basedir, return JS_NewString(js,PHYSFS_getBaseDir())) -JSC_CCALL(io_userdir, return JS_NewString(js,PHYSFS_getUserDir())) +JSC_SSCALL(io_prefdir, return JS_NewString(js,PHYSFS_getPrefDir(str, str2))) JSC_SCALL(io_open, PHYSFS_File *f = PHYSFS_openWrite(str); @@ -6158,7 +6108,7 @@ static const JSCFunctionListEntry js_io_funcs[] = { MIST_FUNC_DEF(io,slurpwrite,2), MIST_FUNC_DEF(io,writepath, 1), MIST_FUNC_DEF(io,basedir, 0), - MIST_FUNC_DEF(io, userdir, 0), + MIST_FUNC_DEF(io, prefdir, 2), MIST_FUNC_DEF(io, realdir, 1), MIST_FUNC_DEF(io, open, 1), MIST_FUNC_DEF(io, searchpath, 0), @@ -6174,15 +6124,8 @@ JSC_CCALL(file_close, JSC_CCALL(file_write, PHYSFS_File *f = js2PHYSFS_File(js,self); - size_t len; - unsigned char *data; - if (JS_IsString(argv[0])) - data = JS_ToCStringLen(js,&len,argv[0]); - else - data = JS_GetArrayBuffer(js,&len, argv[0]); - - size_t wrote = PHYSFS_writeBytes(f,data,len); - if (wrote == -1 || wrote < len) + size_t wrote = js_physfs_write(js,f,argv[0]); + if (wrote == -1) return JS_ThrowReferenceError(js,"%s", PHYSFS_getErrorByCode(PHYSFS_getLastErrorCode())); ) @@ -6309,6 +6252,7 @@ static JSValue js_transform_set_change_hook(JSContext *js, JSValueConst self, JS if (!JS_IsUndefined(v) && !JS_IsFunction(js,v)) return JS_ThrowReferenceError(js, "Hook must be a function."); JS_FreeValue(js,t->change_hook); t->change_hook = JS_DupValue(js,v); + return JS_UNDEFINED; } static JSValue js_transform_get_parent(JSContext *js, JSValueConst self) @@ -6590,17 +6534,6 @@ JSC_CCALL(os_gc_threshold, JS_SetGCThreshold(JS_GetRuntime(js), js2number(js,arg JSC_CCALL(os_max_stacksize, JS_SetMaxStackSize(JS_GetRuntime(js), js2number(js,argv[0]))) JSC_CCALL(os_rt_info, return JS_GetRTInfo(JS_GetRuntime(js),js)) -static JSValue tmp2js(JSContext *js,FILE *tmp) -{ - size_t size = ftell(tmp); - rewind(tmp); - char *buffer = calloc(size+1, sizeof(char)); - fread(buffer, sizeof(char),size, tmp); - JSValue ret = JS_NewString(js,buffer); - free(buffer); - return ret; -} - JSC_CCALL(os_dump_atoms, return js_dump_atoms(js); ) @@ -6641,8 +6574,8 @@ JSC_CCALL(os_mallinfo, JSJMEMRET(keepcost);*/ ) -JSC_CCALL(os_rusage, - ret = JS_NewObject(js); +JSValue js_os_rusage(JSContext *js, JSValue self, int argc, JSValue *argv) { + JSValue ret = JS_NewObject(js); #ifndef _WIN32 struct rusage jsmem; @@ -6662,7 +6595,9 @@ JSC_CCALL(os_rusage, JSJMEMRET(ru_nvcsw); JSJMEMRET(ru_nivcsw); #endif -) + + return ret; +} JSC_CCALL(os_mem, return js_get_memory_usage(js)) JSC_CCALL(os_value_id, @@ -7135,7 +7070,7 @@ JSC_CCALL(os_hostname, return JS_NewString(js,""); ) -JSC_CCALL(os_freemem, +JSValue js_os_freemem(JSContext *js, JSValue self, int argc, JSValue *argv) { #ifdef _WIN32 MEMORYSTATUSEX statex; statex.dwLength = sizeof(statex); @@ -7158,9 +7093,9 @@ JSC_CCALL(os_freemem, // Fallback: unknown return JS_NewInt64(js,0); #endif -) +} -JSC_CCALL(os_arch, +JSValue js_os_arch(JSContext *js, JSValue self, int argc, JSValue *argv) { #if defined(__x86_64__) || defined(_M_X64) return JS_NewString(js,"x64"); #elif defined(__aarch64__) || defined(_M_ARM64) @@ -7184,9 +7119,9 @@ JSC_CCALL(os_arch, #else return JS_NewString(js,"unknown"); #endif -) +} -JSC_CCALL(os_version, +JSValue js_os_version(JSContext *js, JSValue self, int argc, JSValue *argv) { #ifdef _WIN32 typedef LONG (WINAPI *RtlGetVersionPtr)(PRTL_OSVERSIONINFOW); HMODULE h = GetModuleHandleA("ntdll.dll"); @@ -7225,7 +7160,9 @@ JSC_CCALL(os_version, if (!uname(&info)) return JS_NewString(js, info.release); return JS_NewString(js, ""); #endif -) + + return JS_UNDEFINED; +} static const JSCFunctionListEntry js_os_funcs[] = { MIST_FUNC_DEF(os, make_transform, 0), @@ -7269,7 +7206,7 @@ JSC_CCALL(js_dump_class, return js_get_object_class_distribution(js)) JSC_CCALL(js_dump_type_overheads, return js_get_object_type_overheads(js)) JSC_CCALL(js_dump_objects, return js_dump_objects(js)) -JSValue cycle_fn = JS_UNDEFINED; +static JSValue cycle_fn; void cycle_hook_call(JSContext *js, JSValue v) { @@ -7336,7 +7273,7 @@ static const JSCFunctionListEntry js_video_funcs[] = { void gui_input(SDL_Event *e); // Polls and handles all input events -JSC_CCALL(os_engine_input, +JSValue js_os_engine_input(JSContext *js, JSValue self, int argc, JSValue *argv) { SDL_Event event; while (SDL_PollEvent(&event)) { #ifndef NEDITOR @@ -7346,7 +7283,8 @@ JSC_CCALL(os_engine_input, JSValue ret = JS_Call(js,argv[0], JS_UNDEFINED, 1, &e); uncaught_exception(js,ret); } -) + return JS_UNDEFINED; +} JSC_CCALL(os_push_event, SDL_UserEvent e; @@ -7714,19 +7652,21 @@ void ffi_load(JSContext *js, int argc, char **argv) { QJSCLASSPREP_FUNCS(SDL_Texture) QJSCLASSPREP_FUNCS(SDL_Renderer) QJSCLASSPREP_FUNCS(SDL_Camera) - QJSCLASSPREP_FUNCS(SDL_Cursor) QJSCLASSPREP_FUNCS(SDL_GPUDevice) QJSCLASSPREP_FUNCS(SDL_GPUTexture) QJSCLASSPREP_FUNCS(SDL_GPUCommandBuffer) QJSCLASSPREP_FUNCS(SDL_GPURenderPass) QJSCLASSPREP_FUNCS(SDL_GPUComputePass) - QJSCLASSPREP_FUNCS(SDL_GPUCopyPass) - QJSCLASSPREP_FUNCS(SDL_GPUFence) - QJSCLASSPREP_FUNCS(SDL_GPUTransferBuffer) - QJSCLASSPREP_FUNCS(SDL_GPUShader) - QJSCLASSPREP_FUNCS(SDL_GPUSampler) - QJSCLASSPREP_FUNCS(SDL_GPUGraphicsPipeline) - QJSCLASSPREP_FUNCS(SDL_GPUComputePipeline) + + QJSCLASSPREP_NO_FUNCS(SDL_Cursor) + QJSCLASSPREP_NO_FUNCS(SDL_GPUCopyPass) + QJSCLASSPREP_NO_FUNCS(SDL_GPUFence) + QJSCLASSPREP_NO_FUNCS(SDL_GPUTransferBuffer) + QJSCLASSPREP_NO_FUNCS(SDL_GPUShader) + QJSCLASSPREP_NO_FUNCS(SDL_GPUSampler) + QJSCLASSPREP_NO_FUNCS(SDL_GPUGraphicsPipeline) + QJSCLASSPREP_NO_FUNCS(SDL_GPUComputePipeline) + QJSCLASSPREP_FUNCS(sprite) // QJSCLASSPREP_FUNCS(SDL_GPUGraphicsPipeline) // QJSCLASSPREP_FUNCS(SDL_GPUSampler) @@ -7874,5 +7814,8 @@ void ffi_load(JSContext *js, int argc, char **argv) { JS_SetPropertyStr(js,globalThis,"prosperon", prosp); + idx_buffer = JS_UNDEFINED; + cycle_fn = JS_UNDEFINED; + JS_FreeValue(js,globalThis); } diff --git a/source/model.c b/source/model.c index ef8f28cb..7b02b618 100644 --- a/source/model.c +++ b/source/model.c @@ -1,7 +1,6 @@ #include "model.h" #include "stb_ds.h" -#include "gameobject.h" #include "render.h" @@ -18,59 +17,19 @@ #include "jsffi.h" -unsigned short pack_short_tex(float c) { return c * USHRT_MAX; } - SDL_GPUBuffer *texcoord_floats(float *f, int n) { - unsigned short packed[n]; - for (int i = 0; i < n; i++) { - float v = f[i]; - if (v < 0) v = 0; - if (v > 1) v = 1; - packed[i] = pack_short_tex(v); - } - -/* return sg_make_buffer(&(sg_buffer_desc){ - .data = SG_RANGE(packed), - .label = "tex coord vert buffer", - });*/ - return NULL; -} - -SDL_GPUBuffer *par_idx_buffer(uint32_t *p, int v) -{ - uint16_t idx[v]; - for (int i = 0; i < v; i++) idx[i] = p[i]; - -/* return sg_make_buffer(&(sg_buffer_desc){ - .data = SG_RANGE(idx), - .type = SG_BUFFERTYPE_INDEXBUFFER - });*/ return NULL; } SDL_GPUBuffer *float_buffer(float *f, int v) { return NULL; -/* return sg_make_buffer(&(sg_buffer_desc){ - .data = (sg_range){ - .ptr = f, - .size = sizeof(*f)*v - } - });*/ } SDL_GPUBuffer *index_buffer(float *f, int verts) { return NULL; -/* uint16_t idxs[verts]; - for (int i = 0; i < verts; i++) - idxs[i] = f[i]; - - return sg_make_buffer(&(sg_buffer_desc){ - .data = SG_RANGE(idxs), - .type = SG_BUFFERTYPE_INDEXBUFFER, - });*/ } uint32_t pack_int10_n2(float *norm) @@ -87,71 +46,21 @@ uint32_t pack_int10_n2(float *norm) SDL_GPUBuffer *normal_floats(float *f, int n) { return float_buffer(f, n); -/* uint32_t packed_norms[n/3]; - for (int v = 0, i = 0; v < n/3; v++, i+= 3) - packed_norms[v] = pack_int10_n2(f+i); - - return sg_make_buffer(&(sg_buffer_desc){ - .data = SG_RANGE(packed_norms), - .label = "normal vert buffer", - });*/ } SDL_GPUBuffer *ubyten_buffer(float *f, int v) { return NULL; -/* unsigned char b[v]; - for (int i = 0; i < (v); i++) - b[i] = f[i]*255; - - return sg_make_buffer(&(sg_buffer_desc){.data=SG_RANGE(b)});*/ } SDL_GPUBuffer *ubyte_buffer(float *f, int v) { return NULL; -/* unsigned char b[v]; - for (int i = 0; i < (v); i++) - b[i] = f[i]; - - return sg_make_buffer(&(sg_buffer_desc){.data=SG_RANGE(b)}); -*/ } SDL_GPUBuffer *accessor2buffer(cgltf_accessor *a, int type) { return NULL; -/* int n = cgltf_accessor_unpack_floats(a, NULL, 0); - float vs[n]; - cgltf_accessor_unpack_floats(a, vs, n); - - switch(type) { - case MAT_POS: - return sg_make_buffer(&(sg_buffer_desc){ - .data.ptr = vs, - .data.size = sizeof(float)*n - }); - case MAT_NORM: - return normal_floats(vs,n); - case MAT_TAN: - return normal_floats(vs,n); // TODO: MAKE A TANGENT READER - case MAT_COLOR: - return ubyten_buffer(vs,n); - case MAT_WEIGHT: - return ubyten_buffer(vs,n); - case MAT_BONE: - return ubyte_buffer(vs,n); - case MAT_UV: - return texcoord_floats(vs,n); - case MAT_INDEX: - return index_buffer(vs,n); - } - - return sg_make_buffer(&(sg_buffer_desc) { - .data.size = 4, - .usage = SG_USAGE_STREAM - }); -*/ } void packFloats(float *src, float *dest, int srcLength) { diff --git a/source/model.h b/source/model.h index 3313011e..8bb2dd67 100644 --- a/source/model.h +++ b/source/model.h @@ -3,7 +3,6 @@ #include "HandmadeMath.h" #include "transform.h" -#include "gameobject.h" #include "anim.h" #include "cgltf.h" diff --git a/source/prosperon.c b/source/prosperon.c index 0e45ae47..1836b097 100644 --- a/source/prosperon.c +++ b/source/prosperon.c @@ -96,7 +96,7 @@ int main(int argc, char **argv) { prosperon = argv[0]; PHYSFS_init(argv[0]); - char *base = PHYSFS_getBaseDir(); + const char *base = PHYSFS_getBaseDir(); PHYSFS_setWriteDir(base); PHYSFS_mount(base, "/", 0); diff --git a/source/qjs_imgui.cpp b/source/qjs_imgui.cpp index 60dbcaf0..a336a3d6 100644 --- a/source/qjs_imgui.cpp +++ b/source/qjs_imgui.cpp @@ -3,6 +3,8 @@ #include "imnodes.h" #include "quickjs.h" +#include + #include #include #include "imgui_impl_sdl3.h" @@ -163,11 +165,10 @@ JSC_SCALL(imgui_plot, fill_plotdata(js, argv[1], argv[3]); \ bool shaded = JS_ToBool(js,argv[2]);\ int flag = 0; \ + if (shaded) flag = SHADED; \ + ImPlot::FN(str, &plotdata[0].x, &plotdata[0].y, arrlen(plotdata), ADD flag, 0, sizeof(ImVec2)); \ ) \ -//if (shaded) flag = SHADED; -// ImPlot::FN(str, &plotdata[0].x, &plotdata[0].y, arrlen(plotdata), ADD flag, 0, sizeof(ImVec2)); - static ImVec2 *plotdata = NULL; void fill_plotdata(JSContext *js, JSValue v, JSValue last) @@ -204,40 +205,19 @@ PLOT_FN(digitalplot, PlotDigital,,0) JSC_SCALL(imgui_barplot, fill_plotdata(js, argv[1], JS_UNDEFINED); -// ImPlot::PlotBars(str, &plotdata[0].x, &plotdata[0].y, js_arrlen(js, argv[1]), js2number(js, argv[2]), 0, 0, sizeof(ImVec2)); + ImPlot::PlotBars(str, &plotdata[0].x, &plotdata[0].y, js_arrlen(js, argv[1]), js2number(js, argv[2]), 0, 0, sizeof(ImVec2)); ) JSC_SCALL(imgui_histogramplot, size_t offset, len, per_e; JSValue typed = JS_GetTypedArrayBuffer(js, argv[1], &offset, &len, &per_e); -// ImPlot::PlotHistogram(str, JS_GetArrayBuffer(js, NULL, typed), js_arrlen(js, argv[1])); + ImPlot::PlotHistogram(str, JS_GetArrayBuffer(js, NULL, typed), js_arrlen(js, argv[1])); JS_FreeValue(js, typed); ) -JSC_SCALL(imgui_heatplot, - int rows = js2number(js, argv[2]); - int cols = js2number(js, argv[3]); -// if (rows*cols == (int)js_arrlen(js, argv[1])) -// ImPlot::PlotHeatmap(str, histodata, rows, cols); -) - -JSC_CCALL(imgui_pieplot, -/* if (js_arrlen(js, argv[0]) != js_arrlen(js, argv[1])) return JS_UNDEFINED; - - const char *labels[js_arrlen(js, argv[0])]; - for (int i = 0; i < js_arrlen(js, argv[0]); i++) - labels[i] = JS_ToCString(js, js_getpropidx(argv[0], i)); - - fill_histodata(argv[1]); - ImPlot::PlotPieChart(labels, histodata, js_arrlen(js, argv[1]), js2number(js, argv[2]), js2number(js, argv[3]), js2number(js, argv[4])); - - for (int i = 0; i < js_arrlen(js, argv[0]); i++) - JS_FreeCString(js,labels[i]);*/ -) - JSC_SCALL(imgui_textplot, ImVec2 c = js2vec2(js, argv[1]); -// ImPlot::PlotText(str, c.x, c.y); + ImPlot::PlotText(str, c.x, c.y); ) JSC_CCALL(imgui_inplot, @@ -275,7 +255,7 @@ JSC_SSCALL(imgui_textinput, if (JS_IsUndefined(argv[1])) buffer[0] = 0; else - strncpy(buffer, str2, 512); + strncpy(buffer, str2, sizeof(buffer)-1); ImGui::InputText(str, buffer, sizeof(buffer)); if (strcmp(buffer, str2)) @@ -289,7 +269,7 @@ JSC_SSCALL(imgui_textbox, if (JS_IsUndefined(argv[1])) buffer[0] = 0; else - strncpy(buffer, str2, 512); + strncpy(buffer, str2, sizeof(buffer)-1); ImGui::InputTextMultiline(str, buffer, sizeof(buffer)); if (strcmp(buffer, str2)) @@ -834,7 +814,6 @@ const JSCFunctionListEntry js_imgui_funcs[] = { MIST_FUNC_DEF(imgui, stairplot, 4), MIST_FUNC_DEF(imgui, digitalplot, 4), MIST_FUNC_DEF(imgui, barplot, 3), - MIST_FUNC_DEF(imgui, pieplot, 5), MIST_FUNC_DEF(imgui, textplot, 2), MIST_FUNC_DEF(imgui, histogramplot, 2), MIST_FUNC_DEF(imgui, plotaxes, 2), diff --git a/source/qjs_macros.h b/source/qjs_macros.h index 7586540e..47f05fb5 100644 --- a/source/qjs_macros.h +++ b/source/qjs_macros.h @@ -134,10 +134,13 @@ JS_SetPropertyStr(js, globalThis, #NAME, NAME); \ /* Defines a class and uses its function list as its prototype */ #define QJSCLASSPREP_FUNCS(TYPE) \ +QJSCLASSPREP_NO_FUNCS(TYPE) \ +JS_SetPropertyFunctionList(js, TYPE##_proto, js_##TYPE##_funcs, countof(js_##TYPE##_funcs)); \ + +#define QJSCLASSPREP_NO_FUNCS(TYPE) \ JS_NewClassID(&js_##TYPE##_id);\ JS_NewClass(JS_GetRuntime(js), js_##TYPE##_id, &js_##TYPE##_class);\ JSValue TYPE##_proto = JS_NewObject(js); \ -JS_SetPropertyFunctionList(js, TYPE##_proto, js_##TYPE##_funcs, countof(js_##TYPE##_funcs)); \ JS_SetClassProto(js, js_##TYPE##_id, TYPE##_proto); \ JS_SetPropertyStr(js, c_types, #TYPE, JS_DupValue(js,TYPE##_proto)); \ @@ -147,8 +150,6 @@ JSValue js_##NAME##_use(JSContext *js) { \ JS_SetPropertyFunctionList(js,mod,js_##NAME##_funcs,countof(js_##NAME##_funcs)); \ return mod; } \ -#define MISTLINE(NAME) (ModuleEntry){ #NAME, js_##NAME##_funcs, countof(js_##NAME##_funcs) } - #define countof(x) (sizeof(x)/sizeof((x)[0])) diff --git a/source/qjs_tracy.c b/source/qjs_tracy.c index 082f1ab0..da9db3c0 100644 --- a/source/qjs_tracy.c +++ b/source/qjs_tracy.c @@ -37,6 +37,7 @@ static JSValue js_tracy_fiber_leave(JSContext *js, JSValue self, int argc, JSVal const char *str = JS_AtomToCString(js, atom); TracyCFiberLeave(str); JS_FreeAtom(js,atom); + return JS_UNDEFINED; } static JSValue js_tracy_plot(JSContext *js, JSValue self, int argc, JSValue *argv) @@ -61,7 +62,15 @@ static JSValue js_tracy_plot_config(JSContext *js, JSValue self, int argc, JSVal return JS_UNDEFINED; #endif -// TracyCPlotConfig(str, js2number(js,argv[1]), JS_ToBool(js,argv[2]), JS_ToBool(js,argv[3]), js2number(js,argv[4])) + const char *str = JS_ToCString(js,argv[0]); + + uint32_t type, color; + + JS_ToUint32(js,&type, argv[1]); + JS_ToUint32(js,&color,argv[4]); + TracyCPlotConfig(str, type, JS_ToBool(js,argv[2]), JS_ToBool(js,argv[3]), color); + JS_FreeCString(js,str); + return JS_UNDEFINED; } @@ -73,6 +82,8 @@ static JSValue js_tracy_frame_mark(JSContext *js, JSValue self, int argc, JSValu #endif TracyCFrameMark + + return JS_UNDEFINED; } static JSValue js_tracy_message(JSContext *js, JSValue self, int argc, JSValue *argv) @@ -86,6 +97,7 @@ static JSValue js_tracy_message(JSContext *js, JSValue self, int argc, JSValue * const char *str = JS_ToCStringLen(js, &len, argv[0]); TracyCMessage(str,len); JS_FreeCString(js,str); + return JS_UNDEFINED; } static JSValue js_tracy_thread_name(JSContext *js, JSValue self, int argc, JSValue *argv) @@ -98,6 +110,7 @@ static JSValue js_tracy_thread_name(JSContext *js, JSValue self, int argc, JSVal const char *str = JS_ToCString(js, argv[0]); TracyCSetThreadName(str); JS_FreeCString(js,str); + return JS_UNDEFINED; } static JSValue js_tracy_zone_begin(JSContext *js, JSValue self, int argc, JSValue *argv) @@ -121,6 +134,7 @@ static JSValue js_tracy_zone_begin(JSContext *js, JSValue self, int argc, JSValu JS_Call(js, argv[0], JS_UNDEFINED, 0, NULL); TracyCZoneEnd(TCTX); + return JS_UNDEFINED; } #ifdef SOKOL_GLCORE @@ -189,7 +203,6 @@ static JSValue js_tracy_gpu_zone_begin(JSContext *js, JSValue self, int argc, JS }; ___tracy_emit_gpu_zone_end(enddata); qhead = (qhead+1)%query_count; - return ret; } @@ -477,8 +490,8 @@ static JSValue js_tracy_image(JSContext *js, JSValue self, int argc, JSValue *ar /* SDL_Surface *img = js2SDL_Surface(js,argv[0]); SDL_Surface *scaled = SDL_ScaleSurface(img, 320,180,SDL_SCALEMODE_LINEAR); ___tracy_emit_frame_image(scaled->pixels, scaled->w,scaled->h, 0,0); - SDL_DestroySurface(scaled); - return JS_UNDEFINED;*/ + SDL_DestroySurface(scaled);*/ + return JS_UNDEFINED; } #endif @@ -513,6 +526,8 @@ JSValue js_tracy_level(JSContext *js, JSValue selff, int argc, JSValue *argv) js_debug_sethook(js, tracy_call_hook, JS_HOOK_CALL); js_debug_sethook(js, tracy_end_hook, JS_HOOK_RET); } + + return JS_UNDEFINED; } static const JSCFunctionListEntry js_tracy_funcs[] = { @@ -523,11 +538,13 @@ static const JSCFunctionListEntry js_tracy_funcs[] = { JS_CFUNC_DEF("gpu_init", 0, js_tracy_gpu_init), JS_CFUNC_DEF("gpu_sync", 0, js_tracy_gpu_sync), JS_CFUNC_DEF("end_frame", 0, js_tracy_frame_mark), + JS_CFUNC_DEF("thread_name", 1, js_tracy_thread_name), JS_CFUNC_DEF("zone", 1, js_tracy_zone_begin), JS_CFUNC_DEF("message", 1, js_tracy_message), JS_CFUNC_DEF("plot", 2, js_tracy_plot), JS_CFUNC_DEF("image", 3, js_tracy_image), JS_CFUNC_DEF("level", 1, js_tracy_level), + JS_CFUNC_DEF("plot_config", 5, js_tracy_plot_config), }; JSValue js_tracy_use(JSContext *js) diff --git a/source/quadtree.c b/source/quadtree.c deleted file mode 100644 index 0c635c4d..00000000 --- a/source/quadtree.c +++ /dev/null @@ -1,246 +0,0 @@ -#include -#include -#include -#include - -#include "aabb.h" - -/// Default node size cap -#define QTREE_STDCAP 4 - -/// A function pointer def for determining if an element exists in a range -typedef int (*qtree_fnc)(void *ptr, aabb *range); -typedef int (*qtree_rm)(void *ptr, void *cmp); - -/// Quadtree node -typedef struct qnode { - uint16_t cnt; ///< Number of elements in this node - aabb bound; ///< Area this node covers - void **elist; ///< List of element pointers - struct qnode *nw; ///< NW quadrant of this node - struct qnode *ne; ///< NE quadrant of this node - struct qnode *sw; ///< SW quadrant of this node - struct qnode *se; ///< SE quadrant of this node -} qnode; - -/// Quadtree container -typedef struct _qtree { - uint16_t maxnodecap; ///< Maximum element count per node - qnode *root; ///< Root node - qtree_fnc cmpfnc; ///< Element range compare function pointer - qtree_rm rmfnc; -} _qtree; - -typedef struct _qtree* qtree; - -/// Simple container for returning found elements -typedef struct retlist { - uint32_t cnt; ///< Number of elements found - aabb range; ///< Range to use for searching - void **list; ///< Array of pointers to found elements -} retlist; - -static void retlist_add(retlist *r, void *p) { - r->list = realloc(r->list, sizeof(void*)*(r->cnt+1)); - r->list[r->cnt] = p; - r->cnt++; -} - -static uint16_t qtree_getMaxNodeCnt(qtree q) { - uint16_t r; - r = q->maxnodecap; - return r; -} - -static qnode* qnode_new(qtree p, float x, float y, float hW, float hH) { - qnode *q = malloc(sizeof(qnode)); - memset(q, 0, sizeof(qnode)); - q->bound.center.x = x; - q->bound.center.y = y; - q->bound.dims.w = hW; - q->bound.dims.h = hH; - - return q; -} - -static void qnode_free(qtree q, qnode *qn) { - if(qn->cnt) - free(qn->elist); - - qn->cnt = 0; - - if(qn->nw) { - qnode_free(q, qn->nw); - qnode_free(q, qn->ne); - qnode_free(q, qn->sw); - qnode_free(q, qn->se); - } - - free(qn); -} - -static void add(qnode *q, void *p) { - q->elist = realloc(q->elist, sizeof(void*)*(q->cnt+1)); - q->elist[q->cnt] = p; - q->cnt++; -} - -static void drop(qnode *q, uint16_t idx) { - void **narry = malloc(sizeof(void*)*(q->cnt-1)); - - // This is a little (lot) ugly; a pair of memcpy's would be - // better, but I had some problems with it - for(uint16_t i=0,skip=0; icnt; i++) { - if(i == idx) { skip++; continue; } - narry[i-skip] = q->elist[i]; - } - - void **old = q->elist; - q->elist = narry; - free(old); - q->cnt--; -} - -static void subdivide(qtree p, qnode *q) { - float cx = q->bound.center.x; - float cy = q->bound.center.y; - float hw = q->bound.dims.w/2; - float hh = q->bound.dims.h/2; - - q->nw = qnode_new(p, cx-hw, cy-hh, hw, hh); - q->ne = qnode_new(p, cx+hw, cy-hh, hw, hh); - q->sw = qnode_new(p, cx-hw, cy+hh, hw, hh); - q->se = qnode_new(p, cx+hw, cy+hh, hw, hh); -} - -static int qnode_insert(qtree q, qnode *qn, void *ptr) { - int ret = 0; - - if(! (q->cmpfnc)(ptr, &qn->bound)) return 0; - - if(qn->cnt < qtree_getMaxNodeCnt(q)) { - add(qn, ptr); - return 1; - } - - if(! qn->nw) - subdivide(q, qn); - - if(qnode_insert(q,qn->nw,ptr)) - return 1; - else if(qnode_insert(q,qn->ne,ptr)) - return 1; - else if(qnode_insert(q,qn->sw,ptr)) - return 1; - else if(qnode_insert(q,qn->se,ptr)) - return 1; -} - -static void* qnode_remove(qtree q, qnode *qn, void *ptr) { - if(qn->cnt) { - for(uint16_t i=0; icnt; i++) { - if(q->rmfnc(qn->elist[i], ptr)) { - drop(qn, i); - ptr = NULL; - goto QN_REM_EXIT; - } - } - } - - if(! qn->nw) - return NULL; - - if(qnode_remove(q, qn->nw, ptr)) return ptr; - if(qnode_remove(q, qn->ne, ptr)) return ptr; - if(qnode_remove(q, qn->sw, ptr)) return ptr; - if(qnode_remove(q, qn->se, ptr)) return ptr; - - return NULL; - QN_REM_EXIT: - return ptr; -} - -static void qnode_getInRange(qtree q, qnode *qn, retlist *r) { - if(qn->cnt) { - if(! aabb_intersects(&qn->bound, &r->range)) - goto QN_GET_EXIT; - - for(uint16_t i=0; icnt; i++) - if((q->cmpfnc)(qn->elist[i], &r->range)) - retlist_add(r, qn->elist[i]); - } - - if(! qn->nw) - goto QN_GET_EXIT; - - qnode_getInRange(q, qn->nw, r); - qnode_getInRange(q, qn->ne, r); - qnode_getInRange(q, qn->sw, r); - qnode_getInRange(q, qn->se, r); - - QN_GET_EXIT: - return; -} - -qtree qtree_new(float x, float y, float w, float h, qtree_fnc fnc, qtree_rm rm) { - qtree q = malloc(sizeof(_qtree)); - memset(q, 0, sizeof(_qtree)); - - q->maxnodecap = QTREE_STDCAP; - q->cmpfnc = fnc; - q->rmfnc = rm; - q->root = qnode_new(q, x+(w/2),y+(h/2),w/2,h/2); - - return q; -} - -void qtree_destroy(qtree q) { - void *m; - if(q->root) qnode_free(q, q->root); - - memset(q, 0, sizeof(_qtree)); - - free(q); -} - -void qtree_insert(qtree q, void *ptr) { - qnode_insert(q, q->root, ptr); -} - -void qtree_remove(qtree q, void *ptr) { - qnode_remove(q, q->root, ptr); -} - -void qtree_setMaxNodeCnt(qtree q, uint16_t cnt) { - q->maxnodecap = cnt || 1; -} - -void qtree_clear(qtree q) { - float x = q->root->bound.center.x; - float y = q->root->bound.center.y; - float w = q->root->bound.dims.w; - float h = q->root->bound.dims.h; - qnode *qn = q->root; - - q->root = qnode_new(q, x, y, w, h); - - qnode_free(q, qn); -} - -void** qtree_findInArea(qtree q, float x, float y, float w, float h, uint32_t *cnt) { - float hw = w/2; - float hh = h/2; - - retlist ret; - memset(&ret, 0, sizeof(retlist)); - - ret.range.center.x = x+hw; - ret.range.center.y = y+hh; - ret.range.dims.w = hw; - ret.range.dims.h = hh; - - qnode_getInRange(q, q->root, &ret); - - *cnt = ret.cnt; - return ret.list; -} diff --git a/source/quadtree.h b/source/quadtree.h deleted file mode 100644 index 35b28272..00000000 --- a/source/quadtree.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - quadtree.h - 2014 JSK (kutani@projectkutani.com) - - Part of the Panic Panic project. - - Released to the public domain. See LICENSE for details. -*/ -#ifndef _QUADTREE_H - #define _QUADTREE_H - -#ifndef _AABB_H - #include "aabb.h" -#endif - -/// Opaque pointer to a quadtree data structure -typedef struct _qtree* qtree; - -/// A function pointer def for determining if an element exists in a range -typedef int (*qtree_fnc)(void *ptr, aabb *range); - -typedef int (*qtree_rm)(void *ptr, void *cmp); - -/// Create a new qtree -/*! - Creates a new qtree with a bound of w,h size, centered at x,y. - - Uses the passed function pointer fnc to test elements against nodes - for insertion, and finding. - - Returns a new qtree pointer. -*/ -qtree qtree_new(float x, float y, float w, float h, qtree_fnc fnc, qtree_rm rm); - -void qtree_destroy(qtree q); - -/// Insert an element -/*! - Inserts the passed element into quadtree q. - - Uses the function passed to qtree_new() to determine where the - element should go. -*/ -void qtree_insert(qtree q, void *ptr); - -/// Removes an element from the quadtree -/*! - Performs a selective removal of the passed element. - - Performs a naive pointer comparison and a depth-first search of the - tree, so this isn't very fast. -*/ -void qtree_remove(qtree q, void *ptr); - -/// Set the maximum number of elements per node -/*! - Sets the maximum elements per quadtree node. - - The default is 4. -*/ -void qtree_setMaxNodeCnt(qtree q, uint16_t cnt); - -/// Resets a quadtree -/*! - Clears all nodes held by the quadtree and creates a fresh root node - with no elements assigned. -*/ -void qtree_clear(qtree q); - -/// Find all elements within a rectangular bound -/*! - Performs a search for any elements within the given x,y + w,h - bound. Returns an array of pointers to any elements (which should be - freed by the user), and places the number of elements in cnt. -*/ -void** qtree_findInArea(qtree q, float x, float y, float w, float h, uint32_t *cnt); - -#endif diff --git a/source/script.c b/source/script.c index b2057728..bab86fe1 100644 --- a/source/script.c +++ b/source/script.c @@ -27,7 +27,7 @@ static JSRuntime *rt = NULL; JSContext *global_js = NULL; -JSValue on_exception = JS_UNDEFINED; +JSValue on_exception; #define ENGINE "scripts/core/engine.js" @@ -158,6 +158,8 @@ void script_startup(int argc, char **argv) { JS_AddIntrinsicBigDecimal(js); JS_AddIntrinsicOperators(js); + on_exception = JS_UNDEFINED; + ffi_load(js, argc, argv); PHYSFS_File *eng = PHYSFS_openRead(ENGINE); diff --git a/source/spline.c b/source/spline.c index ae9a64c3..1115f44d 100644 --- a/source/spline.c +++ b/source/spline.c @@ -3,318 +3,447 @@ #include "transform.h" #include "math.h" +/* ------------------------------------------------------------------------- + Cubic Spline Basis Matrices + ------------------------------------------------------------------------- */ static const HMM_Mat4 cubic_hermite_m = { - 2, -2, 1, 1, - -3, 3, -2, -1, - 0, 0, 1, 0, - 1, 0, 0, 0 + 2, -2, 1, 1, + -3, 3, -2, -1, + 0, 0, 1, 0, + 1, 0, 0, 0 }; static const HMM_Mat4 cubic_hermite_dm = { - 0, 0, 0, 0, - 6, -6, 3, 3, - -6, 6, -4, -2, - 0, 0, 1, 0 + 0, 0, 0, 0, + 6, -6, 3, 3, + -6, 6, -4, -2, + 0, 0, 1, 0 }; static const HMM_Mat4 cubic_hermite_ddm = { - 0, 0, 0, 0, - 0, 0, 0, 0, - 12, -12, 6, 6, - -6, 6, -4, -2 + 0, 0, 0, 0, + 0, 0, 0, 0, + 12, -12, 6, 6, + -6, 6, -4, -2 }; static const HMM_Mat4 cubic_hermite_dddm = { - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0, - 12, -12, 6, 6 + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 12, -12, 6, 6 }; static const HMM_Mat4 b_spline_m = { - -1/6, 3/6, -3/6, 1, - 3/6, -6/6, 3/6, 0, - -3/6, 0, 3/6, 0, - 1/6, 4/6, 1/6, 0 + -1.0f/6, 3.0f/6, -3.0f/6, 1.0f, + 3.0f/6, -6.0f/6, 3.0f/6, 0.0f, + -3.0f/6, 0.0f, 3.0f/6, 0.0f, + 1.0f/6, 4.0f/6, 1.0f/6, 0.0f }; static const HMM_Mat4 b_spline_dm = { - 0, 0, 0, 0, - -3/6, 9/6, -9/6, 3, - 6/6, -12/6, 6/6, 0, - -3/6, 0, 3/6, 0 + 0, 0, 0, 0, + -3.0f/6, 9.0f/6, -9.0f/6, 3.0f, + 6.0f/6, -12.0f/6, 6.0f/6, 0.0f, + -3.0f/6, 0.0f, 3.0f/6, 0.0f }; static const HMM_Mat4 b_spline_ddm = { - 0, 0, 0, 0, - 0, 0, 0, 0, - -6/6, 18/6, -18/6, 6, - 6/6, -12/6, 6/6, 0 + 0, 0, 0, 0, + 0, 0, 0, 0, + -6.0f/6, 18.0f/6, -18.0f/6, 6.0f, + 6.0f/6, -12.0f/6, 6.0f/6, 0.0f }; static const HMM_Mat4 b_spline_dddm = { - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0, - -6/6, 18/6, -18/6, 6 + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + -6.0f/6, 18.0f/6, -18.0f/6, 6.0f }; static const HMM_Mat4 bezier_m = { - -1, 3, -3, 1, - 3, -6, 3, 0, - -3, 3, 0, 0, - 1, 0, 0, 0 + -1, 3, -3, 1, + 3, -6, 3, 0, + -3, 3, 0, 0, + 1, 0, 0, 0 }; static const HMM_Mat4 bezier_dm = { - 0, 0, 0, 0, - -3, 9, -9, 3, - 6, -12, 6, 0, - -3, 3, 0, 0, + 0, 0, 0, 0, + -3, 9, -9, 3, + 6, -12, 6, 0, + -3, 3, 0, 0 }; static const HMM_Mat4 bezier_ddm = { - 0, 0, 0, 0, - 0, 0, 0, 0, - -6, 18, -18, 6, - 6, -12, 6, 0 + 0, 0, 0, 0, + 0, 0, 0, 0, + -6, 18, -18, 6, + 6, -12, 6, 0 }; static const HMM_Mat4 bezier_dddm = { - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0, - -6, 18, -18, 6 + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + -6, 18, -18, 6 }; -#define CAT_S 0.5 +/* Catmull–Rom (with tension = 0.5 by default) */ +#define CAT_S 0.5f -/* Position */ static const HMM_Mat4 catmull_rom_m = { - -CAT_S, 2-CAT_S, CAT_S-2, CAT_S, - 2*CAT_S, CAT_S-3, 3-2*CAT_S, -CAT_S, - -CAT_S, 0, CAT_S, 0, - 0, 1, 0, 0 + -CAT_S, 2-CAT_S, CAT_S-2, CAT_S, + 2*CAT_S, CAT_S-3, 3-2*CAT_S, -CAT_S, + -CAT_S, 0, CAT_S, 0, + 0, 1, 0, 0 }; -/* Tangent */ static const HMM_Mat4 catmull_rom_dm = { - 0, 0, 0, 0, - -3*CAT_S, 9*CAT_S, -9*CAT_S, 3*CAT_S, - 4*CAT_S, -10*CAT_S, 8*CAT_S, -2*CAT_S, - -CAT_S, 0, CAT_S, 0, + 0, 0, 0, 0, + -3*CAT_S, 9*CAT_S, -9*CAT_S, 3*CAT_S, + 4*CAT_S, -10*CAT_S, 8*CAT_S, -2*CAT_S, + -CAT_S, 0, CAT_S, 0 }; -/* Curvature */ static const HMM_Mat4 catmull_rom_ddm = { - 0, 0, 0, 0, - 0, 0, 0, 0, - -9*CAT_S, 18*CAT_S, -18*CAT_S, 6*CAT_S, - 4*CAT_S, -10*CAT_S, 8*CAT_S, -2*CAT_S + 0, 0, 0, 0, + 0, 0, 0, 0, + -9*CAT_S, 18*CAT_S, -18*CAT_S, 6*CAT_S, + 4*CAT_S, -10*CAT_S, 8*CAT_S, -2*CAT_S }; -/* Wiggle */ static const HMM_Mat4 catmull_rom_dddm = { - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0, - -9*CAT_S, 18*CAT_S, -18*CAT_S, 6*CAT_S + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + -9*CAT_S, 18*CAT_S, -18*CAT_S, 6*CAT_S }; -/* - [t3 t2 t1 1] B [p1 - p2 that is, point 1, tangent at point 1, point 2, tan and point 2 - t1 - t2] - -*/ - +/* ------------------------------------------------------------------------- + Core “C·T” multiplication: [ t^3, t^2, t, 1 ] * C + ------------------------------------------------------------------------- */ HMM_Vec4 spline_CT(HMM_Mat4 *C, float t) { - float t2 = t*t; - float t3 = t2*t; - HMM_Vec4 T = {t3, t2, t, 1}; - return HMM_MulM4V4(*C, T); + float t2 = t * t; + float t3 = t2 * t; + HMM_Vec4 T = { t3, t2, t, 1.0f }; + return HMM_MulM4V4(*C, T); } +/* Construct the “geometry matrix” G from four 2D points, then multiply by B */ HMM_Mat4 make_C(const HMM_Vec2 *p, const HMM_Mat4 *B) { - HMM_Mat4 G; - G.Columns[0].xy = p[0]; - G.Columns[1].xy = p[1]; - G.Columns[2].xy = p[2]; - G.Columns[3].xy = p[3]; - return HMM_MulM4(G, *B); + HMM_Mat4 G = HMM_M4(); // Zeroed out + // Only fill XY of each column; if you are storing 3D in HMM_Vec4, adapt as needed + G.Columns[0].XY = p[0]; + G.Columns[1].XY = p[1]; + G.Columns[2].XY = p[2]; + G.Columns[3].XY = p[3]; + + return HMM_MulM4(G, *B); } +/* Evaluate a single-segment cubic spline at parameter d in [0,1]. + p must be 4 control points, m is the cubic basis matrix. */ HMM_Vec2 cubic_spline_d(HMM_Vec2 *p, HMM_Mat4 *m, float d) { - HMM_Mat4 C = make_C(p, m); - return spline_CT(&C, d).xy; + HMM_Mat4 C = make_C(p, m); + HMM_Vec4 v4 = spline_CT(&C, d); + return v4.XY; } +/* ------------------------------------------------------------------------- + Convenience single-segment functions for each basis + (pos / tan / curv / wig all require the appropriate matrix) + Typically you pass p[0..3] as the 4 relevant control points. + ------------------------------------------------------------------------- */ +HMM_Vec2 cubic_hermite_pos(HMM_Vec2 *p, float d) { return cubic_spline_d(p, (HMM_Mat4 *)&cubic_hermite_m, d); } +HMM_Vec2 cubic_hermite_tan(HMM_Vec2 *p, float d) { return cubic_spline_d(p, (HMM_Mat4 *)&cubic_hermite_dm, d); } +HMM_Vec2 cubic_hermite_curv(HMM_Vec2 *p, float d){ return cubic_spline_d(p, (HMM_Mat4 *)&cubic_hermite_ddm, d); } +HMM_Vec2 cubic_hermite_wig(HMM_Vec2 *p, float d) { return cubic_spline_d(p, (HMM_Mat4 *)&cubic_hermite_dddm, d); } + +HMM_Vec2 b_spline_pos(HMM_Vec2 *p, float d) { return cubic_spline_d(p, (HMM_Mat4 *)&b_spline_m, d); } +HMM_Vec2 b_spline_tan(HMM_Vec2 *p, float d) { return cubic_spline_d(p, (HMM_Mat4 *)&b_spline_dm, d); } +HMM_Vec2 b_spline_curv(HMM_Vec2 *p, float d){ return cubic_spline_d(p, (HMM_Mat4 *)&b_spline_ddm, d); } +HMM_Vec2 b_spline_wig(HMM_Vec2 *p, float d) { return cubic_spline_d(p, (HMM_Mat4 *)&b_spline_dddm, d); } + +HMM_Vec2 bezier_pos(HMM_Vec2 *p, float d) { return cubic_spline_d(p, (HMM_Mat4 *)&bezier_m, d); } +HMM_Vec2 bezier_tan(HMM_Vec2 *p, float d) { return cubic_spline_d(p, (HMM_Mat4 *)&bezier_dm, d); } +HMM_Vec2 bezier_curv(HMM_Vec2 *p, float d){ return cubic_spline_d(p, (HMM_Mat4 *)&bezier_ddm, d); } +HMM_Vec2 bezier_wig(HMM_Vec2 *p, float d) { return cubic_spline_d(p, (HMM_Mat4 *)&bezier_dddm, d); } + +/* ------------------------------------------------------------------------- + Multi-segment sampling (“spline_v2”) for uniform division + ------------------------------------------------------------------------- */ HMM_Vec2 *spline_v2(HMM_Vec2 *p, HMM_Mat4 *m, int segs) { - HMM_Vec2 *ret = NULL; - if (segs < 2) return NULL; - - HMM_Mat4 C = make_C(p, m); - float s = (float)1/segs; + // For a single 4-point segment, produce 'segs' points along [0..1). + // If you want the final 1.0 also, you can do <= 1.0 in the loop, etc. + HMM_Vec2 *ret = NULL; + if (segs < 2) return NULL; - for (float t = 0; t < 1; t += s) - arrput(ret, spline_CT(&C, t).xy); - - return ret; + HMM_Mat4 C = make_C(p, m); + float s = 1.0f / (float)segs; + for (int i = 0; i <= segs; i++) + { + float t = s * i; + arrput(ret, spline_CT(&C, t).XY); + } + return ret; } +/* ------------------------------------------------------------------------- + Adaptive subdivision by min segment length + (spline2d_min_seg) – for a single 4-point segment + ------------------------------------------------------------------------- */ HMM_Vec2 *spline2d_min_seg(float u0, float u1, float min_seg, HMM_Mat4 *C, HMM_Vec2 *ret) { - HMM_Vec2 a = spline_CT(C, u0).xy; - HMM_Vec2 b = spline_CT(C, u1).xy; - if (HMM_DistV2(a,b) > min_seg) { - float umid = (u0+u1)/2; - spline2d_min_seg(u0, umid, min_seg, C, ret); - spline2d_min_seg(umid, u1, min_seg, C, ret); - } - else - arrput(ret, b); - - return ret; + HMM_Vec2 a = spline_CT(C, u0).XY; + HMM_Vec2 b = spline_CT(C, u1).XY; + if (HMM_DistV2(a, b) > min_seg) + { + float umid = 0.5f * (u0 + u1); + spline2d_min_seg(u0, umid, min_seg, C, ret); + spline2d_min_seg(umid, u1, min_seg, C, ret); + } + else + { + // We push 'b' so that we don't double-push 'a' + arrput(ret, b); + } + return ret; } +/* Example: catmull_rom_min_seg -> subdiv for catmull–rom over one segment + You would decide how to pick the 4 points from a,b,c,d, then run. */ HMM_Vec2 *catmull_rom_min_seg(HMM_Vec2 *a, HMM_Vec2 *b, HMM_Vec2 *c, HMM_Vec2 *d, float min_seg) { - HMM_Vec2 *ret = NULL; - arrsetcap(ret, 1000); - arrput(ret, *b); -// spline2d_min_seg(0, 1, min_seg, &C, ret); - return ret; + HMM_Vec2 *ret = NULL; + arrsetcap(ret, 1000); + + // Build the matrix for these four points + HMM_Vec2 p[4] = { *a, *b, *c, *d }; + HMM_Mat4 C = make_C(p, &catmull_rom_m); + + // Always push the starting point (b in your original code was the second ctrl point, etc.) + // But usually we want the first actual point in the segment: + arrput(ret, cubic_spline_d(p, (HMM_Mat4*)&catmull_rom_m, 0.0f)); + + // Actually subdiv + spline2d_min_seg(0.0f, 1.0f, min_seg, &C, ret); + + return ret; } -HMM_Vec2 *spline2d_min_angle_2(float u0, float u1, float max_angle, HMM_Mat4 *C, HMM_Vec2 *arr) +/* ------------------------------------------------------------------------- + Adaptive subdivision by “max angle” proxy + (spline2d_min_angle_2) – for single 4-point segment + ------------------------------------------------------------------------- */ +HMM_Vec2 *spline2d_min_angle_2(float u0, float u1, float max_angle, HMM_Mat4 *C, HMM_Vec2 *arr) { - float ustep = (u1-u0)/4; - float um0 = u0+ustep; - float um1 = u0+(ustep*2); - float um2 = u0+(ustep*3); - - HMM_Vec2 m0 = spline_CT(C, um0)._2; - HMM_Vec2 m1 = spline_CT(C, um1)._2; - HMM_Vec2 m2 = spline_CT(C,um2)._2; + // Heuristic approach: sample midpoints, check “chord vs polyline” difference + float ustep = (u1 - u0) / 4.0f; + float um0 = u0 + ustep; + float um1 = u0 + 2.0f * ustep; + float um2 = u0 + 3.0f * ustep; - HMM_Vec2 a = spline_CT(C,u0)._2; - HMM_Vec2 b = spline_CT(C,u1)._2; - - float ab = HMM_DistV2(a,b); - float cdist = HMM_DistV2(a,m0) + HMM_DistV2(m0,m1) + HMM_DistV2(m1,m2) + HMM_DistV2(m2,b); + HMM_Vec2 m0 = spline_CT(C, um0).XY; + HMM_Vec2 m1 = spline_CT(C, um1).XY; + HMM_Vec2 m2 = spline_CT(C, um2).XY; - if (cdist-ab > max_angle) { - arr = spline2d_min_angle_2(u0,um1,max_angle,C,arr); - arr = spline2d_min_angle_2(um1,u1,max_angle,C,arr); - } else - arrput(arr,b); - - return arr; + HMM_Vec2 a = spline_CT(C, u0).XY; + HMM_Vec2 b = spline_CT(C, u1).XY; + + // Chord = distance from a to b + float chord = HMM_DistV2(a, b); + // Polyline = a->m0->m1->m2->b + float cdist = HMM_DistV2(a, m0) + + HMM_DistV2(m0, m1) + + HMM_DistV2(m1, m2) + + HMM_DistV2(m2, b); + + // If the difference is bigger than some threshold (max_angle), + // subdivide. Otherwise, keep it. + if ((cdist - chord) > max_angle) + { + arr = spline2d_min_angle_2(u0, um1, max_angle, C, arr); + arr = spline2d_min_angle_2(um1, u1, max_angle, C, arr); + } + else + { + // We accept “b” as a new point + arrput(arr, b); + } + return arr; } HMM_Vec2 *spline_min_angle(HMM_Vec2 *p, const HMM_Mat4 *B, float min_angle, HMM_Vec2 *arr) { - HMM_Mat4 C = make_C(p, B); - arr = spline2d_min_angle_2(0,1,min_angle, &C, arr); - return arr; + HMM_Mat4 C = make_C(p, B); + // Subdivide from 0..1 + float u0 = 0.0f, u1 = 1.0f; + // Usually we want to ensure the start point is in arr: + HMM_Vec2 startPt = spline_CT(&C, u0).XY; + if (arrlen(arr) == 0) { + arrput(arr, startPt); + } + // Now subdiv for angle + arr = spline2d_min_angle_2(u0, u1, min_angle, &C, arr); + return arr; } +/* Example: catmull_rom_ma_v2 – uses “min_angle” over multiple segments + Each 4 consecutive points is one segment. We do this for all segments. */ HMM_Vec2 *catmull_rom_ma_v2(HMM_Vec2 *cp, float ma) { - if (arrlen(cp) < 4) return NULL; - HMM_Vec2 *ret = NULL; + int n = arrlen(cp); + if (n < 4) return NULL; - int segments = arrlen(cp)-3; - arrsetcap(ret,segments*(ma>=2 ? 3 : 7)); - arrput(ret, cp[1]); - for (int i = 0; i < arrlen(cp)-3; i++) - ret = spline_min_angle(&cp[i], &catmull_rom_m, ma, ret); + HMM_Vec2 *ret = NULL; + // Pre-allocate some capacity + arrsetcap(ret, (n-3) * 8); - return ret; + // For convenience, let's always ensure we push the very first point: + arrput(ret, cp[0]); + + // For each segment [i, i+1, i+2, i+3], adaptively sample + // Then move i by 1 each time if you want Catmull–Rom in “overlapped” fashion + for (int i = 0; i < n - 3; i++) + { + // p[i..i+3] + ret = spline_min_angle(&cp[i], &catmull_rom_m, ma, ret); + } + return ret; } +/* Example: do the same with Bezier in “cubic-bezier” style (control points in groups of 3 “handles”) */ HMM_Vec2 *bezier_cb_ma_v2(HMM_Vec2 *cp, float ma) { - if (arrlen(cp) < 4) return NULL; - HMM_Vec2 *ret = NULL; - int segments = arrlen(cp)-3; - arrsetcap(ret,segments*(ma>=2?3:7)); - arrput(ret,cp[0]); - for (int i = 0; i < arrlen(cp)-3; i+=3) - ret = spline_min_angle(&cp[i], &bezier_m, ma, ret); + int n = arrlen(cp); + // Typically a Bezier “chain” would use control points in multiples of 3 + 1, etc. + // E.g. p[0] is start, p[1..3] are control handles for first segment, then p[3..6] for second, etc. + // Adjust logic to your liking. + if (n < 4) return NULL; - return ret; + HMM_Vec2 *ret = NULL; + arrsetcap(ret, (n/3) * 8); + + // First point + arrput(ret, cp[0]); + + // For each cubic Bezier segment: i += 3 + for (int i = 0; i < n - 3; i += 3) + { + ret = spline_min_angle(&cp[i], &bezier_m, ma, ret); + } + return ret; } -HMM_Vec2 catmull_rom_query(HMM_Vec2 *cp, float d, const HMM_Mat4 *G) +static HMM_Vec2 catmull_rom_query_internal(HMM_Vec2 *cp, float d, const HMM_Mat4 *M) { - if (arrlen(cp) < 4 || d < 0 || d > 1) return HMM_V2(0,0); + int n = arrlen(cp); + if (n < 4) return HMM_V2(0,0); - int segs = arrlen(cp)-3; - float d_per_seg = (float)1/segs; - float maxi = d_per_seg; - int p1 = 2; - while (maxi < d) { - maxi += d_per_seg; - p1++; - } + // Number of segments: + int seg_count = n - 3; + // Scale d in [0..1] -> which segment? + float segf = d * seg_count; + int seg_idx = (int) floorf(segf); + if (seg_idx >= seg_count) seg_idx = seg_count - 1; + if (seg_idx < 0) seg_idx = 0; - return cp[0]; -// return cubic_spline_d(p0, cp[p1], cp[p1+1], p3, G, d); + // Local parameter in [0..1] + float u = segf - seg_idx; + + // The control points for that segment are cp[ seg_idx .. seg_idx+3 ] + return cubic_spline_d(cp + seg_idx, (HMM_Mat4 *)M, u); } -float spline_seglen(float t0, float t1, float max_angle, HMM_Mat4 *Cd, HMM_Mat4 *C) +HMM_Vec2 catmull_rom_pos(HMM_Vec2 *cp, float d) { - float total = 0; - float step = 0.1; - for (float i = t0; i < t1; i += step) - total += HMM_LenV2(spline_CT(Cd, i).xy) * step; - - return total; - - /* Estimation via max angle */ -/* float total = 0.0; - float tmid = (t0+t1)/2; - HMM_Vec2 a = spline_CT(C, t0).xy; - HMM_Vec2 b = spline_CT(C, t1).xy; - HMM_Vec2 m = spline_CT(C, tmid).xy; - - if (HMM_AngleV2(m,b) > max_angle) { - total += spline_seglen(t0, tmid, max_angle, Cd, C); - total += spline_seglen(tmid, t1, max_angle, Cd, C); - } else - return HMM_LenV2(spline_CT(Cd, t0).xy)*(t1-t0); - - return total; -*/ + return catmull_rom_query_internal(cp, d, &catmull_rom_m); +} +HMM_Vec2 catmull_rom_tan(HMM_Vec2 *cp, float d) +{ + return catmull_rom_query_internal(cp, d, &catmull_rom_dm); +} +HMM_Vec2 catmull_rom_curv(HMM_Vec2 *cp, float d) +{ + return catmull_rom_query_internal(cp, d, &catmull_rom_ddm); +} +HMM_Vec2 catmull_rom_wig(HMM_Vec2 *cp, float d) +{ + return catmull_rom_query_internal(cp, d, &catmull_rom_dddm); } +/* ------------------------------------------------------------------------- + Approximate length of a single 4-point cubic spline segment by + numeric integration (or sampling) of the velocity magnitude. + “spline_seglen” below does a quick sampling approach. + ------------------------------------------------------------------------- */ +float spline_seglen(float t0, float t1, int steps, HMM_Mat4 *Cd, HMM_Mat4 *C) +{ + // Simple uniform sampling of the tangent magnitude + float total = 0.0f; + float dt = (t1 - t0) / (float) steps; + for (int i = 0; i < steps; i++) + { + float t = t0 + (i + 0.5f) * dt; // midpoint rule + // derivative at t + HMM_Vec2 vel = spline_CT(Cd, t).XY; + float speed = HMM_LenV2(vel); + total += speed * dt; + } + return total; +} + +/* Summation of lengths across all Catmull–Rom segments. */ float catmull_rom_len(HMM_Vec2 *cp) { - float len = 0.0; - int segs = arrlen(cp)-3; - float d_per_seg = (float)1/segs; - float maxi = d_per_seg; - for (int i = 0; i < arrlen(cp)-3; i++) { - HMM_Mat4 C = make_C(&cp[i], &catmull_rom_m); - HMM_Mat4 Cd = make_C(&cp[i], &catmull_rom_dm); - len += spline_seglen(0, 1, 0.1, &Cd, &C); - } - return len; -} + int stepsPerSegment = 64; + float len = 0.0f; + int n = arrlen(cp); + if (n < 4) return 0.0f; -/* d is from 0 to 1 for the entire spline */ -HMM_Vec2 catmull_rom_pos(HMM_Vec2 *cp, float d) { return catmull_rom_query(cp,d,&catmull_rom_m); } -HMM_Vec2 catmull_rom_tan(HMM_Vec2 *cp, float d) { return catmull_rom_query(cp,d,&catmull_rom_dm); } -HMM_Vec2 catmull_rom_curv(HMM_Vec2 *cp, float d) { return catmull_rom_query(cp,d,&catmull_rom_ddm); } -HMM_Vec2 catmull_rom_wig(HMM_Vec2 *cp, float d) { return catmull_rom_query(cp,d,&catmull_rom_dddm); } + for (int i = 0; i < n - 3; i++) + { + // Build the position matrix & derivative matrix for this segment + HMM_Mat4 C = make_C(&cp[i], &catmull_rom_m); + HMM_Mat4 Cd = make_C(&cp[i], &catmull_rom_dm); + // integrate from 0..1 + len += spline_seglen(0.0f, 1.0f, stepsPerSegment, &Cd, &C); + } + return len; +} HMM_Vec2 catmull_rom_closest(HMM_Vec2 *cp, HMM_Vec2 p) { - return p; + int n = arrlen(cp); + if (n < 4) return p; + + float bestDist = FLT_MAX; + HMM_Vec2 bestPt = p; + + int steps = 64; // more steps => more accurate + for (int seg = 0; seg < n - 3; seg++) + { + // Build a single-segment matrix + HMM_Vec2 segCP[4] = { cp[seg], cp[seg+1], cp[seg+2], cp[seg+3] }; + HMM_Mat4 C = make_C(segCP, &catmull_rom_m); + for (int i = 0; i <= steps; i++) + { + float t = (float)i / steps; + HMM_Vec2 pt = spline_CT(&C, t).XY; + float dist = HMM_DistV2(p, pt); + if (dist < bestDist) + { + bestDist = dist; + bestPt = pt; + } + } + } + + return bestPt; } diff --git a/source/spline.h b/source/spline.h index 772843c2..4ee0e8c1 100644 --- a/source/spline.h +++ b/source/spline.h @@ -3,21 +3,83 @@ #include "HandmadeMath.h" -HMM_Vec2 *catmull_rom_ma_v2(HMM_Vec2 *cp, float ma); -HMM_Vec3 *catmull_rom_ma_v3(HMM_Vec3 *cp, float ma); -HMM_Vec4 *catmull_rom_ma_v4(HMM_Vec4 *cp, float ma); +#ifdef __cplusplus +extern "C" { +#endif +/* + These were already in your original header: +*/ + +// Adaptive Catmull–Rom in 2D / 3D / 4D (by minimum angle): +HMM_Vec2 *catmull_rom_ma_v2(HMM_Vec2 *cp, float ma); +HMM_Vec3 *catmull_rom_ma_v3(HMM_Vec3 *cp, float ma); /* not yet implemented in .c, placeholder */ +HMM_Vec4 *catmull_rom_ma_v4(HMM_Vec4 *cp, float ma); /* not yet implemented in .c, placeholder */ + +// Adaptive Bezier in 2D (by minimum angle): HMM_Vec2 *bezier_cb_ma_v2(HMM_Vec2 *cp, float ma); + +// Generic “single-segment” query for 2D control points + basis matrix: HMM_Vec2 spline_query(HMM_Vec2 *cp, float d, HMM_Mat4 *basis); -HMM_Vec2 catmull_rom_pos(HMM_Vec2 *cp, float d); -HMM_Vec2 catmull_rom_tan(HMM_Vec2 *cp, float d); -HMM_Vec2 catmull_rom_curv(HMM_Vec2 *cp, float d); -HMM_Vec2 catmull_rom_wig(HMM_Vec2 *cp, float d); +// Catmull–Rom “entire spline” queries: +HMM_Vec2 catmull_rom_pos(HMM_Vec2 *cp, float d); // position +HMM_Vec2 catmull_rom_tan(HMM_Vec2 *cp, float d); // tangent +HMM_Vec2 catmull_rom_curv(HMM_Vec2 *cp, float d); // curvature +HMM_Vec2 catmull_rom_wig(HMM_Vec2 *cp, float d); // 3rd derivative (“wiggle”) +// Computes approximate length of a 2D Catmull–Rom spline: float catmull_rom_len(HMM_Vec2 *cp); -/* Returns closest point on a curve given a point p */ +// Returns closest point on a 2D Catmull–Rom curve given an external 2D point `p`: HMM_Vec2 catmull_rom_closest(HMM_Vec2 *cp, HMM_Vec2 p); + +/* + Additional convenience functions for *single-segment* cubic splines: + + Each of these expects exactly 4 control points in `p[0..3]`, + and a parameter t in [0..1]. They pick the appropriate matrix internally. +*/ + +// Hermite: +HMM_Vec2 cubic_hermite_pos(HMM_Vec2 *p, float d); +HMM_Vec2 cubic_hermite_tan(HMM_Vec2 *p, float d); +HMM_Vec2 cubic_hermite_curv(HMM_Vec2 *p, float d); +HMM_Vec2 cubic_hermite_wig(HMM_Vec2 *p, float d); + +// B-spline: +HMM_Vec2 b_spline_pos(HMM_Vec2 *p, float d); +HMM_Vec2 b_spline_tan(HMM_Vec2 *p, float d); +HMM_Vec2 b_spline_curv(HMM_Vec2 *p, float d); +HMM_Vec2 b_spline_wig(HMM_Vec2 *p, float d); + +// Bezier: +HMM_Vec2 bezier_pos(HMM_Vec2 *p, float d); +HMM_Vec2 bezier_tan(HMM_Vec2 *p, float d); +HMM_Vec2 bezier_curv(HMM_Vec2 *p, float d); +HMM_Vec2 bezier_wig(HMM_Vec2 *p, float d); + + +/* + Uniform sampling of a *single* 4-point segment in 2D: + Returns an array of points (stb_ds dynamic array). +*/ +HMM_Vec2 *spline_v2(HMM_Vec2 *p, HMM_Mat4 *m, int segs); + +/* + Adaptive subdivision routines (single-segment) in 2D: + - Subdivide by min segment length + - Subdivide by “max angle” proxy +*/ +HMM_Vec2 *spline2d_min_seg(float u0, float u1, float min_seg, HMM_Mat4 *C, HMM_Vec2 *ret); +HMM_Vec2 *catmull_rom_min_seg(HMM_Vec2 *a, HMM_Vec2 *b, HMM_Vec2 *c, HMM_Vec2 *d, float min_seg); + +HMM_Vec2 *spline2d_min_angle_2(float u0, float u1, float max_angle, HMM_Mat4 *C, HMM_Vec2 *arr); +HMM_Vec2 *spline_min_angle(HMM_Vec2 *p, const HMM_Mat4 *B, float min_angle, HMM_Vec2 *arr); + +#ifdef __cplusplus +} #endif + +#endif /* SPLINE_H */ diff --git a/source/sprite.c b/source/sprite.c index e873b725..ad3ea069 100644 --- a/source/sprite.c +++ b/source/sprite.c @@ -1,17 +1,18 @@ #include "sprite.h" static sprite model = { - .affine = {x:0,y:0,w:0,h:0}, - .image = JS_UNDEFINED, + .affine = {.x = 0, .y = 0, .w = 0, .h = 0}, .tex = NULL, - .uv = {x:0,y:0,w:1,h:1}, + .uv = {.x = 0, .y = 0, .w = 1, .h = 1}, .layer = 0, - .color = {1,1,1,1} + .color = {1, 1, 1, 1} }; + sprite *make_sprite() { sprite *sprite = malloc(sizeof(*sprite)); *sprite = model; + sprite->image = JS_UNDEFINED; return sprite; } diff --git a/source/thirdparty/stb/stb_c_lexer.h b/source/thirdparty/stb/stb_c_lexer.h index bf89dca3..fd42f1c3 100644 --- a/source/thirdparty/stb/stb_c_lexer.h +++ b/source/thirdparty/stb/stb_c_lexer.h @@ -38,6 +38,7 @@ // Contributors: // Arpad Goretity (bugfix) // Alan Hickman (hex floats) +// github:mundusnine (bugfix) // // LICENSE // @@ -562,7 +563,6 @@ int stb_c_lexer_get_token(stb_lexer *lexer) { int n = 0; lexer->string = lexer->string_storage; - lexer->string_len = n; do { if (n+1 >= lexer->string_storage_len) return stb__clex_token(lexer, CLEX_parse_error, p, p+n); @@ -576,6 +576,7 @@ int stb_c_lexer_get_token(stb_lexer *lexer) STB_C_LEX_DOLLAR_IDENTIFIER( || p[n] == '$' ) ); lexer->string[n] = 0; + lexer->string_len = n; return stb__clex_token(lexer, CLEX_id, p, p+n-1); } diff --git a/source/thirdparty/stb/stb_image.h b/source/thirdparty/stb/stb_image.h index a632d543..9eedabed 100644 --- a/source/thirdparty/stb/stb_image.h +++ b/source/thirdparty/stb/stb_image.h @@ -1,4 +1,4 @@ -/* stb_image - v2.29 - public domain image loader - http://nothings.org/stb +/* stb_image - v2.30 - public domain image loader - http://nothings.org/stb no warranty implied; use at your own risk Do this: @@ -48,6 +48,7 @@ LICENSE RECENT REVISION HISTORY: + 2.30 (2024-05-31) avoid erroneous gcc warning 2.29 (2023-05-xx) optimizations 2.28 (2023-01-29) many error fixes, security errors, just tons of stuff 2.27 (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes @@ -5159,9 +5160,11 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) // non-paletted with tRNS = constant alpha. if header-scanning, we can stop now. if (scan == STBI__SCAN_header) { ++s->img_n; return 1; } if (z->depth == 16) { - for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is + for (k = 0; k < s->img_n && k < 3; ++k) // extra loop test to suppress false GCC warning + tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is } else { - for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger + for (k = 0; k < s->img_n && k < 3; ++k) + tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger } } break; diff --git a/source/thirdparty/stb/stb_image_resize2.h b/source/thirdparty/stb/stb_image_resize2.h index 1cd379a7..2f262746 100644 --- a/source/thirdparty/stb/stb_image_resize2.h +++ b/source/thirdparty/stb/stb_image_resize2.h @@ -1,4 +1,4 @@ -/* stb_image_resize2 - v2.06 - public domain image resizing +/* stb_image_resize2 - v2.12 - public domain image resizing by Jeff Roberts (v2) and Jorge L Rodriguez http://github.com/nothings/stb @@ -11,35 +11,6 @@ #define STB_IMAGE_RESIZE_IMPLEMENTATION before the #include. That will create the implementation in that file. - PORTING FROM VERSION 1 - - The API has changed. You can continue to use the old version of stb_image_resize.h, - which is available in the "deprecated/" directory. - - If you're using the old simple-to-use API, porting is straightforward. - (For more advanced APIs, read the documentation.) - - stbir_resize_uint8(): - - call `stbir_resize_uint8_linear`, cast channel count to `stbir_pixel_layout` - - stbir_resize_float(): - - call `stbir_resize_float_linear`, cast channel count to `stbir_pixel_layout` - - stbir_resize_uint8_srgb(): - - function name is unchanged - - cast channel count to `stbir_pixel_layout` - - above is sufficient unless your image has alpha and it's not RGBA/BGRA - - in that case, follow the below instructions for stbir_resize_uint8_srgb_edgemode - - stbir_resize_uint8_srgb_edgemode() - - switch to the "medium complexity" API - - stbir_resize(), very similar API but a few more parameters: - - pixel_layout: cast channel count to `stbir_pixel_layout` - - data_type: STBIR_TYPE_UINT8_SRGB - - edge: unchanged (STBIR_EDGE_WRAP, etc.) - - filter: STBIR_FILTER_DEFAULT - - which channel is alpha is specified in stbir_pixel_layout, see enum for details - EASY API CALLS: Easy API downsamples w/Mitchell filter, upsamples w/cubic interpolation, clamps to edge. @@ -296,6 +267,34 @@ ASSERT Define STBIR_ASSERT(boolval) to override assert() and not use assert.h + PORTING FROM VERSION 1 + The API has changed. You can continue to use the old version of stb_image_resize.h, + which is available in the "deprecated/" directory. + + If you're using the old simple-to-use API, porting is straightforward. + (For more advanced APIs, read the documentation.) + + stbir_resize_uint8(): + - call `stbir_resize_uint8_linear`, cast channel count to `stbir_pixel_layout` + + stbir_resize_float(): + - call `stbir_resize_float_linear`, cast channel count to `stbir_pixel_layout` + + stbir_resize_uint8_srgb(): + - function name is unchanged + - cast channel count to `stbir_pixel_layout` + - above is sufficient unless your image has alpha and it's not RGBA/BGRA + - in that case, follow the below instructions for stbir_resize_uint8_srgb_edgemode + + stbir_resize_uint8_srgb_edgemode() + - switch to the "medium complexity" API + - stbir_resize(), very similar API but a few more parameters: + - pixel_layout: cast channel count to `stbir_pixel_layout` + - data_type: STBIR_TYPE_UINT8_SRGB + - edge: unchanged (STBIR_EDGE_WRAP, etc.) + - filter: STBIR_FILTER_DEFAULT + - which channel is alpha is specified in stbir_pixel_layout, see enum for details + FUTURE TODOS * For polyphase integral filters, we just memcpy the coeffs to dupe them, but we should indirect and use the same coeff memory. @@ -320,7 +319,7 @@ CONTRIBUTORS Jeff Roberts: 2.0 implementation, optimizations, SIMD - Martins Mozeiko: NEON simd, WASM simd, clang and GCC whisperer. + Martins Mozeiko: NEON simd, WASM simd, clang and GCC whisperer Fabian Giesen: half float and srgb converters Sean Barrett: API design, optimizations Jorge L Rodriguez: Original 1.0 implementation @@ -328,15 +327,30 @@ Nathan Reed: warning fixes for 1.0 REVISIONS - 2.06 (2024-02-10) fix for indentical width/height 3x or more down-scaling - undersampling a single row on rare resize ratios (about 1%) - 2.05 (2024-02-07) fix for 2 pixel to 1 pixel resizes with wrap (thanks Aras) - fix for output callback (thanks Julien Koenen) + 2.12 (2024-10-18) fix incorrect use of user_data with STBIR_FREE + 2.11 (2024-09-08) fix harmless asan warnings in 2-channel and 3-channel mode + with AVX-2, fix some weird scaling edge conditions with + point sample mode. + 2.10 (2024-07-27) fix the defines GCC and mingw for loop unroll control, + fix MSVC 32-bit arm half float routines. + 2.09 (2024-06-19) fix the defines for 32-bit ARM GCC builds (was selecting + hardware half floats). + 2.08 (2024-06-10) fix for RGB->BGR three channel flips and add SIMD (thanks + to Ryan Salsbury), fix for sub-rect resizes, use the + pragmas to control unrolling when they are available. + 2.07 (2024-05-24) fix for slow final split during threaded conversions of very + wide scanlines when downsampling (caused by extra input + converting), fix for wide scanline resamples with many + splits (int overflow), fix GCC warning. + 2.06 (2024-02-10) fix for identical width/height 3x or more down-scaling + undersampling a single row on rare resize ratios (about 1%). + 2.05 (2024-02-07) fix for 2 pixel to 1 pixel resizes with wrap (thanks Aras), + fix for output callback (thanks Julien Koenen). 2.04 (2023-11-17) fix for rare AVX bug, shadowed symbol (thanks Nikola Smiljanic). 2.03 (2023-11-01) ASAN and TSAN warnings fixed, minor tweaks. 2.00 (2023-10-10) mostly new source: new api, optimizations, simd, vertical-first, etc - (2x-5x faster without simd, 4x-12x faster with simd) - (in some cases, 20x to 40x faster - resizing to very small for example) + 2x-5x faster without simd, 4x-12x faster with simd, + in some cases, 20x to 40x faster esp resizing large to very small. 0.96 (2019-03-04) fixed warnings 0.95 (2017-07-23) fixed warnings 0.94 (2017-03-18) fixed warnings @@ -406,13 +420,13 @@ typedef uint64_t stbir_uint64; #endif #endif -#if defined( _M_ARM64 ) || defined( __aarch64__ ) || defined( __arm64__ ) || defined(_M_ARM) || (__ARM_NEON_FP & 4) != 0 && __ARM_FP16_FORMAT_IEEE != 0 +#if defined( _M_ARM64 ) || defined( __aarch64__ ) || defined( __arm64__ ) || ((__ARM_NEON_FP & 4) != 0) || defined(__ARM_NEON__) #ifndef STBIR_NEON #define STBIR_NEON #endif #endif -#if defined(_M_ARM) +#if defined(_M_ARM) || defined(__arm__) #ifdef STBIR_USE_FMA #undef STBIR_USE_FMA // no FMA for 32-bit arm on MSVC #endif @@ -1068,7 +1082,7 @@ struct stbir__info stbir__alpha_unweight_func * alpha_unweight; stbir__encode_pixels_func * encode_pixels; - int alloced_total; + int alloc_ring_buffer_num_entries; // Number of entries in the ring buffer that will be allocated int splits; // count of splits stbir_internal_pixel_layout input_pixel_layout_internal; @@ -1079,7 +1093,7 @@ struct stbir__info int vertical_first; int channels; int effective_channels; // same as channels, except on RGBA/ARGB (7), or XA/AX (3) - int alloc_ring_buffer_num_entries; // Number of entries in the ring buffer that will be allocated + size_t alloced_total; }; @@ -1090,10 +1104,11 @@ struct stbir__info #define stbir__small_float ((float)1 / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20)) // min/max friendly -#define STBIR_CLAMP(x, xmin, xmax) do { \ +#define STBIR_CLAMP(x, xmin, xmax) for(;;) { \ if ( (x) < (xmin) ) (x) = (xmin); \ if ( (x) > (xmax) ) (x) = (xmax); \ -} while (0) + break; \ +} static stbir__inline int stbir__min(int a, int b) { @@ -1190,19 +1205,40 @@ static stbir__inline stbir_uint8 stbir__linear_to_srgb_uchar(float in) #define STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS 4 // when threading, what is the minimum number of scanlines for a split? #endif -// restrict pointers for the output pointers +// restrict pointers for the output pointers, other loop and unroll control #if defined( _MSC_VER ) && !defined(__clang__) #define STBIR_STREAMOUT_PTR( star ) star __restrict #define STBIR_NO_UNROLL( ptr ) __assume(ptr) // this oddly keeps msvc from unrolling a loop -#elif defined( __clang__ ) - #define STBIR_STREAMOUT_PTR( star ) star __restrict__ - #define STBIR_NO_UNROLL( ptr ) __asm__ (""::"r"(ptr)) -#elif defined( __GNUC__ ) + #if _MSC_VER >= 1900 + #define STBIR_NO_UNROLL_LOOP_START __pragma(loop( no_vector )) + #else + #define STBIR_NO_UNROLL_LOOP_START + #endif +#elif defined( __clang__ ) + #define STBIR_STREAMOUT_PTR( star ) star __restrict__ + #define STBIR_NO_UNROLL( ptr ) __asm__ (""::"r"(ptr)) + #if ( __clang_major__ >= 4 ) || ( ( __clang_major__ >= 3 ) && ( __clang_minor__ >= 5 ) ) + #define STBIR_NO_UNROLL_LOOP_START _Pragma("clang loop unroll(disable)") _Pragma("clang loop vectorize(disable)") + #else + #define STBIR_NO_UNROLL_LOOP_START + #endif +#elif defined( __GNUC__ ) #define STBIR_STREAMOUT_PTR( star ) star __restrict__ #define STBIR_NO_UNROLL( ptr ) __asm__ (""::"r"(ptr)) + #if __GNUC__ >= 14 + #define STBIR_NO_UNROLL_LOOP_START _Pragma("GCC unroll 0") _Pragma("GCC novector") + #else + #define STBIR_NO_UNROLL_LOOP_START + #endif + #define STBIR_NO_UNROLL_LOOP_START_INF_FOR #else #define STBIR_STREAMOUT_PTR( star ) star #define STBIR_NO_UNROLL( ptr ) + #define STBIR_NO_UNROLL_LOOP_START +#endif + +#ifndef STBIR_NO_UNROLL_LOOP_START_INF_FOR +#define STBIR_NO_UNROLL_LOOP_START_INF_FOR STBIR_NO_UNROLL_LOOP_START #endif #ifdef STBIR_NO_SIMD // force simd off for whatever reason @@ -1754,11 +1790,19 @@ static stbir__inline stbir_uint8 stbir__linear_to_srgb_uchar(float in) ((stbir_uint64)(4*b+0)<<32) | ((stbir_uint64)(4*b+1)<<40) | ((stbir_uint64)(4*b+2)<<48) | ((stbir_uint64)(4*b+3)<<56)), \ vcreate_u8( (4*c+0) | ((4*c+1)<<8) | ((4*c+2)<<16) | ((4*c+3)<<24) | \ ((stbir_uint64)(4*d+0)<<32) | ((stbir_uint64)(4*d+1)<<40) | ((stbir_uint64)(4*d+2)<<48) | ((stbir_uint64)(4*d+3)<<56) ) ) + + static stbir__inline uint8x16x2_t stbir_make16x2(float32x4_t rega,float32x4_t regb) + { + uint8x16x2_t r = { vreinterpretq_u8_f32(rega), vreinterpretq_u8_f32(regb) }; + return r; + } #else #define stbir_make16(a,b,c,d) (uint8x16_t){4*a+0,4*a+1,4*a+2,4*a+3,4*b+0,4*b+1,4*b+2,4*b+3,4*c+0,4*c+1,4*c+2,4*c+3,4*d+0,4*d+1,4*d+2,4*d+3} + #define stbir_make16x2(a,b) (uint8x16x2_t){{vreinterpretq_u8_f32(a),vreinterpretq_u8_f32(b)}} #endif #define stbir__simdf_swiz( reg, one, two, three, four ) vreinterpretq_f32_u8( vqtbl1q_u8( vreinterpretq_u8_f32(reg), stbir_make16(one, two, three, four) ) ) + #define stbir__simdf_swiz2( rega, regb, one, two, three, four ) vreinterpretq_f32_u8( vqtbl2q_u8( stbir_make16x2(rega,regb), stbir_make16(one, two, three, four) ) ) #define stbir__simdi_16madd( out, reg0, reg1 ) \ { \ @@ -2142,7 +2186,7 @@ static stbir__inline stbir_uint8 stbir__linear_to_srgb_uchar(float in) #endif -#if defined(STBIR_NEON) && !defined(_M_ARM) +#if defined(STBIR_NEON) && !defined(_M_ARM) && !defined(__arm__) #if defined( _MSC_VER ) && !defined(__clang__) typedef __int16 stbir__FP16; @@ -2159,7 +2203,7 @@ static stbir__inline stbir_uint8 stbir__linear_to_srgb_uchar(float in) #endif -#if !defined(STBIR_NEON) && !defined(STBIR_FP16C) || defined(STBIR_NEON) && defined(_M_ARM) +#if (!defined(STBIR_NEON) && !defined(STBIR_FP16C)) || (defined(STBIR_NEON) && defined(_M_ARM)) || (defined(STBIR_NEON) && defined(__arm__)) // Fabian's half float routines, see: https://gist.github.com/rygorous/2156668 @@ -2386,24 +2430,6 @@ static stbir__inline stbir_uint8 stbir__linear_to_srgb_uchar(float in) stbir__simdi_store( output,final ); } -#elif defined(STBIR_WASM) || (defined(STBIR_NEON) && defined(_MSC_VER) && defined(_M_ARM)) // WASM or 32-bit ARM on MSVC/clang - - static stbir__inline void stbir__half_to_float_SIMD(float * output, stbir__FP16 const * input) - { - for (int i=0; i<8; i++) - { - output[i] = stbir__half_to_float(input[i]); - } - } - - static stbir__inline void stbir__float_to_half_SIMD(stbir__FP16 * output, float const * input) - { - for (int i=0; i<8; i++) - { - output[i] = stbir__float_to_half(input[i]); - } - } - #elif defined(STBIR_NEON) && defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__) // 64-bit ARM on MSVC (not clang) static stbir__inline void stbir__half_to_float_SIMD(float * output, stbir__FP16 const * input) @@ -2432,7 +2458,7 @@ static stbir__inline stbir_uint8 stbir__linear_to_srgb_uchar(float in) return vget_lane_f16(vcvt_f16_f32(vdupq_n_f32(f)), 0).n16_u16[0]; } -#elif defined(STBIR_NEON) // 64-bit ARM +#elif defined(STBIR_NEON) && ( defined( _M_ARM64 ) || defined( __aarch64__ ) || defined( __arm64__ ) ) // 64-bit ARM static stbir__inline void stbir__half_to_float_SIMD(float * output, stbir__FP16 const * input) { @@ -2458,6 +2484,23 @@ static stbir__inline stbir_uint8 stbir__linear_to_srgb_uchar(float in) return vget_lane_f16(vcvt_f16_f32(vdupq_n_f32(f)), 0); } +#elif defined(STBIR_WASM) || (defined(STBIR_NEON) && (defined(_MSC_VER) || defined(_M_ARM) || defined(__arm__))) // WASM or 32-bit ARM on MSVC/clang + + static stbir__inline void stbir__half_to_float_SIMD(float * output, stbir__FP16 const * input) + { + for (int i=0; i<8; i++) + { + output[i] = stbir__half_to_float(input[i]); + } + } + static stbir__inline void stbir__float_to_half_SIMD(stbir__FP16 * output, float const * input) + { + for (int i=0; i<8; i++) + { + output[i] = stbir__float_to_half(input[i]); + } + } + #endif @@ -2510,11 +2553,13 @@ static const STBIR__SIMDI_CONST(STBIR_topscale, 0x02000000); // Adding this switch saves about 5K on clang which is Captain Unroll the 3rd. #define STBIR_SIMD_STREAMOUT_PTR( star ) STBIR_STREAMOUT_PTR( star ) #define STBIR_SIMD_NO_UNROLL(ptr) STBIR_NO_UNROLL(ptr) +#define STBIR_SIMD_NO_UNROLL_LOOP_START STBIR_NO_UNROLL_LOOP_START +#define STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR STBIR_NO_UNROLL_LOOP_START_INF_FOR #ifdef STBIR_MEMCPY #undef STBIR_MEMCPY -#define STBIR_MEMCPY stbir_simd_memcpy #endif +#define STBIR_MEMCPY stbir_simd_memcpy // override normal use of memcpy with much simpler copy (faster and smaller with our sized copies) static void stbir_simd_memcpy( void * dest, void const * src, size_t bytes ) @@ -2532,6 +2577,7 @@ static void stbir_simd_memcpy( void * dest, void const * src, size_t bytes ) { if ( bytes ) { + STBIR_SIMD_NO_UNROLL_LOOP_START do { STBIR_SIMD_NO_UNROLL(d); @@ -2546,8 +2592,9 @@ static void stbir_simd_memcpy( void * dest, void const * src, size_t bytes ) // do one unaligned to get us aligned for the stream out below stbir__simdf_load( x, ( d + ofs_to_src ) ); stbir__simdf_store( d, x ); - d = (char*)( ( ( (ptrdiff_t)d ) + 16 ) & ~15 ); + d = (char*)( ( ( (size_t)d ) + 16 ) & ~15 ); + STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR for(;;) { STBIR_SIMD_NO_UNROLL(d); @@ -2578,8 +2625,9 @@ static void stbir_simd_memcpy( void * dest, void const * src, size_t bytes ) stbir__simdfX_store( d + 4*stbir__simdfX_float_count, x1 ); stbir__simdfX_store( d + 8*stbir__simdfX_float_count, x2 ); stbir__simdfX_store( d + 12*stbir__simdfX_float_count, x3 ); - d = (char*)( ( ( (ptrdiff_t)d ) + (16*stbir__simdfX_float_count) ) & ~((16*stbir__simdfX_float_count)-1) ); + d = (char*)( ( ( (size_t)d ) + (16*stbir__simdfX_float_count) ) & ~((16*stbir__simdfX_float_count)-1) ); + STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR for(;;) { STBIR_SIMD_NO_UNROLL(d); @@ -2616,6 +2664,7 @@ static void stbir_overlapping_memcpy( void * dest, void const * src, size_t byte if ( ofs_to_dest >= 16 ) // is the overlap more than 16 away? { char STBIR_SIMD_STREAMOUT_PTR( * ) s_end16 = ((char*) src) + (bytes&~15); + STBIR_SIMD_NO_UNROLL_LOOP_START do { stbir__simdf x; @@ -2642,12 +2691,16 @@ static void stbir_overlapping_memcpy( void * dest, void const * src, size_t byte // when in scalar mode, we let unrolling happen, so this macro just does the __restrict #define STBIR_SIMD_STREAMOUT_PTR( star ) STBIR_STREAMOUT_PTR( star ) #define STBIR_SIMD_NO_UNROLL(ptr) +#define STBIR_SIMD_NO_UNROLL_LOOP_START +#define STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR #endif // SSE2 #ifdef STBIR_PROFILE +#ifndef STBIR_PROFILE_FUNC + #if defined(_x86_64) || defined( __x86_64__ ) || defined( _M_X64 ) || defined(__x86_64) || defined(__SSE2__) || defined(STBIR_SSE) || defined( _M_IX86_FP ) || defined(__i386) || defined( __i386__ ) || defined( _M_IX86 ) || defined( _X86_ ) #ifdef _MSC_VER @@ -2687,8 +2740,9 @@ static void stbir_overlapping_memcpy( void * dest, void const * src, size_t byte #error Unknown platform for profiling. -#endif //x64 and +#endif // x64, arm +#endif // STBIR_PROFILE_FUNC #define STBIR_ONLY_PROFILE_GET_SPLIT_INFO ,stbir__per_split_info * split_info #define STBIR_ONLY_PROFILE_SET_SPLIT_INFO ,split_info @@ -2753,7 +2807,7 @@ static void stbir_overlapping_memcpy( void * dest, void const * src, size_t byte #ifndef STBIR_SIMD -// memcpy that is specically intentionally overlapping (src is smaller then dest, so can be +// memcpy that is specifically intentionally overlapping (src is smaller then dest, so can be // a normal forward copy, bytes is divisible by 4 and bytes is greater than or equal to // the diff between dest and src) static void stbir_overlapping_memcpy( void * dest, void const * src, size_t bytes ) @@ -2765,6 +2819,7 @@ static void stbir_overlapping_memcpy( void * dest, void const * src, size_t byte if ( ofs_to_dest >= 8 ) // is the overlap more than 8 away? { char STBIR_SIMD_STREAMOUT_PTR( * ) s_end8 = ((char*) src) + (bytes&~7); + STBIR_NO_UNROLL_LOOP_START do { STBIR_NO_UNROLL(sd); @@ -2776,6 +2831,7 @@ static void stbir_overlapping_memcpy( void * dest, void const * src, size_t byte return; } + STBIR_NO_UNROLL_LOOP_START do { STBIR_NO_UNROLL(sd); @@ -2880,13 +2936,6 @@ static float stbir__filter_mitchell(float x, float s, void * user_data) return (0.0f); } -static float stbir__support_zero(float s, void * user_data) -{ - STBIR__UNUSED(s); - STBIR__UNUSED(user_data); - return 0; -} - static float stbir__support_zeropoint5(float s, void * user_data) { STBIR__UNUSED(s); @@ -3201,6 +3250,7 @@ static void stbir__calculate_in_pixel_range( int * first_pixel, int * last_pixel first = (int)(STBIR_FLOORF(in_pixel_influence_lowerbound + 0.5f)); last = (int)(STBIR_FLOORF(in_pixel_influence_upperbound - 0.5f)); + if ( last < first ) last = first; // point sample mode can span a value *right* at 0.5, and cause these to cross if ( edge == STBIR_EDGE_WRAP ) { @@ -3236,6 +3286,11 @@ static void stbir__calculate_coefficients_for_gather_upsample( float out_filter_ stbir__calculate_in_pixel_range( &in_first_pixel, &in_last_pixel, out_pixel_center, out_filter_radius, inv_scale, out_shift, input_size, edge ); + // make sure we never generate a range larger than our precalculated coeff width + // this only happens in point sample mode, but it's a good safe thing to do anyway + if ( ( in_last_pixel - in_first_pixel + 1 ) > coefficient_width ) + in_last_pixel = in_first_pixel + coefficient_width - 1; + last_non_zero = -1; for (i = 0; i <= in_last_pixel - in_first_pixel; i++) { @@ -3271,19 +3326,22 @@ static void stbir__calculate_coefficients_for_gather_upsample( float out_filter_ } } -static void stbir__insert_coeff( stbir__contributors * contribs, float * coeffs, int new_pixel, float new_coeff ) +static void stbir__insert_coeff( stbir__contributors * contribs, float * coeffs, int new_pixel, float new_coeff, int max_width ) { if ( new_pixel <= contribs->n1 ) // before the end { if ( new_pixel < contribs->n0 ) // before the front? { - int j, o = contribs->n0 - new_pixel; - for ( j = contribs->n1 - contribs->n0 ; j <= 0 ; j-- ) - coeffs[ j + o ] = coeffs[ j ]; - for ( j = 1 ; j < o ; j-- ) - coeffs[ j ] = coeffs[ 0 ]; - coeffs[ 0 ] = new_coeff; - contribs->n0 = new_pixel; + if ( ( contribs->n1 - new_pixel + 1 ) <= max_width ) + { + int j, o = contribs->n0 - new_pixel; + for ( j = contribs->n1 - contribs->n0 ; j <= 0 ; j-- ) + coeffs[ j + o ] = coeffs[ j ]; + for ( j = 1 ; j < o ; j-- ) + coeffs[ j ] = coeffs[ 0 ]; + coeffs[ 0 ] = new_coeff; + contribs->n0 = new_pixel; + } } else { @@ -3292,12 +3350,15 @@ static void stbir__insert_coeff( stbir__contributors * contribs, float * coeffs, } else { - int j, e = new_pixel - contribs->n0; - for( j = ( contribs->n1 - contribs->n0 ) + 1 ; j < e ; j++ ) // clear in-betweens coeffs if there are any - coeffs[j] = 0; + if ( ( new_pixel - contribs->n0 + 1 ) <= max_width ) + { + int j, e = new_pixel - contribs->n0; + for( j = ( contribs->n1 - contribs->n0 ) + 1 ; j < e ; j++ ) // clear in-betweens coeffs if there are any + coeffs[j] = 0; - coeffs[ e ] = new_coeff; - contribs->n1 = new_pixel; + coeffs[ e ] = new_coeff; + contribs->n1 = new_pixel; + } } } @@ -3476,6 +3537,7 @@ static void stbir__cleanup_gathered_coefficients( stbir_edge edge, stbir__filter coeffs = coefficient_group; contribs = contributors; + for (n = 0; n < num_contributors; n++) { int i; @@ -3515,7 +3577,7 @@ static void stbir__cleanup_gathered_coefficients( stbir_edge edge, stbir__filter int endi = contribs->n1; contribs->n1 = input_last_n1; for( i = input_size; i <= endi; i++ ) - stbir__insert_coeff( contribs, coeffs, stbir__edge_wrap_slow[edge]( i, input_size ), coeffs[i-start] ); + stbir__insert_coeff( contribs, coeffs, stbir__edge_wrap_slow[edge]( i, input_size ), coeffs[i-start], coefficient_width ); } // now check left hand edge @@ -3527,7 +3589,7 @@ static void stbir__cleanup_gathered_coefficients( stbir_edge edge, stbir__filter // reinsert the coeffs with it reflected or clamped (insert accumulates, if the coeffs exist) for( i = -1 ; i > contribs->n0 ; i-- ) - stbir__insert_coeff( contribs, coeffs, stbir__edge_wrap_slow[edge]( i, input_size ), *c-- ); + stbir__insert_coeff( contribs, coeffs, stbir__edge_wrap_slow[edge]( i, input_size ), *c--, coefficient_width ); save_n0 = contribs->n0; save_n0_coeff = c[0]; // save it, since we didn't do the final one (i==n0), because there might be too many coeffs to hold (before we resize)! @@ -3537,7 +3599,7 @@ static void stbir__cleanup_gathered_coefficients( stbir_edge edge, stbir__filter coeffs[i] = coeffs[i-save_n0]; // now that we have shrunk down the contribs, we insert the first one safely - stbir__insert_coeff( contribs, coeffs, stbir__edge_wrap_slow[edge]( save_n0, input_size ), save_n0_coeff ); + stbir__insert_coeff( contribs, coeffs, stbir__edge_wrap_slow[edge]( save_n0, input_size ), save_n0_coeff, coefficient_width ); } } @@ -3546,6 +3608,7 @@ static void stbir__cleanup_gathered_coefficients( stbir_edge edge, stbir__filter int diff = contribs->n1 - contribs->n0 + 1; while ( diff && ( coeffs[ diff-1 ] == 0.0f ) ) --diff; + contribs->n1 = contribs->n0 + diff - 1; if ( contribs->n0 <= contribs->n1 ) @@ -3594,6 +3657,7 @@ static int stbir__pack_coefficients( int num_contributors, stbir__contributors* switch( widest ) { case 1: + STBIR_NO_UNROLL_LOOP_START do { STBIR_MOVE_1( pc, coeffs ); ++pc; @@ -3601,6 +3665,7 @@ static int stbir__pack_coefficients( int num_contributors, stbir__contributors* } while ( pc < pc_end ); break; case 2: + STBIR_NO_UNROLL_LOOP_START do { STBIR_MOVE_2( pc, coeffs ); pc += 2; @@ -3608,6 +3673,7 @@ static int stbir__pack_coefficients( int num_contributors, stbir__contributors* } while ( pc < pc_end ); break; case 3: + STBIR_NO_UNROLL_LOOP_START do { STBIR_MOVE_2( pc, coeffs ); STBIR_MOVE_1( pc+2, coeffs+2 ); @@ -3616,6 +3682,7 @@ static int stbir__pack_coefficients( int num_contributors, stbir__contributors* } while ( pc < pc_end ); break; case 4: + STBIR_NO_UNROLL_LOOP_START do { STBIR_MOVE_4( pc, coeffs ); pc += 4; @@ -3623,6 +3690,7 @@ static int stbir__pack_coefficients( int num_contributors, stbir__contributors* } while ( pc < pc_end ); break; case 5: + STBIR_NO_UNROLL_LOOP_START do { STBIR_MOVE_4( pc, coeffs ); STBIR_MOVE_1( pc+4, coeffs+4 ); @@ -3631,6 +3699,7 @@ static int stbir__pack_coefficients( int num_contributors, stbir__contributors* } while ( pc < pc_end ); break; case 6: + STBIR_NO_UNROLL_LOOP_START do { STBIR_MOVE_4( pc, coeffs ); STBIR_MOVE_2( pc+4, coeffs+4 ); @@ -3639,6 +3708,7 @@ static int stbir__pack_coefficients( int num_contributors, stbir__contributors* } while ( pc < pc_end ); break; case 7: + STBIR_NO_UNROLL_LOOP_START do { STBIR_MOVE_4( pc, coeffs ); STBIR_MOVE_2( pc+4, coeffs+4 ); @@ -3648,6 +3718,7 @@ static int stbir__pack_coefficients( int num_contributors, stbir__contributors* } while ( pc < pc_end ); break; case 8: + STBIR_NO_UNROLL_LOOP_START do { STBIR_MOVE_4( pc, coeffs ); STBIR_MOVE_4( pc+4, coeffs+4 ); @@ -3656,6 +3727,7 @@ static int stbir__pack_coefficients( int num_contributors, stbir__contributors* } while ( pc < pc_end ); break; case 9: + STBIR_NO_UNROLL_LOOP_START do { STBIR_MOVE_4( pc, coeffs ); STBIR_MOVE_4( pc+4, coeffs+4 ); @@ -3665,6 +3737,7 @@ static int stbir__pack_coefficients( int num_contributors, stbir__contributors* } while ( pc < pc_end ); break; case 10: + STBIR_NO_UNROLL_LOOP_START do { STBIR_MOVE_4( pc, coeffs ); STBIR_MOVE_4( pc+4, coeffs+4 ); @@ -3674,6 +3747,7 @@ static int stbir__pack_coefficients( int num_contributors, stbir__contributors* } while ( pc < pc_end ); break; case 11: + STBIR_NO_UNROLL_LOOP_START do { STBIR_MOVE_4( pc, coeffs ); STBIR_MOVE_4( pc+4, coeffs+4 ); @@ -3684,6 +3758,7 @@ static int stbir__pack_coefficients( int num_contributors, stbir__contributors* } while ( pc < pc_end ); break; case 12: + STBIR_NO_UNROLL_LOOP_START do { STBIR_MOVE_4( pc, coeffs ); STBIR_MOVE_4( pc+4, coeffs+4 ); @@ -3693,6 +3768,7 @@ static int stbir__pack_coefficients( int num_contributors, stbir__contributors* } while ( pc < pc_end ); break; default: + STBIR_NO_UNROLL_LOOP_START do { float * copy_end = pc + widest - 4; float * c = coeffs; @@ -3703,6 +3779,7 @@ static int stbir__pack_coefficients( int num_contributors, stbir__contributors* c += 4; } while ( pc <= copy_end ); copy_end += 4; + STBIR_NO_UNROLL_LOOP_START while ( pc < copy_end ) { STBIR_MOVE_1( pc, c ); @@ -3904,7 +3981,7 @@ static void stbir__calculate_filters( stbir__sampler * samp, stbir__sampler * ot } else { - stbir__insert_coeff( scatter_contributors, scatter_coeffs, n, gc ); + stbir__insert_coeff( scatter_contributors, scatter_coeffs, n, gc, scatter_coefficient_width ); } STBIR_ASSERT( ( scatter_contributors->n1 - scatter_contributors->n0 + 1 ) <= scatter_coefficient_width ); } @@ -4013,6 +4090,7 @@ static void stbir__fancy_alpha_weight_4ch( float * out_buffer, int width_times_c #ifdef STBIR_SIMD8 decode += 16; + STBIR_NO_UNROLL_LOOP_START while ( decode <= end_decode ) { stbir__simdf8 d0,d1,a0,a1,p0,p1; @@ -4037,6 +4115,7 @@ static void stbir__fancy_alpha_weight_4ch( float * out_buffer, int width_times_c decode -= 16; #else decode += 8; + STBIR_NO_UNROLL_LOOP_START while ( decode <= end_decode ) { stbir__simdf d0,a0,d1,a1,p0,p1; @@ -4059,12 +4138,14 @@ static void stbir__fancy_alpha_weight_4ch( float * out_buffer, int width_times_c // might be one last odd pixel #ifdef STBIR_SIMD8 + STBIR_NO_UNROLL_LOOP_START while ( decode < end_decode ) #else if ( decode < end_decode ) #endif { stbir__simdf d,a,p; + STBIR_NO_UNROLL(decode); stbir__simdf_load( d, decode ); stbir__simdf_0123to3333( a, d ); stbir__simdf_mult( p, a, d ); @@ -4106,6 +4187,7 @@ static void stbir__fancy_alpha_weight_2ch( float * out_buffer, int width_times_c decode += 8; if ( decode <= end_decode ) { + STBIR_NO_UNROLL_LOOP_START do { #ifdef STBIR_SIMD8 stbir__simdf8 d0,a0,p0; @@ -4149,6 +4231,7 @@ static void stbir__fancy_alpha_weight_2ch( float * out_buffer, int width_times_c decode -= 8; #endif + STBIR_SIMD_NO_UNROLL_LOOP_START while( decode < end_decode ) { float x = decode[0], y = decode[1]; @@ -4169,6 +4252,7 @@ static void stbir__fancy_alpha_unweight_4ch( float * encode_buffer, int width_ti // fancy RGBA is stored internally as R G B A Rpm Gpm Bpm + STBIR_SIMD_NO_UNROLL_LOOP_START do { float alpha = input[3]; #ifdef STBIR_SIMD @@ -4236,6 +4320,7 @@ static void stbir__simple_alpha_weight_4ch( float * decode_buffer, int width_tim #ifdef STBIR_SIMD { decode += 2 * stbir__simdfX_float_count; + STBIR_NO_UNROLL_LOOP_START while ( decode <= end_decode ) { stbir__simdfX d0,a0,d1,a1; @@ -4254,6 +4339,7 @@ static void stbir__simple_alpha_weight_4ch( float * decode_buffer, int width_tim // few last pixels remnants #ifdef STBIR_SIMD8 + STBIR_NO_UNROLL_LOOP_START while ( decode < end_decode ) #else if ( decode < end_decode ) @@ -4289,6 +4375,7 @@ static void stbir__simple_alpha_weight_2ch( float * decode_buffer, int width_tim #ifdef STBIR_SIMD decode += 2 * stbir__simdfX_float_count; + STBIR_NO_UNROLL_LOOP_START while ( decode <= end_decode ) { stbir__simdfX d0,a0,d1,a1; @@ -4306,6 +4393,7 @@ static void stbir__simple_alpha_weight_2ch( float * decode_buffer, int width_tim decode -= 2 * stbir__simdfX_float_count; #endif + STBIR_SIMD_NO_UNROLL_LOOP_START while( decode < end_decode ) { float alpha = decode[1]; @@ -4320,6 +4408,7 @@ static void stbir__simple_alpha_unweight_4ch( float * encode_buffer, int width_t float STBIR_SIMD_STREAMOUT_PTR(*) encode = encode_buffer; float const * end_output = encode_buffer + width_times_channels; + STBIR_SIMD_NO_UNROLL_LOOP_START do { float alpha = encode[3]; @@ -4367,9 +4456,77 @@ static void stbir__simple_flip_3ch( float * decode_buffer, int width_times_chann float STBIR_STREAMOUT_PTR(*) decode = decode_buffer; float const * end_decode = decode_buffer + width_times_channels; - decode += 12; +#ifdef STBIR_SIMD + #ifdef stbir__simdf_swiz2 // do we have two argument swizzles? + end_decode -= 12; + STBIR_NO_UNROLL_LOOP_START + while( decode <= end_decode ) + { + // on arm64 8 instructions, no overlapping stores + stbir__simdf a,b,c,na,nb; + STBIR_SIMD_NO_UNROLL(decode); + stbir__simdf_load( a, decode ); + stbir__simdf_load( b, decode+4 ); + stbir__simdf_load( c, decode+8 ); + + na = stbir__simdf_swiz2( a, b, 2, 1, 0, 5 ); + b = stbir__simdf_swiz2( a, b, 4, 3, 6, 7 ); + nb = stbir__simdf_swiz2( b, c, 0, 1, 4, 3 ); + c = stbir__simdf_swiz2( b, c, 2, 7, 6, 5 ); + + stbir__simdf_store( decode, na ); + stbir__simdf_store( decode+4, nb ); + stbir__simdf_store( decode+8, c ); + decode += 12; + } + end_decode += 12; + #else + end_decode -= 24; + STBIR_NO_UNROLL_LOOP_START + while( decode <= end_decode ) + { + // 26 instructions on x64 + stbir__simdf a,b,c,d,e,f,g; + float i21, i23; + STBIR_SIMD_NO_UNROLL(decode); + stbir__simdf_load( a, decode ); + stbir__simdf_load( b, decode+3 ); + stbir__simdf_load( c, decode+6 ); + stbir__simdf_load( d, decode+9 ); + stbir__simdf_load( e, decode+12 ); + stbir__simdf_load( f, decode+15 ); + stbir__simdf_load( g, decode+18 ); + + a = stbir__simdf_swiz( a, 2, 1, 0, 3 ); + b = stbir__simdf_swiz( b, 2, 1, 0, 3 ); + c = stbir__simdf_swiz( c, 2, 1, 0, 3 ); + d = stbir__simdf_swiz( d, 2, 1, 0, 3 ); + e = stbir__simdf_swiz( e, 2, 1, 0, 3 ); + f = stbir__simdf_swiz( f, 2, 1, 0, 3 ); + g = stbir__simdf_swiz( g, 2, 1, 0, 3 ); + + // stores overlap, need to be in order, + stbir__simdf_store( decode, a ); + i21 = decode[21]; + stbir__simdf_store( decode+3, b ); + i23 = decode[23]; + stbir__simdf_store( decode+6, c ); + stbir__simdf_store( decode+9, d ); + stbir__simdf_store( decode+12, e ); + stbir__simdf_store( decode+15, f ); + stbir__simdf_store( decode+18, g ); + decode[21] = i23; + decode[23] = i21; + decode += 24; + } + end_decode += 24; + #endif +#else + end_decode -= 12; + STBIR_NO_UNROLL_LOOP_START while( decode <= end_decode ) { + // 16 instructions float t0,t1,t2,t3; STBIR_NO_UNROLL(decode); t0 = decode[0]; t1 = decode[3]; t2 = decode[6]; t3 = decode[9]; @@ -4377,8 +4534,10 @@ static void stbir__simple_flip_3ch( float * decode_buffer, int width_times_chann decode[2] = t0; decode[5] = t1; decode[8] = t2; decode[11] = t3; decode += 12; } - decode -= 12; + end_decode += 12; +#endif + STBIR_NO_UNROLL_LOOP_START while( decode < end_decode ) { float t = decode[0]; @@ -4399,7 +4558,7 @@ static void stbir__decode_scanline(stbir__info const * stbir_info, int n, float stbir_edge edge_horizontal = stbir_info->horizontal.edge; stbir_edge edge_vertical = stbir_info->vertical.edge; int row = stbir__edge_wrap(edge_vertical, n, stbir_info->vertical.scale_info.input_full_size); - const void* input_plane_data = ( (char *) stbir_info->input_data ) + (ptrdiff_t)row * (ptrdiff_t) stbir_info->input_stride_bytes; + const void* input_plane_data = ( (char *) stbir_info->input_data ) + (size_t)row * (size_t) stbir_info->input_stride_bytes; stbir__span const * spans = stbir_info->scanline_extents.spans; float* full_decode_buffer = output_buffer - stbir_info->scanline_extents.conservative.n0 * effective_channels; @@ -4668,12 +4827,13 @@ static void stbir__decode_scanline(stbir__info const * stbir_info, int n, float stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*2 ); #define stbir__1_coeff_remnant( ofs ) \ - { stbir__simdf t; \ + { stbir__simdf t,d; \ stbir__simdf_load1z( t, hc + (ofs) ); \ + stbir__simdf_load2( d, decode + (ofs) * 2 ); \ stbir__simdf_0123to0011( t, t ); \ - stbir__simdf_mult_mem( t, t, decode+(ofs)*2 ); \ + stbir__simdf_mult( t, t, d ); \ stbir__simdf8_add4( tot0, tot0, t ); } - + #define stbir__2_coeff_remnant( ofs ) \ { stbir__simdf t; \ stbir__simdf_load2( t, hc + (ofs) ); \ @@ -6052,7 +6212,7 @@ static void stbir__resample_vertical_gather(stbir__info const * stbir_info, stbi stbir__resample_horizontal_gather(stbir_info, encode_buffer, decode_buffer STBIR_ONLY_PROFILE_SET_SPLIT_INFO ); } - stbir__encode_scanline( stbir_info, ( (char *) stbir_info->output_data ) + ((ptrdiff_t)n * (ptrdiff_t)stbir_info->output_stride_bytes), + stbir__encode_scanline( stbir_info, ( (char *) stbir_info->output_data ) + ((size_t)n * (size_t)stbir_info->output_stride_bytes), encode_buffer, n STBIR_ONLY_PROFILE_SET_SPLIT_INFO ); } @@ -6093,7 +6253,7 @@ static void stbir__vertical_gather_loop( stbir__info const * stbir_info, stbir__ // initialize the ring buffer for gathering split_info->ring_buffer_begin_index = 0; - split_info->ring_buffer_first_scanline = stbir_info->vertical.extent_info.lowest; + split_info->ring_buffer_first_scanline = vertical_contributors->n0; split_info->ring_buffer_last_scanline = split_info->ring_buffer_first_scanline - 1; // means "empty" for (y = start_output_y; y < end_output_y; y++) @@ -6147,7 +6307,7 @@ static void stbir__encode_first_scanline_from_scatter(stbir__info const * stbir_ float* ring_buffer_entry = stbir__get_ring_buffer_entry(stbir_info, split_info, split_info->ring_buffer_begin_index ); // dump the scanline out - stbir__encode_scanline( stbir_info, ( (char *)stbir_info->output_data ) + ( (ptrdiff_t)split_info->ring_buffer_first_scanline * (ptrdiff_t)stbir_info->output_stride_bytes ), ring_buffer_entry, split_info->ring_buffer_first_scanline STBIR_ONLY_PROFILE_SET_SPLIT_INFO ); + stbir__encode_scanline( stbir_info, ( (char *)stbir_info->output_data ) + ( (size_t)split_info->ring_buffer_first_scanline * (size_t)stbir_info->output_stride_bytes ), ring_buffer_entry, split_info->ring_buffer_first_scanline STBIR_ONLY_PROFILE_SET_SPLIT_INFO ); // mark it as empty ring_buffer_entry[ 0 ] = STBIR__FLOAT_EMPTY_MARKER; @@ -6168,7 +6328,7 @@ static void stbir__horizontal_resample_and_encode_first_scanline_from_scatter(st stbir__resample_horizontal_gather( stbir_info, split_info->vertical_buffer, ring_buffer_entry STBIR_ONLY_PROFILE_SET_SPLIT_INFO ); // dump the scanline out - stbir__encode_scanline( stbir_info, ( (char *)stbir_info->output_data ) + ( (ptrdiff_t)split_info->ring_buffer_first_scanline * (ptrdiff_t)stbir_info->output_stride_bytes ), split_info->vertical_buffer, split_info->ring_buffer_first_scanline STBIR_ONLY_PROFILE_SET_SPLIT_INFO ); + stbir__encode_scanline( stbir_info, ( (char *)stbir_info->output_data ) + ( (size_t)split_info->ring_buffer_first_scanline * (size_t)stbir_info->output_stride_bytes ), split_info->vertical_buffer, split_info->ring_buffer_first_scanline STBIR_ONLY_PROFILE_SET_SPLIT_INFO ); // mark it as empty ring_buffer_entry[ 0 ] = STBIR__FLOAT_EMPTY_MARKER; @@ -6572,7 +6732,7 @@ static void stbir__free_internal_mem( stbir__info *info ) STBIR__FREE_AND_CLEAR( info->horizontal.coefficients ); STBIR__FREE_AND_CLEAR( info->horizontal.contributors ); STBIR__FREE_AND_CLEAR( info->alloced_mem ); - STBIR__FREE_AND_CLEAR( info ); + STBIR_FREE( info, info->user_data ); #endif } @@ -6765,7 +6925,7 @@ static stbir__info * stbir__alloc_internal_mem_and_build_samplers( stbir__sample stbir__info * info = 0; void * alloced = 0; - int alloced_total = 0; + size_t alloced_total = 0; int vertical_first; int decode_buffer_size, ring_buffer_length_bytes, ring_buffer_size, vertical_buffer_size, alloc_ring_buffer_num_entries; @@ -6970,6 +7130,11 @@ static stbir__info * stbir__alloc_internal_mem_and_build_samplers( stbir__sample #ifdef STBIR__SEPARATE_ALLOCATIONS temp_mem_amt = decode_buffer_size; + + #ifdef STBIR_SIMD8 + if ( effective_channels == 3 ) + --temp_mem_amt; // avx in 3 channel mode needs one float at the start of the buffer + #endif #else temp_mem_amt = ( decode_buffer_size + ring_buffer_size + vertical_buffer_size ) * splits; #endif @@ -7067,36 +7232,33 @@ static stbir__info * stbir__alloc_internal_mem_and_build_samplers( stbir__sample info->ring_buffer_num_entries = conservative_split_output_size; STBIR_ASSERT( info->ring_buffer_num_entries <= info->alloc_ring_buffer_num_entries ); - // a few of the horizontal gather functions read one dword past the end (but mask it out), so put in a normal value so no snans or denormals accidentally sneak in + // a few of the horizontal gather functions read past the end of the decode (but mask it out), + // so put in normal values so no snans or denormals accidentally sneak in (also, in the ring + // buffer for vertical first) for( i = 0 ; i < splits ; i++ ) { - int width, ofs; + int t, ofs, start; - // find the right most span - if ( info->scanline_extents.spans[0].n1 > info->scanline_extents.spans[1].n1 ) - width = info->scanline_extents.spans[0].n1 - info->scanline_extents.spans[0].n0; - else - width = info->scanline_extents.spans[1].n1 - info->scanline_extents.spans[1].n0; + ofs = decode_buffer_size / 4; - // this calc finds the exact end of the decoded scanline for all filter modes. - // usually this is just the width * effective channels. But we have to account - // for the area to the left of the scanline for wrap filtering and alignment, this - // is stored as a negative value in info->scanline_extents.conservative.n0. Next, - // we need to skip the exact size of the right hand size filter area (again for - // wrap mode), this is in info->scanline_extents.edge_sizes[1]). - ofs = ( width + 1 - info->scanline_extents.conservative.n0 + info->scanline_extents.edge_sizes[1] ) * effective_channels; + #if defined( STBIR__SEPARATE_ALLOCATIONS ) && defined(STBIR_SIMD8) + if ( effective_channels == 3 ) + --ofs; // avx in 3 channel mode needs one float at the start of the buffer, so we snap back for clearing + #endif - // place a known, but numerically valid value in the decode buffer - info->split_info[i].decode_buffer[ ofs ] = 9999.0f; + start = ofs - 4; + if ( start < 0 ) start = 0; + + for( t = start ; t < ofs; t++ ) + info->split_info[i].decode_buffer[ t ] = 9999.0f; - // if vertical filtering first, place a known, but numerically valid value in the all - // of the ring buffer accumulators if ( vertical_first ) { int j; for( j = 0; j < info->ring_buffer_num_entries ; j++ ) { - stbir__get_ring_buffer_entry( info, info->split_info + i, j )[ ofs ] = 9999.0f; + for( t = start ; t < ofs; t++ ) + stbir__get_ring_buffer_entry( info, info->split_info + i, j )[ t ] = 9999.0f; } } } @@ -7108,7 +7270,7 @@ static stbir__info * stbir__alloc_internal_mem_and_build_samplers( stbir__sample // is this the first time through loop? if ( info == 0 ) { - alloced_total = (int) ( 15 + (size_t)advance_mem ); + alloced_total = ( 15 + (size_t)advance_mem ); alloced = STBIR_MALLOC( alloced_total, user_data ); if ( alloced == 0 ) return 0; @@ -7225,7 +7387,7 @@ static void stbir__update_info_from_resize( stbir__info * info, STBIR_RESIZE * r info->output_stride_bytes = info->channels * info->horizontal.scale_info.output_sub_size * stbir__type_size[output_type]; // calc offset - info->output_data = ( (char*) resize->output_pixels ) + ( (ptrdiff_t) info->offset_y * (ptrdiff_t) resize->output_stride_in_bytes ) + ( info->offset_x * info->channels * stbir__type_size[output_type] ); + info->output_data = ( (char*) resize->output_pixels ) + ( (size_t) info->offset_y * (size_t) resize->output_stride_in_bytes ) + ( info->offset_x * info->channels * stbir__type_size[output_type] ); info->in_pixels_cb = resize->input_cb; info->user_data = resize->user_data; @@ -7797,7 +7959,7 @@ static int stbir__check_output_stuff( void ** ret_ptr, int * ret_pitch, void * o if ( output_stride_in_bytes < pitch ) return 0; - size = output_stride_in_bytes * output_h; + size = (size_t)output_stride_in_bytes * (size_t)output_h; if ( size == 0 ) return 0; @@ -8075,6 +8237,7 @@ static void STBIR__CODER_NAME( stbir__decode_uint8_linear_scaled )( float * deco if ( width_times_channels >= 16 ) { decode_end -= 16; + STBIR_NO_UNROLL_LOOP_START_INF_FOR for(;;) { #ifdef STBIR_SIMD8 @@ -8130,6 +8293,7 @@ static void STBIR__CODER_NAME( stbir__decode_uint8_linear_scaled )( float * deco // try to do blocks of 4 when you can #if stbir__coder_min_num != 3 // doesn't divide cleanly by four decode += 4; + STBIR_SIMD_NO_UNROLL_LOOP_START while( decode <= decode_end ) { STBIR_SIMD_NO_UNROLL(decode); @@ -8145,6 +8309,7 @@ static void STBIR__CODER_NAME( stbir__decode_uint8_linear_scaled )( float * deco // do the remnants #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START while( decode < decode_end ) { STBIR_NO_UNROLL(decode); @@ -8171,6 +8336,7 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_linear_scaled )( void * outpu { float const * end_encode_m8 = encode + width_times_channels - stbir__simdfX_float_count*2; end_output -= stbir__simdfX_float_count*2; + STBIR_NO_UNROLL_LOOP_START_INF_FOR for(;;) { stbir__simdfX e0, e1; @@ -8202,6 +8368,7 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_linear_scaled )( void * outpu // try to do blocks of 4 when you can #if stbir__coder_min_num != 3 // doesn't divide cleanly by four output += 4; + STBIR_NO_UNROLL_LOOP_START while( output <= end_output ) { stbir__simdf e0; @@ -8220,6 +8387,7 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_linear_scaled )( void * outpu // do the remnants #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START while( output < end_output ) { stbir__simdf e0; @@ -8256,6 +8424,7 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_linear_scaled )( void * outpu // do the remnants #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START while( output < end_output ) { float f; @@ -8285,6 +8454,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint8_linear)( float * decodep, int if ( width_times_channels >= 16 ) { decode_end -= 16; + STBIR_NO_UNROLL_LOOP_START_INF_FOR for(;;) { #ifdef STBIR_SIMD8 @@ -8334,6 +8504,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint8_linear)( float * decodep, int // try to do blocks of 4 when you can #if stbir__coder_min_num != 3 // doesn't divide cleanly by four decode += 4; + STBIR_SIMD_NO_UNROLL_LOOP_START while( decode <= decode_end ) { STBIR_SIMD_NO_UNROLL(decode); @@ -8349,6 +8520,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint8_linear)( float * decodep, int // do the remnants #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START while( decode < decode_end ) { STBIR_NO_UNROLL(decode); @@ -8375,6 +8547,7 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_linear )( void * outputp, int { float const * end_encode_m8 = encode + width_times_channels - stbir__simdfX_float_count*2; end_output -= stbir__simdfX_float_count*2; + STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR for(;;) { stbir__simdfX e0, e1; @@ -8406,6 +8579,7 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_linear )( void * outputp, int // try to do blocks of 4 when you can #if stbir__coder_min_num != 3 // doesn't divide cleanly by four output += 4; + STBIR_NO_UNROLL_LOOP_START while( output <= end_output ) { stbir__simdf e0; @@ -8444,6 +8618,7 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_linear )( void * outputp, int // do the remnants #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START while( output < end_output ) { float f; @@ -8484,6 +8659,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint8_srgb)( float * decodep, int wi // do the remnants #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START while( decode < decode_end ) { STBIR_NO_UNROLL(decode); @@ -8569,12 +8745,12 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_srgb )( void * outputp, int w unsigned char * end_output = ( (unsigned char*) output ) + width_times_channels; #ifdef STBIR_SIMD - stbir_uint32 const * to_srgb = fp32_to_srgb8_tab4 - (127-13)*8; if ( width_times_channels >= 16 ) { float const * end_encode_m16 = encode + width_times_channels - 16; end_output -= 16; + STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR for(;;) { stbir__simdf f0, f1, f2, f3; @@ -8588,7 +8764,7 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_srgb )( void * outputp, int w stbir__min_max_shift20( i2, f2 ); stbir__min_max_shift20( i3, f3 ); - stbir__simdi_table_lookup4( i0, i1, i2, i3, to_srgb ); + stbir__simdi_table_lookup4( i0, i1, i2, i3, ( fp32_to_srgb8_tab4 - (127-13)*8 ) ); stbir__linear_to_srgb_finish( i0, f0 ); stbir__linear_to_srgb_finish( i1, f1 ); @@ -8613,6 +8789,7 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_srgb )( void * outputp, int w // try to do blocks of 4 when you can #if stbir__coder_min_num != 3 // doesn't divide cleanly by four output += 4; + STBIR_SIMD_NO_UNROLL_LOOP_START while ( output <= end_output ) { STBIR_SIMD_NO_UNROLL(encode); @@ -8630,6 +8807,7 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_srgb )( void * outputp, int w // do the remnants #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START while( output < end_output ) { STBIR_NO_UNROLL(encode); @@ -8670,12 +8848,12 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_srgb4_linearalpha )( void * o unsigned char * end_output = ( (unsigned char*) output ) + width_times_channels; #ifdef STBIR_SIMD - stbir_uint32 const * to_srgb = fp32_to_srgb8_tab4 - (127-13)*8; if ( width_times_channels >= 16 ) { float const * end_encode_m16 = encode + width_times_channels - 16; end_output -= 16; + STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR for(;;) { stbir__simdf f0, f1, f2, f3; @@ -8689,7 +8867,7 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_srgb4_linearalpha )( void * o stbir__min_max_shift20( i2, f2 ); stbir__scale_and_convert( i3, f3 ); - stbir__simdi_table_lookup3( i0, i1, i2, to_srgb ); + stbir__simdi_table_lookup3( i0, i1, i2, ( fp32_to_srgb8_tab4 - (127-13)*8 ) ); stbir__linear_to_srgb_finish( i0, f0 ); stbir__linear_to_srgb_finish( i1, f1 ); @@ -8711,6 +8889,7 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_srgb4_linearalpha )( void * o } #endif + STBIR_SIMD_NO_UNROLL_LOOP_START do { float f; STBIR_SIMD_NO_UNROLL(encode); @@ -8761,12 +8940,12 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_srgb2_linearalpha )( void * o unsigned char * end_output = ( (unsigned char*) output ) + width_times_channels; #ifdef STBIR_SIMD - stbir_uint32 const * to_srgb = fp32_to_srgb8_tab4 - (127-13)*8; if ( width_times_channels >= 16 ) { float const * end_encode_m16 = encode + width_times_channels - 16; end_output -= 16; + STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR for(;;) { stbir__simdf f0, f1, f2, f3; @@ -8780,7 +8959,7 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_srgb2_linearalpha )( void * o stbir__min_max_shift20( i2, f2 ); stbir__scale_and_convert( i3, f3 ); - stbir__simdi_table_lookup2( i0, i2, to_srgb ); + stbir__simdi_table_lookup2( i0, i2, ( fp32_to_srgb8_tab4 - (127-13)*8 ) ); stbir__linear_to_srgb_finish( i0, f0 ); stbir__linear_to_srgb_finish( i2, f2 ); @@ -8800,6 +8979,7 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_srgb2_linearalpha )( void * o } #endif + STBIR_SIMD_NO_UNROLL_LOOP_START do { float f; STBIR_SIMD_NO_UNROLL(encode); @@ -8828,6 +9008,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint16_linear_scaled)( float * decod if ( width_times_channels >= 8 ) { decode_end -= 8; + STBIR_NO_UNROLL_LOOP_START_INF_FOR for(;;) { #ifdef STBIR_SIMD8 @@ -8871,6 +9052,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint16_linear_scaled)( float * decod // try to do blocks of 4 when you can #if stbir__coder_min_num != 3 // doesn't divide cleanly by four decode += 4; + STBIR_SIMD_NO_UNROLL_LOOP_START while( decode <= decode_end ) { STBIR_SIMD_NO_UNROLL(decode); @@ -8886,6 +9068,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint16_linear_scaled)( float * decod // do the remnants #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START while( decode < decode_end ) { STBIR_NO_UNROLL(decode); @@ -8914,6 +9097,7 @@ static void STBIR__CODER_NAME(stbir__encode_uint16_linear_scaled)( void * output { float const * end_encode_m8 = encode + width_times_channels - stbir__simdfX_float_count*2; end_output -= stbir__simdfX_float_count*2; + STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR for(;;) { stbir__simdfX e0, e1; @@ -8941,6 +9125,7 @@ static void STBIR__CODER_NAME(stbir__encode_uint16_linear_scaled)( void * output // try to do blocks of 4 when you can #if stbir__coder_min_num != 3 // doesn't divide cleanly by four output += 4; + STBIR_NO_UNROLL_LOOP_START while( output <= end_output ) { stbir__simdf e; @@ -8959,6 +9144,7 @@ static void STBIR__CODER_NAME(stbir__encode_uint16_linear_scaled)( void * output // do the remnants #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START while( output < end_output ) { stbir__simdf e; @@ -8980,6 +9166,7 @@ static void STBIR__CODER_NAME(stbir__encode_uint16_linear_scaled)( void * output // try to do blocks of 4 when you can #if stbir__coder_min_num != 3 // doesn't divide cleanly by four output += 4; + STBIR_SIMD_NO_UNROLL_LOOP_START while( output <= end_output ) { float f; @@ -8996,6 +9183,7 @@ static void STBIR__CODER_NAME(stbir__encode_uint16_linear_scaled)( void * output // do the remnants #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START while( output < end_output ) { float f; @@ -9025,6 +9213,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint16_linear)( float * decodep, int if ( width_times_channels >= 8 ) { decode_end -= 8; + STBIR_NO_UNROLL_LOOP_START_INF_FOR for(;;) { #ifdef STBIR_SIMD8 @@ -9065,6 +9254,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint16_linear)( float * decodep, int // try to do blocks of 4 when you can #if stbir__coder_min_num != 3 // doesn't divide cleanly by four decode += 4; + STBIR_SIMD_NO_UNROLL_LOOP_START while( decode <= decode_end ) { STBIR_SIMD_NO_UNROLL(decode); @@ -9080,6 +9270,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint16_linear)( float * decodep, int // do the remnants #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START while( decode < decode_end ) { STBIR_NO_UNROLL(decode); @@ -9107,6 +9298,7 @@ static void STBIR__CODER_NAME(stbir__encode_uint16_linear)( void * outputp, int { float const * end_encode_m8 = encode + width_times_channels - stbir__simdfX_float_count*2; end_output -= stbir__simdfX_float_count*2; + STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR for(;;) { stbir__simdfX e0, e1; @@ -9134,6 +9326,7 @@ static void STBIR__CODER_NAME(stbir__encode_uint16_linear)( void * outputp, int // try to do blocks of 4 when you can #if stbir__coder_min_num != 3 // doesn't divide cleanly by four output += 4; + STBIR_NO_UNROLL_LOOP_START while( output <= end_output ) { stbir__simdf e; @@ -9155,6 +9348,7 @@ static void STBIR__CODER_NAME(stbir__encode_uint16_linear)( void * outputp, int // try to do blocks of 4 when you can #if stbir__coder_min_num != 3 // doesn't divide cleanly by four output += 4; + STBIR_SIMD_NO_UNROLL_LOOP_START while( output <= end_output ) { float f; @@ -9173,6 +9367,7 @@ static void STBIR__CODER_NAME(stbir__encode_uint16_linear)( void * outputp, int // do the remnants #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START while( output < end_output ) { float f; @@ -9201,6 +9396,7 @@ static void STBIR__CODER_NAME(stbir__decode_half_float_linear)( float * decodep, { stbir__FP16 const * end_input_m8 = input + width_times_channels - 8; decode_end -= 8; + STBIR_NO_UNROLL_LOOP_START_INF_FOR for(;;) { STBIR_NO_UNROLL(decode); @@ -9242,6 +9438,7 @@ static void STBIR__CODER_NAME(stbir__decode_half_float_linear)( float * decodep, // try to do blocks of 4 when you can #if stbir__coder_min_num != 3 // doesn't divide cleanly by four decode += 4; + STBIR_SIMD_NO_UNROLL_LOOP_START while( decode <= decode_end ) { STBIR_SIMD_NO_UNROLL(decode); @@ -9257,6 +9454,7 @@ static void STBIR__CODER_NAME(stbir__decode_half_float_linear)( float * decodep, // do the remnants #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START while( decode < decode_end ) { STBIR_NO_UNROLL(decode); @@ -9283,6 +9481,7 @@ static void STBIR__CODER_NAME( stbir__encode_half_float_linear )( void * outputp { float const * end_encode_m8 = encode + width_times_channels - 8; end_output -= 8; + STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR for(;;) { STBIR_SIMD_NO_UNROLL(encode); @@ -9323,6 +9522,7 @@ static void STBIR__CODER_NAME( stbir__encode_half_float_linear )( void * outputp // try to do blocks of 4 when you can #if stbir__coder_min_num != 3 // doesn't divide cleanly by four output += 4; + STBIR_SIMD_NO_UNROLL_LOOP_START while( output <= end_output ) { STBIR_SIMD_NO_UNROLL(output); @@ -9338,6 +9538,7 @@ static void STBIR__CODER_NAME( stbir__encode_half_float_linear )( void * outputp // do the remnants #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START while( output < end_output ) { STBIR_NO_UNROLL(output); @@ -9366,6 +9567,7 @@ static void STBIR__CODER_NAME(stbir__decode_float_linear)( float * decodep, int { float const * end_input_m16 = input + width_times_channels - 16; decode_end -= 16; + STBIR_NO_UNROLL_LOOP_START_INF_FOR for(;;) { STBIR_NO_UNROLL(decode); @@ -9414,6 +9616,7 @@ static void STBIR__CODER_NAME(stbir__decode_float_linear)( float * decodep, int // try to do blocks of 4 when you can #if stbir__coder_min_num != 3 // doesn't divide cleanly by four decode += 4; + STBIR_SIMD_NO_UNROLL_LOOP_START while( decode <= decode_end ) { STBIR_SIMD_NO_UNROLL(decode); @@ -9429,6 +9632,7 @@ static void STBIR__CODER_NAME(stbir__decode_float_linear)( float * decodep, int // do the remnants #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START while( decode < decode_end ) { STBIR_NO_UNROLL(decode); @@ -9488,6 +9692,7 @@ static void STBIR__CODER_NAME( stbir__encode_float_linear )( void * outputp, int { float const * end_encode_m8 = encode + width_times_channels - ( stbir__simdfX_float_count * 2 ); end_output -= ( stbir__simdfX_float_count * 2 ); + STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR for(;;) { stbir__simdfX e0, e1; @@ -9521,6 +9726,7 @@ static void STBIR__CODER_NAME( stbir__encode_float_linear )( void * outputp, int // try to do blocks of 4 when you can #if stbir__coder_min_num != 3 // doesn't divide cleanly by four output += 4; + STBIR_NO_UNROLL_LOOP_START while( output <= end_output ) { stbir__simdf e0; @@ -9545,6 +9751,7 @@ static void STBIR__CODER_NAME( stbir__encode_float_linear )( void * outputp, int // try to do blocks of 4 when you can #if stbir__coder_min_num != 3 // doesn't divide cleanly by four output += 4; + STBIR_SIMD_NO_UNROLL_LOOP_START while( output <= end_output ) { float e; @@ -9564,6 +9771,7 @@ static void STBIR__CODER_NAME( stbir__encode_float_linear )( void * outputp, int // do the remnants #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START while( output < end_output ) { float e; @@ -9674,6 +9882,7 @@ static void STBIR_chans( stbir__vertical_scatter_with_,_coeffs)( float ** output stbIF5(stbir__simdfX c5 = stbir__simdf_frepX( c5s ); ) stbIF6(stbir__simdfX c6 = stbir__simdf_frepX( c6s ); ) stbIF7(stbir__simdfX c7 = stbir__simdf_frepX( c7s ); ) + STBIR_SIMD_NO_UNROLL_LOOP_START while ( ( (char*)input_end - (char*) input ) >= (16*stbir__simdfX_float_count) ) { stbir__simdfX o0, o1, o2, o3, r0, r1, r2, r3; @@ -9728,6 +9937,7 @@ static void STBIR_chans( stbir__vertical_scatter_with_,_coeffs)( float ** output input += (4*stbir__simdfX_float_count); stbIF0( output0 += (4*stbir__simdfX_float_count); ) stbIF1( output1 += (4*stbir__simdfX_float_count); ) stbIF2( output2 += (4*stbir__simdfX_float_count); ) stbIF3( output3 += (4*stbir__simdfX_float_count); ) stbIF4( output4 += (4*stbir__simdfX_float_count); ) stbIF5( output5 += (4*stbir__simdfX_float_count); ) stbIF6( output6 += (4*stbir__simdfX_float_count); ) stbIF7( output7 += (4*stbir__simdfX_float_count); ) } + STBIR_SIMD_NO_UNROLL_LOOP_START while ( ( (char*)input_end - (char*) input ) >= 16 ) { stbir__simdf o0, r0; @@ -9760,6 +9970,7 @@ static void STBIR_chans( stbir__vertical_scatter_with_,_coeffs)( float ** output } } #else + STBIR_NO_UNROLL_LOOP_START while ( ( (char*)input_end - (char*) input ) >= 16 ) { float r0, r1, r2, r3; @@ -9791,6 +10002,7 @@ static void STBIR_chans( stbir__vertical_scatter_with_,_coeffs)( float ** output stbIF0( output0 += 4; ) stbIF1( output1 += 4; ) stbIF2( output2 += 4; ) stbIF3( output3 += 4; ) stbIF4( output4 += 4; ) stbIF5( output5 += 4; ) stbIF6( output6 += 4; ) stbIF7( output7 += 4; ) } #endif + STBIR_NO_UNROLL_LOOP_START while ( input < input_end ) { float r = input[0]; @@ -9854,6 +10066,7 @@ static void STBIR_chans( stbir__vertical_gather_with_,_coeffs)( float * outputp, stbIF6(stbir__simdfX c6 = stbir__simdf_frepX( c6s ); ) stbIF7(stbir__simdfX c7 = stbir__simdf_frepX( c7s ); ) + STBIR_SIMD_NO_UNROLL_LOOP_START while ( ( (char*)input0_end - (char*) input0 ) >= (16*stbir__simdfX_float_count) ) { stbir__simdfX o0, o1, o2, o3, r0, r1, r2, r3; @@ -9898,6 +10111,7 @@ static void STBIR_chans( stbir__vertical_gather_with_,_coeffs)( float * outputp, stbIF0( input0 += (4*stbir__simdfX_float_count); ) stbIF1( input1 += (4*stbir__simdfX_float_count); ) stbIF2( input2 += (4*stbir__simdfX_float_count); ) stbIF3( input3 += (4*stbir__simdfX_float_count); ) stbIF4( input4 += (4*stbir__simdfX_float_count); ) stbIF5( input5 += (4*stbir__simdfX_float_count); ) stbIF6( input6 += (4*stbir__simdfX_float_count); ) stbIF7( input7 += (4*stbir__simdfX_float_count); ) } + STBIR_SIMD_NO_UNROLL_LOOP_START while ( ( (char*)input0_end - (char*) input0 ) >= 16 ) { stbir__simdf o0, r0; @@ -9922,6 +10136,7 @@ static void STBIR_chans( stbir__vertical_gather_with_,_coeffs)( float * outputp, } } #else + STBIR_NO_UNROLL_LOOP_START while ( ( (char*)input0_end - (char*) input0 ) >= 16 ) { float o0, o1, o2, o3; @@ -9943,6 +10158,7 @@ static void STBIR_chans( stbir__vertical_gather_with_,_coeffs)( float * outputp, stbIF0( input0 += 4; ) stbIF1( input1 += 4; ) stbIF2( input2 += 4; ) stbIF3( input3 += 4; ) stbIF4( input4 += 4; ) stbIF5( input5 += 4; ) stbIF6( input6 += 4; ) stbIF7( input7 += 4; ) } #endif + STBIR_NO_UNROLL_LOOP_START while ( input0 < input0_end ) { float o0; @@ -10035,6 +10251,7 @@ static void STBIR_chans( stbir__horizontal_gather_,_channels_with_1_coeff)( floa { float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + STBIR_SIMD_NO_UNROLL_LOOP_START do { float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; float const * hc = horizontal_coefficients; @@ -10047,6 +10264,7 @@ static void STBIR_chans( stbir__horizontal_gather_,_channels_with_2_coeffs)( flo { float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + STBIR_SIMD_NO_UNROLL_LOOP_START do { float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; float const * hc = horizontal_coefficients; @@ -10059,6 +10277,7 @@ static void STBIR_chans( stbir__horizontal_gather_,_channels_with_3_coeffs)( flo { float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + STBIR_SIMD_NO_UNROLL_LOOP_START do { float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; float const * hc = horizontal_coefficients; @@ -10071,6 +10290,7 @@ static void STBIR_chans( stbir__horizontal_gather_,_channels_with_4_coeffs)( flo { float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + STBIR_SIMD_NO_UNROLL_LOOP_START do { float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; float const * hc = horizontal_coefficients; @@ -10083,6 +10303,7 @@ static void STBIR_chans( stbir__horizontal_gather_,_channels_with_5_coeffs)( flo { float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + STBIR_SIMD_NO_UNROLL_LOOP_START do { float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; float const * hc = horizontal_coefficients; @@ -10096,6 +10317,7 @@ static void STBIR_chans( stbir__horizontal_gather_,_channels_with_6_coeffs)( flo { float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + STBIR_SIMD_NO_UNROLL_LOOP_START do { float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; float const * hc = horizontal_coefficients; @@ -10110,6 +10332,7 @@ static void STBIR_chans( stbir__horizontal_gather_,_channels_with_7_coeffs)( flo float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; stbir__3_coeff_setup(); + STBIR_SIMD_NO_UNROLL_LOOP_START do { float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; float const * hc = horizontal_coefficients; @@ -10124,6 +10347,7 @@ static void STBIR_chans( stbir__horizontal_gather_,_channels_with_8_coeffs)( flo { float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + STBIR_SIMD_NO_UNROLL_LOOP_START do { float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; float const * hc = horizontal_coefficients; @@ -10137,6 +10361,7 @@ static void STBIR_chans( stbir__horizontal_gather_,_channels_with_9_coeffs)( flo { float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + STBIR_SIMD_NO_UNROLL_LOOP_START do { float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; float const * hc = horizontal_coefficients; @@ -10151,6 +10376,7 @@ static void STBIR_chans( stbir__horizontal_gather_,_channels_with_10_coeffs)( fl { float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + STBIR_SIMD_NO_UNROLL_LOOP_START do { float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; float const * hc = horizontal_coefficients; @@ -10166,6 +10392,7 @@ static void STBIR_chans( stbir__horizontal_gather_,_channels_with_11_coeffs)( fl float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; stbir__3_coeff_setup(); + STBIR_SIMD_NO_UNROLL_LOOP_START do { float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; float const * hc = horizontal_coefficients; @@ -10180,6 +10407,7 @@ static void STBIR_chans( stbir__horizontal_gather_,_channels_with_12_coeffs)( fl { float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + STBIR_SIMD_NO_UNROLL_LOOP_START do { float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; float const * hc = horizontal_coefficients; @@ -10194,12 +10422,14 @@ static void STBIR_chans( stbir__horizontal_gather_,_channels_with_n_coeffs_mod0 { float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + STBIR_SIMD_NO_UNROLL_LOOP_START do { float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; int n = ( ( horizontal_contributors->n1 - horizontal_contributors->n0 + 1 ) - 4 + 3 ) >> 2; float const * hc = horizontal_coefficients; stbir__4_coeff_start(); + STBIR_SIMD_NO_UNROLL_LOOP_START do { hc += 4; decode += STBIR__horizontal_channels * 4; @@ -10214,12 +10444,14 @@ static void STBIR_chans( stbir__horizontal_gather_,_channels_with_n_coeffs_mod1 { float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + STBIR_SIMD_NO_UNROLL_LOOP_START do { float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; int n = ( ( horizontal_contributors->n1 - horizontal_contributors->n0 + 1 ) - 5 + 3 ) >> 2; float const * hc = horizontal_coefficients; stbir__4_coeff_start(); + STBIR_SIMD_NO_UNROLL_LOOP_START do { hc += 4; decode += STBIR__horizontal_channels * 4; @@ -10235,12 +10467,14 @@ static void STBIR_chans( stbir__horizontal_gather_,_channels_with_n_coeffs_mod2 { float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + STBIR_SIMD_NO_UNROLL_LOOP_START do { float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; int n = ( ( horizontal_contributors->n1 - horizontal_contributors->n0 + 1 ) - 6 + 3 ) >> 2; float const * hc = horizontal_coefficients; stbir__4_coeff_start(); + STBIR_SIMD_NO_UNROLL_LOOP_START do { hc += 4; decode += STBIR__horizontal_channels * 4; @@ -10258,12 +10492,14 @@ static void STBIR_chans( stbir__horizontal_gather_,_channels_with_n_coeffs_mod3 float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; stbir__3_coeff_setup(); + STBIR_SIMD_NO_UNROLL_LOOP_START do { float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; int n = ( ( horizontal_contributors->n1 - horizontal_contributors->n0 + 1 ) - 7 + 3 ) >> 2; float const * hc = horizontal_coefficients; stbir__4_coeff_start(); + STBIR_SIMD_NO_UNROLL_LOOP_START do { hc += 4; decode += STBIR__horizontal_channels * 4; diff --git a/source/thirdparty/stb/stb_truetype.h b/source/thirdparty/stb/stb_truetype.h index bbf2284b..90a5c2e2 100644 --- a/source/thirdparty/stb/stb_truetype.h +++ b/source/thirdparty/stb/stb_truetype.h @@ -54,7 +54,7 @@ // Hou Qiming Derek Vinyard // Rob Loach Cort Stratton // Kenney Phillis Jr. Brian Costabile -// Ken Voskuil (kaesve) +// Ken Voskuil (kaesve) Yakov Galka // // VERSION HISTORY // @@ -4604,6 +4604,8 @@ STBTT_DEF unsigned char * stbtt_GetGlyphSDF(const stbtt_fontinfo *info, float sc scale_y = -scale_y; { + // distance from singular values (in the same units as the pixel grid) + const float eps = 1./1024, eps2 = eps*eps; int x,y,i,j; float *precompute; stbtt_vertex *verts; @@ -4616,15 +4618,15 @@ STBTT_DEF unsigned char * stbtt_GetGlyphSDF(const stbtt_fontinfo *info, float sc float x0 = verts[i].x*scale_x, y0 = verts[i].y*scale_y; float x1 = verts[j].x*scale_x, y1 = verts[j].y*scale_y; float dist = (float) STBTT_sqrt((x1-x0)*(x1-x0) + (y1-y0)*(y1-y0)); - precompute[i] = (dist == 0) ? 0.0f : 1.0f / dist; + precompute[i] = (dist < eps) ? 0.0f : 1.0f / dist; } else if (verts[i].type == STBTT_vcurve) { float x2 = verts[j].x *scale_x, y2 = verts[j].y *scale_y; float x1 = verts[i].cx*scale_x, y1 = verts[i].cy*scale_y; float x0 = verts[i].x *scale_x, y0 = verts[i].y *scale_y; float bx = x0 - 2*x1 + x2, by = y0 - 2*y1 + y2; float len2 = bx*bx + by*by; - if (len2 != 0.0f) - precompute[i] = 1.0f / (bx*bx + by*by); + if (len2 >= eps2) + precompute[i] = 1.0f / len2; else precompute[i] = 0.0f; } else @@ -4689,8 +4691,8 @@ STBTT_DEF unsigned char * stbtt_GetGlyphSDF(const stbtt_fontinfo *info, float sc float a = 3*(ax*bx + ay*by); float b = 2*(ax*ax + ay*ay) + (mx*bx+my*by); float c = mx*ax+my*ay; - if (a == 0.0) { // if a is 0, it's linear - if (b != 0.0) { + if (STBTT_fabs(a) < eps2) { // if a is 0, it's linear + if (STBTT_fabs(b) >= eps2) { res[num++] = -c/b; } } else { diff --git a/source/transform.c b/source/transform.c index 67f3f423..8302b093 100644 --- a/source/transform.c +++ b/source/transform.c @@ -13,14 +13,14 @@ static transform model = { .cache = {1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1}, .gcache = {1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1}, .dirty = 0, - .jsparent = JS_UNDEFINED, - .change_hook = JS_UNDEFINED }; transform *make_transform() { transform *t = malloc(sizeof(transform)); *t = model; + t->jsparent = JS_UNDEFINED; + t->change_hook = JS_UNDEFINED; return t; }