From 6be3e6185de95143de6640d150c803cf59ced18a Mon Sep 17 00:00:00 2001
From: Luke Benstead
Date: Wed, 14 Sep 2022 19:20:24 +0100
Subject: [PATCH] Faster inversion

---
  Review notes. They sit after the three-dash separator, so they are not
  part of the commit message.

  * Formatting: the line breaks and leading whitespace below were
    reconstructed from a whitespace-collapsed copy of this patch. The line
    counts of every hunk match its @@ header, but the exact indentation
    (4 spaces per level assumed) must be confirmed against the tree before
    applying -- TODO confirm.

  * GL/platforms/sh4.c, _glFastInvert: sgn is (x > 0) - (x < 0), which is 0
    when x == 0, so the function returns 0 * MATH_fsrra(0). MATH_fsrra is
    not defined in this patch; if it returns +inf for 0 (presumably, being
    a reciprocal square root) the product is NaN. NOTE(review): confirm
    that w == 0 cannot reach _glPerspectiveDivideVertex.

  * GL/texture.c, GPUTextureTwiddle8PPP: idx is int32_t while w and h are
    uint32_t, so gl_assert(idx < (w * h)) is evaluated as an unsigned
    comparison. Each store packs two source pixels (rows y and y + 1) into
    one vtex element, so the destination presumably holds fewer than w * h
    elements and the bound may be looser than intended -- verify against
    the element type and size of dst.

  * GL/texture.c, glColorTableEXT: the loop counter changes from GLbyte to
    GLubyte; the value of MAX_GLDC_SHARED_PALETTES is not visible here.

  * samples/quadmark/main.c, setup(): the "PVR needs to warm up" comment is
    added above glKosInit(), while the calls added by this patch
    (pvr_wait_ready / pvr_scene_begin / pvr_scene_finish) are at the end of
    setup(). Presumably the comment describes those calls.

  * The subject line only mentions the inversion change; the patch also
    adds the twiddle index assertion, the GLubyte loop counter and the
    quadmark warm-up frame.

 GL/platforms/sh4.c      |  9 +++++++--
 GL/texture.c            | 10 ++++++----
 samples/quadmark/main.c |  5 +++++
 3 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c
index 7974f61..6fb3093 100644
--- a/GL/platforms/sh4.c
+++ b/GL/platforms/sh4.c
@@ -47,8 +47,13 @@ void SceneListBegin(GPUList list) {
     pvr_list_begin(list);
 }
 
+GL_FORCE_INLINE float _glFastInvert(float x) {
+    const float sgn = (x > 0) - (x < 0);
+    return sgn * MATH_fsrra(x * x);
+}
+
 GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) {
-    const float f = MATH_Fast_Invert(vertex->w);
+    const float f = _glFastInvert(vertex->w);
 
     /* Convert to NDC and apply viewport */
     vertex->xyz[0] = __builtin_fmaf(
@@ -65,7 +70,7 @@ GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) {
         avoid a divide by zero.
     */
     if(unlikely(vertex->w == 1.0f)) {
-        vertex->xyz[2] = MATH_Fast_Invert(1.0001f + vertex->xyz[2]);
+        vertex->xyz[2] = _glFastInvert(1.0001f + vertex->xyz[2]);
     } else {
         vertex->xyz[2] = f;
     }
diff --git a/GL/texture.c b/GL/texture.c
index 2c93377..2adaf94 100644
--- a/GL/texture.c
+++ b/GL/texture.c
@@ -161,9 +161,11 @@ static void GPUTextureTwiddle8PPP(void* src, void* dst, uint32_t w, uint32_t h)
     for(y = 0; y < h; y += 2) {
         yout = y;
         for(x = 0; x < w; x++) {
-            vtex[TWIDOUT((yout & mask) / 2, x & mask) +
-                 (x / min + yout / min)*min * min / 2] =
-                     pixels[y * w + x] | (pixels[(y + 1) * w + x] << 8);
+            int32_t idx = TWIDOUT((yout & mask) / 2, x & mask) +
+                 (x / min + yout / min)*min * min / 2;
+
+            gl_assert(idx < (w * h));
+            vtex[idx] = pixels[y * w + x] | (pixels[(y + 1) * w + x] << 8);
         }
     }
 }
@@ -1719,7 +1721,7 @@ GLAPI void APIENTRY glColorTableEXT(GLenum target, GLenum internalFormat, GLsize
         sharedPaletteUsed = GL_TRUE;
     }
 
-    for (GLbyte i = 1; i < MAX_GLDC_SHARED_PALETTES; ++i) {
+    for (GLubyte i = 1; i < MAX_GLDC_SHARED_PALETTES; ++i) {
         if (target == GL_SHARED_TEXTURE_PALETTE_0_KOS + i) {
             palette = SHARED_PALETTES[i];
             sharedPaletteUsed = GL_TRUE;
diff --git a/samples/quadmark/main.c b/samples/quadmark/main.c
index 26ea433..4f83bde 100644
--- a/samples/quadmark/main.c
+++ b/samples/quadmark/main.c
@@ -68,6 +68,7 @@ int check_start() {
 }
 
 void setup() {
+    //PVR needs to warm up for a frame, or results will be low
     glKosInit();
     glMatrixMode(GL_MODELVIEW);
     glLoadIdentity();
@@ -76,6 +77,10 @@ void setup() {
     glLoadIdentity();
 
     glDisable(GL_NEARZ_CLIPPING_KOS);
+
+    pvr_wait_ready();
+    pvr_scene_begin();
+    pvr_scene_finish();
 }
 
 void do_frame() {