From cb9d699576c8b7201d0d660939496014c879838e Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Sat, 1 May 2021 14:32:16 +0100 Subject: [PATCH] Use invW as the Z axis (abandon standards compliance for speed) --- GL/flush.c | 49 ++++++++++++++++++++++++++++++++++++++++++++-- include/GL/glkos.h | 1 + 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/GL/flush.c b/GL/flush.c index 23d5c1f..6040804 100644 --- a/GL/flush.c +++ b/GL/flush.c @@ -6,6 +6,14 @@ static PolyList OP_LIST; static PolyList PT_LIST; static PolyList TR_LIST; +/** Don't fully comply with the GL standard, in order to make some performance + * gains. Specifically, glDepthRange will be ignored, and the final + * Z coordinate will be invW rather than a value between 0 and 1. + * + * Defaults to GL_TRUE; set to GL_FALSE if you experience issues. + **/ + +#define FAST_MODE GL_TRUE PolyList* _glActivePolyList() { if(_glIsBlendingEnabled()) { @@ -94,7 +102,7 @@ GL_FORCE_INLINE bool glIsVertex(const float flags) { } -GL_FORCE_INLINE void glPerspectiveDivide(void* src, uint32_t n) { +GL_FORCE_INLINE void glPerspectiveDivideStandard(void* src, uint32_t n) { TRACE(); /* Perform perspective divide on each vertex */ @@ -117,7 +125,7 @@ GL_FORCE_INLINE void glPerspectiveDivide(void* src, uint32_t n) { VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight ); - /* Apply depth range */ + /* FIXME: Apply depth range */ vertex->xyz[2] = MAX( 1.0f - MATH_fmac(vertex->xyz[2] * f, 0.5f, 0.5f), PVR_MIN_Z @@ -128,6 +136,43 @@ GL_FORCE_INLINE void glPerspectiveDivide(void* src, uint32_t n) { } } +GL_FORCE_INLINE void glPerspectiveDivideFastMode(void* src, uint32_t n) { + TRACE(); + + /* Perform perspective divide on each vertex */ + Vertex* vertex = (Vertex*) src; + + const float h = GetVideoMode()->height; + + while(n--) { + __asm__("pref @%0" : : "r"(vertex + 1)); + + if(likely(glIsVertex(vertex->flags))) { + const float f = MATH_Fast_Invert(vertex->w); + + /* Convert to NDC and apply viewport */ + vertex->xyz[0] = MATH_fmac( + 
VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth + ); + + vertex->xyz[1] = h - MATH_fmac( + VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight + ); + + vertex->xyz[2] = f; + } + + ++vertex; + } +} + +GL_FORCE_INLINE void glPerspectiveDivide(void* src, uint32_t n) { +#if FAST_MODE + glPerspectiveDivideFastMode(src, n); +#else + glPerspectiveDivideStandard(src, n); +#endif +} void APIENTRY glKosSwapBuffers() { TRACE(); diff --git a/include/GL/glkos.h b/include/GL/glkos.h index 184ecaf..a46277e 100644 --- a/include/GL/glkos.h +++ b/include/GL/glkos.h @@ -56,6 +56,7 @@ typedef struct { GLuint initial_tr_capacity; GLuint initial_pt_capacity; GLuint initial_immediate_capacity; + } GLdcConfig;