From 3160fc517d7baea5cc34346cc6dead8f4027bf4d Mon Sep 17 00:00:00 2001 From: T_chan Date: Wed, 2 Feb 2022 09:22:23 +0000 Subject: [PATCH 01/20] declare glFogiv() --- include/GL/gl.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/GL/gl.h b/include/GL/gl.h index 1c19df6..975bc1e 100644 --- a/include/GL/gl.h +++ b/include/GL/gl.h @@ -643,6 +643,7 @@ GLAPI void APIENTRY glFrustum(GLfloat left, GLfloat right, /* Fog Functions - client must enable GL_FOG for this to take effect */ GLAPI void APIENTRY glFogi(GLenum pname, GLint param); GLAPI void APIENTRY glFogf(GLenum pname, GLfloat param); +GLAPI void APIENTRY glFogiv(GLenum pname, const GLint* params); GLAPI void APIENTRY glFogfv(GLenum pname, const GLfloat *params); /* Lighting Functions - client must enable GL_LIGHTING for this to take effect */ From 69a2310a3c6ad36553fad56c939fe8c7a9b58d7a Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Sat, 3 Dec 2022 13:45:53 +0000 Subject: [PATCH 02/20] Default to small culling to avoid artifacts --- GL/state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GL/state.c b/GL/state.c index 20f28c5..a302157 100644 --- a/GL/state.c +++ b/GL/state.c @@ -49,7 +49,7 @@ void _glApplyScissor(bool force); static int _calc_pvr_face_culling() { if(!CULLING_ENABLED) { - return GPU_CULLING_NONE; + return GPU_CULLING_SMALL; } else { if(CULL_FACE == GL_BACK) { return (FRONT_FACE == GL_CW) ? 
GPU_CULLING_CCW : GPU_CULLING_CW; From d78c6275bb5843b9ce9f0385218247e6f72d856b Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Sat, 3 Dec 2022 14:00:24 +0000 Subject: [PATCH 03/20] Increase tri-strip length --- GL/draw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/GL/draw.c b/GL/draw.c index 654feb4..1d205a1 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -1034,6 +1034,9 @@ GL_FORCE_INLINE void push(PolyHeader* header, GLboolean multiTextureHeader, Poly CompilePolyHeader(header, &cxt); + /* Force bits 18 and 19 on to switch to 6 triangle strips */ + header->cmd |= 0xC0000; + /* Post-process the vertex list */ /* * This is currently unnecessary. aligned_vector memsets the allocated objects From ac8650454942a6202e52e08fae90662149b94f2a Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Sat, 3 Dec 2022 20:01:43 +0000 Subject: [PATCH 04/20] Clean up --- GL/draw.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/GL/draw.c b/GL/draw.c index 1d205a1..9b0bfd0 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -999,7 +999,7 @@ GL_FORCE_INLINE void divide(SubmissionTarget* target) { } } -GL_FORCE_INLINE void push(PolyHeader* header, GLboolean multiTextureHeader, PolyList* activePolyList, GLshort textureUnit) { +GL_FORCE_INLINE void apply_poly_header(PolyHeader* header, GLboolean multiTextureHeader, PolyList* activePolyList, GLshort textureUnit) { TRACE(); // Compile the header @@ -1128,6 +1128,8 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL /* Make room for the vertices and header */ aligned_vector_extend(&target->output->vector, target->count + 1); + apply_poly_header(_glSubmissionTargetHeader(target), GL_FALSE, target->output, 0); + /* If we're lighting, then we need to do some work in * eye-space, so we only transform vertices by the modelview * matrix, and then later multiply by projection. 
@@ -1158,8 +1160,6 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL transform(target); } - push(_glSubmissionTargetHeader(target), GL_FALSE, target->output, 0); - /* Now, if multitexturing is enabled, we want to send exactly the same vertices again, except: - We want to enable blending, and send them to the TR list @@ -1191,6 +1191,8 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL gl_assert(vertex); PolyHeader* mtHeader = (PolyHeader*) vertex++; + /* Send the buffer again to the transparent list */ + apply_poly_header(mtHeader, GL_TRUE, _glTransparentPolyList(), 1); /* Replace the UV coordinates with the ST ones */ VertexExtra* ve = aligned_vector_at(target->extras, 0); @@ -1200,9 +1202,6 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL ++vertex; ++ve; } - - /* Send the buffer again to the transparent list */ - push(mtHeader, GL_TRUE, _glTransparentPolyList(), 1); } void APIENTRY glDrawElements(GLenum mode, GLsizei count, GLenum type, const GLvoid* indices) { From 8fff6ee1aa3ea9b0ed5c3d656e53856fc3fdc244 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Fri, 9 Dec 2022 20:17:46 +0000 Subject: [PATCH 05/20] Treat GL_CLAMP_TO_EDGE the same as GL_CLAMP --- GL/texture.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GL/texture.c b/GL/texture.c index 2adaf94..cd4ce97 100644 --- a/GL/texture.c +++ b/GL/texture.c @@ -1593,6 +1593,7 @@ void APIENTRY glTexParameteri(GLenum target, GLenum pname, GLint param) { break; case GL_TEXTURE_WRAP_S: switch(param) { + case GL_CLAMP_TO_EDGE: case GL_CLAMP: active->uv_clamp |= CLAMP_U; break; @@ -1606,6 +1607,7 @@ void APIENTRY glTexParameteri(GLenum target, GLenum pname, GLint param) { case GL_TEXTURE_WRAP_T: switch(param) { + case GL_CLAMP_TO_EDGE: case GL_CLAMP: active->uv_clamp |= CLAMP_V; break; From 8481fd05ccbe52b5d61825c51bf1c0645be9069c Mon Sep 17 00:00:00 2001 From: Colton Pawielski Date: Sat, 4 Mar 2023 16:10:34 
-0600 Subject: [PATCH 06/20] Add dcbuild & pcbuild folders to .gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index aeef46b..fdf6f9c 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,5 @@ dc-build.sh build/* builddir/* version.[c|h] +pcbuild/* +dcbuild/* \ No newline at end of file From ac6914398c1492243308b902fa6237dbc78857a2 Mon Sep 17 00:00:00 2001 From: Colton Pawielski Date: Sat, 4 Mar 2023 16:11:11 -0600 Subject: [PATCH 07/20] Fix .elf extension on C example executables --- toolchains/Dreamcast.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toolchains/Dreamcast.cmake b/toolchains/Dreamcast.cmake index fa5110c..a35b73c 100644 --- a/toolchains/Dreamcast.cmake +++ b/toolchains/Dreamcast.cmake @@ -49,7 +49,7 @@ ENDIF() add_link_options(-L$ENV{KOS_BASE}/lib/dreamcast) link_libraries(-Wl,--start-group -lstdc++ -lkallisti -lc -lgcc -Wl,--end-group m) -SET(CMAKE_EXECUTABLE_SUFFIX ".elf") +SET(CMAKE_EXECUTABLE_SUFFIX_C ".elf") SET(CMAKE_EXECUTABLE_SUFFIX_CXX ".elf") ADD_DEFINITIONS( From 5bb2691b91c7458b971863ade27444b70e8a5745 Mon Sep 17 00:00:00 2001 From: Colton Pawielski Date: Sat, 4 Mar 2023 16:13:44 -0600 Subject: [PATCH 08/20] Change CMakeLists.txt to add -mfsrra and -mfsca only if supported by compiler GCC4.7 does not support these flags and cause a failure to build --- CMakeLists.txt | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3d54ba2..a9d9982 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,7 +24,16 @@ if(NOT PLATFORM_DREAMCAST) set(FIND_LIBRARY_USE_LIB32_PATHS true) set(FIND_LIBRARY_USE_LIB64_PATHS false) else() - set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -mfsrra -ffp-contract=fast -mfsca -ffast-math") + include(CheckCCompilerFlag) + check_c_compiler_flag("-mfsrra" COMPILER_HAS_FSRRA) + check_c_compiler_flag("-mfsca" COMPILER_HAS_FSCA) + if(COMPILER_HAS_FSRRA) + set(CMAKE_C_FLAGS_RELEASE 
"${CMAKE_C_FLAGS_RELEASE} -mfsrra") + endif() + if(COMPILER_HAS_FSCA) + set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -mfsca") + endif() + set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -ffp-contract=fast -ffast-math") endif() set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3 -fexpensive-optimizations") From e27c276daa300ecc3c8d86774b8fc0d51d6f1bca Mon Sep 17 00:00:00 2001 From: falco Date: Thu, 16 Feb 2023 17:09:15 -0500 Subject: [PATCH 09/20] Fixed poly/tri/quadmarks to build with GCC12 "start" is a reserved symbol within KOS, per crt1.s. Not sure why the old toolchains let us use this symbol name elsewhere, but the new GCC12 toolchain rightfully complains about duplicate symbols. - Renamed "start" timestamp to "begin." --- samples/polymark/main.c | 8 ++++---- samples/quadmark/main.c | 8 ++++---- samples/trimark/main.c | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/samples/polymark/main.c b/samples/polymark/main.c index 60d396b..2a58c45 100644 --- a/samples/polymark/main.c +++ b/samples/polymark/main.c @@ -100,7 +100,7 @@ void do_frame() { glKosSwapBuffers(); } -time_t start; +time_t begin; void switch_tests(int ppf) { printf("Beginning new test: %d polys per frame (%d per second at 60fps)\n", ppf * 3, ppf * 3 * 60); @@ -113,8 +113,8 @@ void check_switch() { now = time(NULL); - if(now >= (start + 5)) { - start = time(NULL); + if(now >= (begin + 5)) { + begin = time(NULL); printf(" Average Frame Rate: ~%f fps (%d pps)\n", avgfps, (int)(polycnt * avgfps * 2)); switch(phase) { @@ -165,7 +165,7 @@ int main(int argc, char **argv) { /* Start off with something obscene */ switch_tests(200000 / 60); - start = time(NULL); + begin = time(NULL); for(;;) { if(check_start()) diff --git a/samples/quadmark/main.c b/samples/quadmark/main.c index 847c9f6..4da3046 100644 --- a/samples/quadmark/main.c +++ b/samples/quadmark/main.c @@ -112,7 +112,7 @@ void do_frame() { glKosSwapBuffers(); } -time_t start; +time_t begin; void 
switch_tests(int ppf) { printf("Beginning new test: %d polys per frame (%d per second at 60fps)\n", ppf * 2, ppf * 2 * 60); @@ -125,8 +125,8 @@ void check_switch() { now = time(NULL); - if(now >= (start + 5)) { - start = time(NULL); + if(now >= (begin + 5)) { + begin = time(NULL); printf(" Average Frame Rate: ~%f fps (%d pps)\n", avgfps, (int)(polycnt * avgfps * 2)); switch(phase) { @@ -184,7 +184,7 @@ int main(int argc, char **argv) { /* Start off with something obscene */ switch_tests(200000 / 60); - start = time(NULL); + begin = time(NULL); uint32_t iterations = 2000; diff --git a/samples/trimark/main.c b/samples/trimark/main.c index 3bd2004..2b542cb 100644 --- a/samples/trimark/main.c +++ b/samples/trimark/main.c @@ -93,7 +93,7 @@ void do_frame() { glKosSwapBuffers(); } -time_t start; +time_t begin; void switch_tests(int ppf) { printf("Beginning new test: %d polys per frame (%d per second at 60fps)\n", ppf * 2, ppf * 2 * 60); @@ -106,8 +106,8 @@ void check_switch() { now = time(NULL); - if(now >= (start + 5)) { - start = time(NULL); + if(now >= (begin + 5)) { + begin = time(NULL); printf(" Average Frame Rate: ~%f fps (%d pps)\n", avgfps, (int)(polycnt * avgfps * 2)); switch(phase) { @@ -155,7 +155,7 @@ int main(int argc, char **argv) { /* Start off with something obscene */ switch_tests(220000 / 60); - start = time(NULL); + begin = time(NULL); for(;;) { if(check_start()) From b08bbebf1263342113a55ca920d419911a6944f9 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Sun, 5 Mar 2023 20:26:12 +0000 Subject: [PATCH 10/20] Mipmap fix --- GL/texture.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/GL/texture.c b/GL/texture.c index cd4ce97..ce2414e 100644 --- a/GL/texture.c +++ b/GL/texture.c @@ -1215,12 +1215,15 @@ void _glAllocateSpaceForMipmaps(TextureObject* active) { GLuint size = active->baseDataSize; /* Copy the data out of the pvr and back to ram */ - GLubyte* temp = (GLubyte*) malloc(size); - memcpy(temp, 
active->data, size); + GLubyte* temp = NULL; + if(active->data) { + temp = (GLubyte*) malloc(size); + memcpy(temp, active->data, size); - /* Free the PVR data */ - yalloc_free(YALLOC_BASE, active->data); - active->data = NULL; + /* Free the PVR data */ + yalloc_free(YALLOC_BASE, active->data); + active->data = NULL; + } /* Figure out how much room to allocate for mipmaps */ GLuint bytes = _glGetMipmapDataSize(active); @@ -1228,17 +1231,15 @@ void _glAllocateSpaceForMipmaps(TextureObject* active) { active->data = yalloc_alloc_and_defrag(bytes); gl_assert(active->data); - if(!active->data) { + + if(temp) { + /* If there was existing data, then copy it where it should go */ + memcpy(_glGetMipmapLocation(active, 0), temp, size); + + /* We no longer need this */ free(temp); - return; } - /* If there was existing data, then copy it where it should go */ - memcpy(_glGetMipmapLocation(active, 0), temp, size); - - /* We no longer need this */ - free(temp); - /* Set the data offset depending on whether or not this is a * paletted texure */ active->baseDataOffset = _glGetMipmapDataOffset(active, 0); From f0a3465486b1508a874885ef6906dbd11ed6be06 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Sun, 5 Mar 2023 21:16:12 +0000 Subject: [PATCH 11/20] Use Moops memcpy --- GL/platforms/sh4.h | 33 +++++++++++++++++++++++++++++++-- GL/types.h | 2 +- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/GL/platforms/sh4.h b/GL/platforms/sh4.h index 87f0f91..6bc89c1 100644 --- a/GL/platforms/sh4.h +++ b/GL/platforms/sh4.h @@ -26,6 +26,34 @@ #define PREFETCH(addr) __asm__("pref @%0" : : "r"((addr))) +GL_FORCE_INLINE void* memcpy_fast(void *dest, const void *src, size_t len) { + if(!len) { + return dest; + } + + const uint8_t *s = (uint8_t *)src; + uint8_t *d = (uint8_t *)dest; + + uint32_t diff = (uint32_t)d - (uint32_t)(s + 1); // extra offset because input gets incremented before output is calculated + // Underflow would be like adding a negative offset + + // Can use 'd' as 
a scratch reg now + asm volatile ( + "clrs\n" // Align for parallelism (CO) - SH4a use "stc SR, Rn" instead with a dummy Rn + ".align 2\n" + "0:\n\t" + "dt %[size]\n\t" // (--len) ? 0 -> T : 1 -> T (EX 1) + "mov.b @%[in]+, %[scratch]\n\t" // scratch = *(s++) (LS 1/2) + "bf.s 0b\n\t" // while(s != nexts) aka while(!T) (BR 1/2) + " mov.b %[scratch], @(%[offset], %[in])\n" // *(datatype_of_s*) ((char*)s + diff) = scratch, where src + diff = dest (LS 1) + : [in] "+&r" ((uint32_t)s), [scratch] "=&r" ((uint32_t)d), [size] "+&r" (len) // outputs + : [offset] "z" (diff) // inputs + : "t", "memory" // clobbers + ); + + return dest; +} + /* We use sq_cpy if the src and size is properly aligned. We control that the * destination is properly aligned so we assert that. */ #define FASTCPY(dst, src, bytes) \ @@ -34,11 +62,12 @@ gl_assert(((uintptr_t) dst) % 32 == 0); \ sq_cpy(dst, src, bytes); \ } else { \ - memcpy(dst, src, bytes); \ + memcpy_fast(dst, src, bytes); \ } \ } while(0) -#define MEMCPY4(dst, src, bytes) memcpy4(dst, src, bytes) + +#define MEMCPY4(dst, src, bytes) memcpy_fast(dst, src, bytes) #define MEMSET4(dst, v, size) memset4((dst), (v), (size)) diff --git a/GL/types.h b/GL/types.h index 99758e2..85df8ba 100644 --- a/GL/types.h +++ b/GL/types.h @@ -13,4 +13,4 @@ typedef struct { * but we're not using that for now, so having W here makes the code * simpler */ float w; -} Vertex; +} __attribute__ ((aligned (32))) Vertex; From a9f3e3a74464475abc03d11b85b1a43aed7063ea Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Mon, 6 Mar 2023 13:44:17 +0000 Subject: [PATCH 12/20] Fix alignments --- GL/platforms/sh4.c | 4 +++- GL/private.h | 2 +- containers/aligned_vector.h | 8 ++++---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index 33a5f8f..78e1772 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -100,7 +100,7 @@ GL_FORCE_INLINE void _glSubmitHeaderOrVertex(const Vertex* v) { d += 8; } -static struct { 
+static struct __attribute__((aligned(32))) { Vertex* v; int visible; } triangle[3]; @@ -182,6 +182,8 @@ GL_FORCE_INLINE void ShiftRotateTriangle() { #define SPAN_SORT_CFG 0x005F8030 void SceneListSubmit(void* src, int n) { + Vertex __attribute__((aligned(32))) tmp; + /* Do everything, everywhere, all at once */ PVR_SET(SPAN_SORT_CFG, 0x0); diff --git a/GL/private.h b/GL/private.h index b61f29f..9fa45cf 100644 --- a/GL/private.h +++ b/GL/private.h @@ -251,7 +251,7 @@ typedef struct { * when a realloc could invalidate pointers. This structure holds all the information * we need on the target vertex array to allow passing around to the various stages (e.g. generate/clip etc.) */ -typedef struct { +typedef struct __attribute__((aligned(32))) { PolyList* output; uint32_t header_offset; // The offset of the header in the output list uint32_t start_offset; // The offset into the output list diff --git a/containers/aligned_vector.h b/containers/aligned_vector.h index cf89ffe..12f3ae3 100644 --- a/containers/aligned_vector.h +++ b/containers/aligned_vector.h @@ -25,10 +25,10 @@ static inline void* memalign(size_t alignment, size_t size) { #endif typedef struct { - unsigned int size; - unsigned int capacity; - unsigned char* data; - unsigned int element_size; + uint8_t* __attribute__((aligned(32))) data; + uint32_t size; + uint32_t capacity; + uint32_t element_size; } AlignedVector; #define ALIGNED_VECTOR_CHUNK_SIZE 256u From be4c1bc14c0128df791e313ebac6391effb64cd4 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Mon, 6 Mar 2023 13:54:31 +0000 Subject: [PATCH 13/20] Add missing header --- containers/aligned_vector.h | 1 + 1 file changed, 1 insertion(+) diff --git a/containers/aligned_vector.h b/containers/aligned_vector.h index 12f3ae3..c9d1b53 100644 --- a/containers/aligned_vector.h +++ b/containers/aligned_vector.h @@ -3,6 +3,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { From c195d471e14b38deffad78946f7a1cc58e2b58e2 Mon Sep 17 00:00:00 2001 
From: Luke Benstead Date: Mon, 6 Mar 2023 14:05:14 +0000 Subject: [PATCH 14/20] Faster memcpy --- containers/aligned_vector.h | 48 ++++++++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/containers/aligned_vector.h b/containers/aligned_vector.h index c9d1b53..3400fe1 100644 --- a/containers/aligned_vector.h +++ b/containers/aligned_vector.h @@ -18,9 +18,48 @@ static inline void* memalign(size_t alignment, size_t size) { #include #endif +#ifdef __cplusplus +#define AV_FORCE_INLINE static inline +#else +#define AV_NO_INSTRUMENT inline __attribute__((no_instrument_function)) +#define AV_INLINE_DEBUG AV_NO_INSTRUMENT __attribute__((always_inline)) +#define AV_FORCE_INLINE static AV_INLINE_DEBUG +#endif + + #ifdef __DREAMCAST__ #include -#define AV_MEMCPY4 memcpy4 + +AV_FORCE_INLINE void *AV_MEMCPY4(void *dest, const void *src, size_t len) +{ + if(!len) + { + return dest; + } + + const uint8_t *s = (uint8_t *)src; + uint8_t *d = (uint8_t *)dest; + + uint32_t diff = (uint32_t)d - (uint32_t)(s + 1); // extra offset because input gets incremented before output is calculated + // Underflow would be like adding a negative offset + + // Can use 'd' as a scratch reg now + asm volatile ( + "clrs\n" // Align for parallelism (CO) - SH4a use "stc SR, Rn" instead with a dummy Rn + ".align 2\n" + "0:\n\t" + "dt %[size]\n\t" // (--len) ? 
0 -> T : 1 -> T (EX 1) + "mov.b @%[in]+, %[scratch]\n\t" // scratch = *(s++) (LS 1/2) + "bf.s 0b\n\t" // while(s != nexts) aka while(!T) (BR 1/2) + " mov.b %[scratch], @(%[offset], %[in])\n" // *(datatype_of_s*) ((char*)s + diff) = scratch, where src + diff = dest (LS 1) + : [in] "+&r" ((uint32_t)s), [scratch] "=&r" ((uint32_t)d), [size] "+&r" (len) // outputs + : [offset] "z" (diff) // inputs + : "t", "memory" // clobbers + ); + + return dest; +} + #else #define AV_MEMCPY4 memcpy #endif @@ -34,13 +73,6 @@ typedef struct { #define ALIGNED_VECTOR_CHUNK_SIZE 256u -#ifdef __cplusplus -#define AV_FORCE_INLINE static inline -#else -#define AV_NO_INSTRUMENT inline __attribute__((no_instrument_function)) -#define AV_INLINE_DEBUG AV_NO_INSTRUMENT __attribute__((always_inline)) -#define AV_FORCE_INLINE static AV_INLINE_DEBUG -#endif #define ROUND_TO_CHUNK_SIZE(v) \ ((((v) + ALIGNED_VECTOR_CHUNK_SIZE - 1) / ALIGNED_VECTOR_CHUNK_SIZE) * ALIGNED_VECTOR_CHUNK_SIZE) From 8beb295c6bb997f70738519e848548b9db17c847 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Tue, 7 Mar 2023 10:19:09 +0000 Subject: [PATCH 15/20] Refactor state management --- GL/draw.c | 12 +- GL/lighting.c | 332 ++++++++++++++++++++++------------------- GL/platforms/sh4.c | 363 ++++++++++++++++++++++----------------------- GL/private.h | 43 +++--- GL/state.c | 337 ++++++++++++++++++++++++++++------------- 5 files changed, 615 insertions(+), 472 deletions(-) diff --git a/GL/draw.c b/GL/draw.c index 9b0bfd0..be58241 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -13,9 +13,12 @@ GLuint ENABLED_VERTEX_ATTRIBUTES = 0; GLuint FAST_PATH_ENABLED = GL_FALSE; static GLubyte ACTIVE_CLIENT_TEXTURE = 0; +static const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; extern inline GLuint _glRecalcFastPath(); +extern GLboolean AUTOSORT_ENABLED; + #define ITERATE(count) \ GLuint i = count; \ while(i--) @@ -116,8 +119,6 @@ static void _readVertexData3ui3f(const GLubyte* in, GLubyte* out) { static void _readVertexData3ub3f(const 
GLubyte* input, GLubyte* out) { - const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; - float* output = (float*) out; output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE; @@ -138,8 +139,6 @@ static void _readVertexData2f3f(const GLubyte* in, GLubyte* out) { } static void _readVertexData2ub3f(const GLubyte* input, GLubyte* out) { - const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; - float* output = (float*) out; output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE; @@ -173,7 +172,6 @@ static void _readVertexData2ui2f(const GLubyte* in, GLubyte* out) { } static void _readVertexData2ub2f(const GLubyte* input, GLubyte* out) { - const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; float* output = (float*) out; output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE; @@ -1137,7 +1135,7 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL * If we're not doing lighting though we can optimise by taking * vertices straight to clip-space */ - if(LIGHTING_ENABLED) { + if(_glIsLightingEnabled()) { _glMatrixLoadModelView(); } else { _glMatrixLoadModelViewProjection(); @@ -1152,7 +1150,7 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL transform(target); } - if(LIGHTING_ENABLED){ + if(_glIsLightingEnabled()){ light(target); /* OK eye-space work done, now move into clip space */ diff --git a/GL/lighting.c b/GL/lighting.c index 8218228..c63defd 100644 --- a/GL/lighting.c +++ b/GL/lighting.c @@ -12,126 +12,107 @@ * multiplier ends up less than this value */ #define ATTENUATION_THRESHOLD 100.0f -static GLfloat SCENE_AMBIENT [] = {0.2f, 0.2f, 0.2f, 1.0f}; -static GLboolean VIEWER_IN_EYE_COORDINATES = GL_TRUE; -static GLenum COLOR_CONTROL = GL_SINGLE_COLOR; -static GLenum COLOR_MATERIAL_MODE = GL_AMBIENT_AND_DIFFUSE; - -#define AMBIENT_MASK 1 -#define DIFFUSE_MASK 2 -#define EMISSION_MASK 4 -#define SPECULAR_MASK 8 -#define SCENE_AMBIENT_MASK 16 - -static GLenum COLOR_MATERIAL_MASK = AMBIENT_MASK | DIFFUSE_MASK; - -static LightSource 
LIGHTS[MAX_GLDC_LIGHTS]; -static GLuint ENABLED_LIGHT_COUNT = 0; -static Material MATERIAL; - -GL_FORCE_INLINE void _glPrecalcLightingValues(GLuint mask); - -static void recalcEnabledLights() { - GLubyte i; - - ENABLED_LIGHT_COUNT = 0; - for(i = 0; i < MAX_GLDC_LIGHTS; ++i) { - if(LIGHTS[i].isEnabled) { - ENABLED_LIGHT_COUNT++; - } - } -} - -void _glInitLights() { - static GLfloat ONE [] = {1.0f, 1.0f, 1.0f, 1.0f}; - static GLfloat ZERO [] = {0.0f, 0.0f, 0.0f, 1.0f}; - static GLfloat PARTIAL [] = {0.2f, 0.2f, 0.2f, 1.0f}; - static GLfloat MOSTLY [] = {0.8f, 0.8f, 0.8f, 1.0f}; - - memcpy(MATERIAL.ambient, PARTIAL, sizeof(GLfloat) * 4); - memcpy(MATERIAL.diffuse, MOSTLY, sizeof(GLfloat) * 4); - memcpy(MATERIAL.specular, ZERO, sizeof(GLfloat) * 4); - memcpy(MATERIAL.emissive, ZERO, sizeof(GLfloat) * 4); - MATERIAL.exponent = 0.0f; - - GLubyte i; - for(i = 0; i < MAX_GLDC_LIGHTS; ++i) { - memcpy(LIGHTS[i].ambient, ZERO, sizeof(GLfloat) * 4); - memcpy(LIGHTS[i].diffuse, ONE, sizeof(GLfloat) * 4); - memcpy(LIGHTS[i].specular, ONE, sizeof(GLfloat) * 4); - - if(i > 0) { - memcpy(LIGHTS[i].diffuse, ZERO, sizeof(GLfloat) * 4); - memcpy(LIGHTS[i].specular, ZERO, sizeof(GLfloat) * 4); - } - - LIGHTS[i].position[0] = LIGHTS[i].position[1] = LIGHTS[i].position[3] = 0.0f; - LIGHTS[i].position[2] = 1.0f; - LIGHTS[i].isDirectional = GL_TRUE; - LIGHTS[i].isEnabled = GL_FALSE; - - LIGHTS[i].spot_direction[0] = LIGHTS[i].spot_direction[1] = 0.0f; - LIGHTS[i].spot_direction[2] = -1.0f; - - LIGHTS[i].spot_exponent = 0.0f; - LIGHTS[i].spot_cutoff = 180.0f; - - LIGHTS[i].constant_attenuation = 1.0f; - LIGHTS[i].linear_attenuation = 0.0f; - LIGHTS[i].quadratic_attenuation = 0.0f; - } - - _glPrecalcLightingValues(~0); - recalcEnabledLights(); -} - -void _glEnableLight(GLubyte light, GLboolean value) { - LIGHTS[light].isEnabled = value; - recalcEnabledLights(); -} - -GL_FORCE_INLINE void _glPrecalcLightingValues(GLuint mask) { +void _glPrecalcLightingValues(GLuint mask) { /* Pre-calculate 
lighting values */ GLshort i; + Material* material = _glActiveMaterial(); + if(mask & AMBIENT_MASK) { for(i = 0; i < MAX_GLDC_LIGHTS; ++i) { - LIGHTS[i].ambientMaterial[0] = LIGHTS[i].ambient[0] * MATERIAL.ambient[0]; - LIGHTS[i].ambientMaterial[1] = LIGHTS[i].ambient[1] * MATERIAL.ambient[1]; - LIGHTS[i].ambientMaterial[2] = LIGHTS[i].ambient[2] * MATERIAL.ambient[2]; - LIGHTS[i].ambientMaterial[3] = LIGHTS[i].ambient[3] * MATERIAL.ambient[3]; + LightSource* light = _glLightAt(i); + + light->ambientMaterial[0] = light->ambient[0] * material->ambient[0]; + light->ambientMaterial[1] = light->ambient[1] * material->ambient[1]; + light->ambientMaterial[2] = light->ambient[2] * material->ambient[2]; + light->ambientMaterial[3] = light->ambient[3] * material->ambient[3]; + } } if(mask & DIFFUSE_MASK) { for(i = 0; i < MAX_GLDC_LIGHTS; ++i) { - LIGHTS[i].diffuseMaterial[0] = LIGHTS[i].diffuse[0] * MATERIAL.diffuse[0]; - LIGHTS[i].diffuseMaterial[1] = LIGHTS[i].diffuse[1] * MATERIAL.diffuse[1]; - LIGHTS[i].diffuseMaterial[2] = LIGHTS[i].diffuse[2] * MATERIAL.diffuse[2]; - LIGHTS[i].diffuseMaterial[3] = LIGHTS[i].diffuse[3] * MATERIAL.diffuse[3]; + LightSource* light = _glLightAt(i); + + light->diffuseMaterial[0] = light->diffuse[0] * material->diffuse[0]; + light->diffuseMaterial[1] = light->diffuse[1] * material->diffuse[1]; + light->diffuseMaterial[2] = light->diffuse[2] * material->diffuse[2]; + light->diffuseMaterial[3] = light->diffuse[3] * material->diffuse[3]; } } if(mask & SPECULAR_MASK) { for(i = 0; i < MAX_GLDC_LIGHTS; ++i) { - LIGHTS[i].specularMaterial[0] = LIGHTS[i].specular[0] * MATERIAL.specular[0]; - LIGHTS[i].specularMaterial[1] = LIGHTS[i].specular[1] * MATERIAL.specular[1]; - LIGHTS[i].specularMaterial[2] = LIGHTS[i].specular[2] * MATERIAL.specular[2]; - LIGHTS[i].specularMaterial[3] = LIGHTS[i].specular[3] * MATERIAL.specular[3]; + LightSource* light = _glLightAt(i); + + light->specularMaterial[0] = light->specular[0] * material->specular[0]; + 
light->specularMaterial[1] = light->specular[1] * material->specular[1]; + light->specularMaterial[2] = light->specular[2] * material->specular[2]; + light->specularMaterial[3] = light->specular[3] * material->specular[3]; } } /* If ambient or emission are updated, we need to update * the base colour. */ if((mask & AMBIENT_MASK) || (mask & EMISSION_MASK) || (mask & SCENE_AMBIENT_MASK)) { - MATERIAL.baseColour[0] = MATH_fmac(SCENE_AMBIENT[0], MATERIAL.ambient[0], MATERIAL.emissive[0]); - MATERIAL.baseColour[1] = MATH_fmac(SCENE_AMBIENT[1], MATERIAL.ambient[1], MATERIAL.emissive[1]); - MATERIAL.baseColour[2] = MATH_fmac(SCENE_AMBIENT[2], MATERIAL.ambient[2], MATERIAL.emissive[2]); - MATERIAL.baseColour[3] = MATH_fmac(SCENE_AMBIENT[3], MATERIAL.ambient[3], MATERIAL.emissive[3]); + GLfloat* scene_ambient = _glLightModelSceneAmbient(); + + material->baseColour[0] = MATH_fmac(scene_ambient[0], material->ambient[0], material->emissive[0]); + material->baseColour[1] = MATH_fmac(scene_ambient[1], material->ambient[1], material->emissive[1]); + material->baseColour[2] = MATH_fmac(scene_ambient[2], material->ambient[2], material->emissive[2]); + material->baseColour[3] = MATH_fmac(scene_ambient[3], material->ambient[3], material->emissive[3]); } } +void _glInitLights() { + Material* material = _glActiveMaterial(); + + static GLfloat ONE [] = {1.0f, 1.0f, 1.0f, 1.0f}; + static GLfloat ZERO [] = {0.0f, 0.0f, 0.0f, 1.0f}; + static GLfloat PARTIAL [] = {0.2f, 0.2f, 0.2f, 1.0f}; + static GLfloat MOSTLY [] = {0.8f, 0.8f, 0.8f, 1.0f}; + + memcpy(material->ambient, PARTIAL, sizeof(GLfloat) * 4); + memcpy(material->diffuse, MOSTLY, sizeof(GLfloat) * 4); + memcpy(material->specular, ZERO, sizeof(GLfloat) * 4); + memcpy(material->emissive, ZERO, sizeof(GLfloat) * 4); + material->exponent = 0.0f; + + GLubyte i; + for(i = 0; i < MAX_GLDC_LIGHTS; ++i) { + LightSource* light = _glLightAt(i); + + memcpy(light->ambient, ZERO, sizeof(GLfloat) * 4); + memcpy(light->diffuse, ONE, sizeof(GLfloat) 
* 4); + memcpy(light->specular, ONE, sizeof(GLfloat) * 4); + + if(i > 0) { + memcpy(light->diffuse, ZERO, sizeof(GLfloat) * 4); + memcpy(light->specular, ZERO, sizeof(GLfloat) * 4); + } + + light->position[0] = light->position[1] = light->position[3] = 0.0f; + light->position[2] = 1.0f; + light->isDirectional = GL_TRUE; + light->isEnabled = GL_FALSE; + + light->spot_direction[0] = light->spot_direction[1] = 0.0f; + light->spot_direction[2] = -1.0f; + + light->spot_exponent = 0.0f; + light->spot_cutoff = 180.0f; + + light->constant_attenuation = 1.0f; + light->linear_attenuation = 0.0f; + light->quadratic_attenuation = 0.0f; + } + + _glPrecalcLightingValues(~0); + _glRecalcEnabledLights(); +} + + void APIENTRY glLightModelf(GLenum pname, const GLfloat param) { glLightModelfv(pname, &param); } @@ -143,11 +124,11 @@ void APIENTRY glLightModeli(GLenum pname, const GLint param) { void APIENTRY glLightModelfv(GLenum pname, const GLfloat *params) { switch(pname) { case GL_LIGHT_MODEL_AMBIENT: { - for(int i = 0; i < 4; ++i) SCENE_AMBIENT[i] = params[i]; + _glSetLightModelSceneAmbient(params); _glPrecalcLightingValues(SCENE_AMBIENT_MASK); } break; case GL_LIGHT_MODEL_LOCAL_VIEWER: - VIEWER_IN_EYE_COORDINATES = (*params) ? GL_TRUE : GL_FALSE; + _glSetLightModelViewerInEyeCoordinates((*params) ? GL_TRUE : GL_FALSE); break; case GL_LIGHT_MODEL_TWO_SIDE: /* Not implemented */ @@ -159,10 +140,10 @@ void APIENTRY glLightModelfv(GLenum pname, const GLfloat *params) { void APIENTRY glLightModeliv(GLenum pname, const GLint* params) { switch(pname) { case GL_LIGHT_MODEL_COLOR_CONTROL: - COLOR_CONTROL = *params; + _glSetLightModelColorControl(*params); break; case GL_LIGHT_MODEL_LOCAL_VIEWER: - VIEWER_IN_EYE_COORDINATES = (*params) ? 
GL_TRUE : GL_FALSE); break; default: _glKosThrowError(GL_INVALID_ENUM, __func__); @@ -173,6 +154,7 @@ void APIENTRY glLightfv(GLenum light, GLenum pname, const GLfloat *params) { GLubyte idx = light & 0xF; if(idx >= MAX_GLDC_LIGHTS) { + _glKosThrowError(GL_INVALID_VALUE, __func__); return; } @@ -180,33 +162,35 @@ void APIENTRY glLightfv(GLenum light, GLenum pname, const GLfloat *params) { (pname == GL_DIFFUSE) ? DIFFUSE_MASK : (pname == GL_SPECULAR) ? SPECULAR_MASK : 0; + LightSource* l = _glLightAt(idx); + switch(pname) { case GL_AMBIENT: - memcpy(LIGHTS[idx].ambient, params, sizeof(GLfloat) * 4); + memcpy(l->ambient, params, sizeof(GLfloat) * 4); break; case GL_DIFFUSE: - memcpy(LIGHTS[idx].diffuse, params, sizeof(GLfloat) * 4); + memcpy(l->diffuse, params, sizeof(GLfloat) * 4); break; case GL_SPECULAR: - memcpy(LIGHTS[idx].specular, params, sizeof(GLfloat) * 4); + memcpy(l->specular, params, sizeof(GLfloat) * 4); break; case GL_POSITION: { _glMatrixLoadModelView(); - memcpy(LIGHTS[idx].position, params, sizeof(GLfloat) * 4); + memcpy(l->position, params, sizeof(GLfloat) * 4); - LIGHTS[idx].isDirectional = params[3] == 0.0f; + l->isDirectional = params[3] == 0.0f; - if(LIGHTS[idx].isDirectional) { + if(l->isDirectional) { //FIXME: Do we need to rotate directional lights? 
} else { - TransformVec3(LIGHTS[idx].position); + TransformVec3(l->position); } } break; case GL_SPOT_DIRECTION: { - LIGHTS[idx].spot_direction[0] = params[0]; - LIGHTS[idx].spot_direction[1] = params[1]; - LIGHTS[idx].spot_direction[2] = params[2]; + l->spot_direction[0] = params[0]; + l->spot_direction[1] = params[1]; + l->spot_direction[2] = params[2]; } break; case GL_CONSTANT_ATTENUATION: case GL_LINEAR_ATTENUATION: @@ -227,24 +211,26 @@ void APIENTRY glLightf(GLenum light, GLenum pname, GLfloat param) { GLubyte idx = light & 0xF; if(idx >= MAX_GLDC_LIGHTS) { + _glKosThrowError(GL_INVALID_VALUE, __func__); return; } + LightSource* l = _glLightAt(idx); switch(pname) { case GL_CONSTANT_ATTENUATION: - LIGHTS[idx].constant_attenuation = param; + l->constant_attenuation = param; break; case GL_LINEAR_ATTENUATION: - LIGHTS[idx].linear_attenuation = param; + l->linear_attenuation = param; break; case GL_QUADRATIC_ATTENUATION: - LIGHTS[idx].quadratic_attenuation = param; + l->quadratic_attenuation = param; break; case GL_SPOT_EXPONENT: - LIGHTS[idx].spot_exponent = param; + l->spot_exponent = param; break; case GL_SPOT_CUTOFF: - LIGHTS[idx].spot_cutoff = param; + l->spot_cutoff = param; break; default: _glKosThrowError(GL_INVALID_ENUM, __func__); @@ -257,7 +243,7 @@ void APIENTRY glMaterialf(GLenum face, GLenum pname, const GLfloat param) { return; } - MATERIAL.exponent = _MIN(param, 128); /* 128 is the max according to the GL spec */ + _glActiveMaterial()->exponent = _MIN(param, 128); /* 128 is the max according to the GL spec */ } void APIENTRY glMateriali(GLenum face, GLenum pname, const GLint param) { @@ -270,25 +256,27 @@ void APIENTRY glMaterialfv(GLenum face, GLenum pname, const GLfloat *params) { return; } + Material* material = _glActiveMaterial(); + switch(pname) { case GL_SHININESS: glMaterialf(face, pname, *params); break; case GL_AMBIENT: - vec4cpy(MATERIAL.ambient, params); + vec4cpy(material->ambient, params); break; case GL_DIFFUSE: - 
vec4cpy(MATERIAL.diffuse, params); + vec4cpy(material->diffuse, params); break; case GL_SPECULAR: - vec4cpy(MATERIAL.specular, params); + vec4cpy(material->specular, params); break; case GL_EMISSION: - vec4cpy(MATERIAL.emissive, params); + vec4cpy(material->emissive, params); break; case GL_AMBIENT_AND_DIFFUSE: { - vec4cpy(MATERIAL.ambient, params); - vec4cpy(MATERIAL.diffuse, params); + vec4cpy(material->ambient, params); + vec4cpy(material->diffuse, params); } break; case GL_COLOR_INDEXES: default: { @@ -318,12 +306,13 @@ void APIENTRY glColorMaterial(GLenum face, GLenum mode) { return; } - COLOR_MATERIAL_MASK = (mode == GL_AMBIENT) ? AMBIENT_MASK: + GLenum mask = (mode == GL_AMBIENT) ? AMBIENT_MASK: (mode == GL_DIFFUSE) ? DIFFUSE_MASK: (mode == GL_AMBIENT_AND_DIFFUSE) ? AMBIENT_MASK | DIFFUSE_MASK: (mode == GL_EMISSION) ? EMISSION_MASK : SPECULAR_MASK; - COLOR_MATERIAL_MODE = mode; + _glSetColorMaterialMask(mask); + _glSetColorMaterialMode(mode); } GL_FORCE_INLINE void bgra_to_float(const uint8_t* input, GLfloat* output) { @@ -336,44 +325,68 @@ GL_FORCE_INLINE void bgra_to_float(const uint8_t* input, GLfloat* output) { } void _glUpdateColourMaterialA(const GLubyte* argb) { + Material* material = _glActiveMaterial(); + float colour[4]; bgra_to_float(argb, colour); - vec4cpy(MATERIAL.ambient, colour); - _glPrecalcLightingValues(COLOR_MATERIAL_MASK); + vec4cpy(material->ambient, colour); + GLenum mask = _glColorMaterialMode(); + _glPrecalcLightingValues(mask); } void _glUpdateColourMaterialD(const GLubyte* argb) { + Material* material = _glActiveMaterial(); + float colour[4]; bgra_to_float(argb, colour); - vec4cpy(MATERIAL.diffuse, colour); - _glPrecalcLightingValues(COLOR_MATERIAL_MASK); + vec4cpy(material->diffuse, colour); + + GLenum mask = _glColorMaterialMode(); + _glPrecalcLightingValues(mask); } void _glUpdateColourMaterialE(const GLubyte* argb) { + Material* material = _glActiveMaterial(); + float colour[4]; bgra_to_float(argb, colour); - 
vec4cpy(MATERIAL.emissive, colour); - _glPrecalcLightingValues(COLOR_MATERIAL_MASK); + vec4cpy(material->emissive, colour); + + GLenum mask = _glColorMaterialMode(); + _glPrecalcLightingValues(mask); } void _glUpdateColourMaterialAD(const GLubyte* argb) { + Material* material = _glActiveMaterial(); + float colour[4]; bgra_to_float(argb, colour); - vec4cpy(MATERIAL.ambient, colour); - vec4cpy(MATERIAL.diffuse, colour); - _glPrecalcLightingValues(COLOR_MATERIAL_MASK); + vec4cpy(material->ambient, colour); + vec4cpy(material->diffuse, colour); + + GLenum mask = _glColorMaterialMode(); + _glPrecalcLightingValues(mask); } GL_FORCE_INLINE GLboolean isDiffuseColorMaterial() { - return (COLOR_MATERIAL_MODE == GL_DIFFUSE || COLOR_MATERIAL_MODE == GL_AMBIENT_AND_DIFFUSE); + GLenum mode = _glColorMaterialMode(); + return ( + mode == GL_DIFFUSE || + mode == GL_AMBIENT_AND_DIFFUSE + ); } GL_FORCE_INLINE GLboolean isAmbientColorMaterial() { - return (COLOR_MATERIAL_MODE == GL_AMBIENT || COLOR_MATERIAL_MODE == GL_AMBIENT_AND_DIFFUSE); + GLenum mode = _glColorMaterialMode(); + return ( + mode == GL_AMBIENT || + mode == GL_AMBIENT_AND_DIFFUSE + ); } GL_FORCE_INLINE GLboolean isSpecularColorMaterial() { - return (COLOR_MATERIAL_MODE == GL_SPECULAR); + GLenum mode = _glColorMaterialMode(); + return (mode == GL_SPECULAR); } /* @@ -408,12 +421,15 @@ GL_FORCE_INLINE void _glLightVertexDirectional( float* final, uint8_t lid, float LdotN, float NdotH) { - float FI = (MATERIAL.exponent) ? - faster_pow((LdotN != 0.0f) * NdotH, MATERIAL.exponent) : 1.0f; + Material* material = _glActiveMaterial(); + LightSource* light = _glLightAt(lid); + + float FI = (material->exponent) ? 
+ faster_pow((LdotN != 0.0f) * NdotH, material->exponent) : 1.0f; #define _PROCESS_COMPONENT(X) \ - final[X] += (LdotN * LIGHTS[lid].diffuseMaterial[X] + LIGHTS[lid].ambientMaterial[X]) \ - + (FI * LIGHTS[lid].specularMaterial[X]); \ + final[X] += (LdotN * light->diffuseMaterial[X] + light->ambientMaterial[X]) \ + + (FI * light->specularMaterial[X]); \ _PROCESS_COMPONENT(0); _PROCESS_COMPONENT(1); @@ -426,12 +442,15 @@ GL_FORCE_INLINE void _glLightVertexPoint( float* final, uint8_t lid, float LdotN, float NdotH, float att) { - float FI = (MATERIAL.exponent) ? - faster_pow((LdotN != 0.0f) * NdotH, MATERIAL.exponent) : 1.0f; + Material* material = _glActiveMaterial(); + LightSource* light = _glLightAt(lid); + + float FI = (material->exponent) ? + faster_pow((LdotN != 0.0f) * NdotH, material->exponent) : 1.0f; #define _PROCESS_COMPONENT(X) \ - final[X] += ((LdotN * LIGHTS[lid].diffuseMaterial[X] + LIGHTS[lid].ambientMaterial[X]) \ - + (FI * LIGHTS[lid].specularMaterial[X])) * att; \ + final[X] += ((LdotN * light->diffuseMaterial[X] + light->ambientMaterial[X]) \ + + (FI * light->specularMaterial[X])) * att; \ _PROCESS_COMPONENT(0); _PROCESS_COMPONENT(1); @@ -444,6 +463,8 @@ void _glPerformLighting(Vertex* vertices, EyeSpaceData* es, const uint32_t count GLubyte i; GLuint j; + Material* material = _glActiveMaterial(); + Vertex* vertex = vertices; EyeSpaceData* data = es; @@ -451,7 +472,8 @@ void _glPerformLighting(Vertex* vertices, EyeSpaceData* es, const uint32_t count void (*updateColourMaterial)(const GLubyte*) = NULL; if(_glIsColorMaterialEnabled()) { - switch(COLOR_MATERIAL_MODE) { + GLenum mode = _glColorMaterialMode(); + switch(mode) { case GL_AMBIENT: updateColourMaterial = _glUpdateColourMaterialA; break; @@ -474,10 +496,10 @@ void _glPerformLighting(Vertex* vertices, EyeSpaceData* es, const uint32_t count } /* Copy the base colour across */ - vec4cpy(data->finalColour, MATERIAL.baseColour); + vec4cpy(data->finalColour, material->baseColour); } - 
if(!ENABLED_LIGHT_COUNT) { + if(!_glEnabledLightCount()) { return; } @@ -495,15 +517,17 @@ void _glPerformLighting(Vertex* vertices, EyeSpaceData* es, const uint32_t count const float Nz = data->n[2]; for(i = 0; i < MAX_GLDC_LIGHTS; ++i) { - if(!LIGHTS[i].isEnabled) { + LightSource* light = _glLightAt(i); + + if(!light->isEnabled) { continue; } - float Lx = LIGHTS[i].position[0] - vertex->xyz[0]; - float Ly = LIGHTS[i].position[1] - vertex->xyz[1]; - float Lz = LIGHTS[i].position[2] - vertex->xyz[2]; + float Lx = light->position[0] - vertex->xyz[0]; + float Ly = light->position[1] - vertex->xyz[1]; + float Lz = light->position[2] - vertex->xyz[2]; - if(LIGHTS[i].isDirectional) { + if(light->isDirectional) { float Hx = (Lx + 0); float Hy = (Ly + 0); float Hz = (Lz + 1); @@ -532,9 +556,9 @@ void _glPerformLighting(Vertex* vertices, EyeSpaceData* es, const uint32_t count VEC3_LENGTH(Lx, Ly, Lz, D); float att = ( - LIGHTS[i].constant_attenuation + ( - LIGHTS[i].linear_attenuation * D - ) + (LIGHTS[i].quadratic_attenuation * D * D) + light->constant_attenuation + ( + light->linear_attenuation * D + ) + (light->quadratic_attenuation * D * D) ); /* Anything over the attenuation threshold will diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index 78e1772..da16cce 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -74,9 +74,7 @@ GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) { vertex->xyz[2] = (vertex->w == 1.0f) ? 
_glFastInvert(1.0001f + vertex->xyz[2]) : f; } -static uint32_t *d; // SQ target - -GL_FORCE_INLINE void _glSubmitHeaderOrVertex(const Vertex* v) { +GL_FORCE_INLINE void _glSubmitHeaderOrVertex(uint32_t* d, const Vertex* v) { #ifndef NDEBUG gl_assert(!isnan(v->xyz[2])); gl_assert(!isnan(v->w)); @@ -123,11 +121,14 @@ GL_FORCE_INLINE void interpolateColour(const uint8_t* v1, const uint8_t* v2, con } GL_FORCE_INLINE void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) { + static const float E [] = { + 0.00001f, -0.00001f + }; + /* Clipping time! */ const float d0 = v1->w + v1->xyz[2]; const float d1 = v2->w + v2->xyz[2]; - - const float epsilon = (d0 < d1) ? -0.00001f : 0.00001f; + const float epsilon = E[d0 < d1]; float t = MATH_Fast_Divide(d0, (d0 - d1)) + epsilon; @@ -182,28 +183,27 @@ GL_FORCE_INLINE void ShiftRotateTriangle() { #define SPAN_SORT_CFG 0x005F8030 void SceneListSubmit(void* src, int n) { - Vertex __attribute__((aligned(32))) tmp; + const float h = GetVideoMode()->height; - /* Do everything, everywhere, all at once */ PVR_SET(SPAN_SORT_CFG, 0x0); - /* Prep store queues */ - d = (uint32_t*) SQ_BASE_ADDRESS; - + uint32_t *d = (uint32_t*) SQ_BASE_ADDRESS; *PVR_LMMODE0 = 0x0; /* Enable 64bit mode */ + Vertex __attribute__((aligned(32))) tmp; + /* Perform perspective divide on each vertex */ Vertex* vertex = (Vertex*) src; - const float h = GetVideoMode()->height; + if(!_glNearZClippingEnabled()) { + /* Prep store queues */ - if(!ZNEAR_CLIPPING_ENABLED) { for(int i = 0; i < n; ++i, ++vertex) { PREFETCH(vertex + 1); if(glIsVertex(vertex->flags)) { _glPerspectiveDivideVertex(vertex, h); } - _glSubmitHeaderOrVertex(vertex); + _glSubmitHeaderOrVertex(d, vertex); } /* Wait for both store queues to complete */ @@ -221,25 +221,22 @@ void SceneListSubmit(void* src, int n) { #endif for(int i = 0; i < n; ++i, ++vertex) { - PREFETCH(vertex + 1); - - bool is_last_in_strip = glIsLastVertex(vertex->flags); + PREFETCH(vertex + 12); /* Wait until we fill the 
triangle */ if(tri_count < 3) { - if(likely(glIsVertex(vertex->flags))) { + if(glIsVertex(vertex->flags)) { + ++strip_count; triangle[tri_count].v = vertex; triangle[tri_count].visible = vertex->xyz[2] >= -vertex->w; - tri_count++; - strip_count++; + if(++tri_count < 3) { + continue; + } } else { /* We hit a header */ tri_count = 0; strip_count = 0; - _glSubmitHeaderOrVertex(vertex); - } - - if(tri_count < 3) { + _glSubmitHeaderOrVertex(d, vertex); continue; } } @@ -250,199 +247,189 @@ void SceneListSubmit(void* src, int n) { /* If we got here, then triangle contains 3 vertices */ int visible_mask = triangle[0].visible | (triangle[1].visible << 1) | (triangle[2].visible << 2); - if(visible_mask == 7) { -#if CLIP_DEBUG - printf("Visible\n"); -#endif - /* All the vertices are visible! We divide and submit v0, then shift */ - _glPerspectiveDivideVertex(vertex - 2, h); - _glSubmitHeaderOrVertex(vertex - 2); - if(is_last_in_strip) { - _glPerspectiveDivideVertex(vertex - 1, h); - _glSubmitHeaderOrVertex(vertex - 1); - _glPerspectiveDivideVertex(vertex, h); - _glSubmitHeaderOrVertex(vertex); - tri_count = 0; - strip_count = 0; - } + /* Clipping time! - ShiftRotateTriangle(); + There are 6 distinct possibilities when clipping a triangle. 3 of them result + in another triangle, 3 of them result in a quadrilateral. - } else if(visible_mask) { - /* Clipping time! + Assuming you iterate the edges of the triangle in order, and create a new *visible* + vertex when you cross the plane, and discard vertices behind the plane, then the only + difference between the two cases is that the final two vertices that need submitting have + to be reversed. - There are 6 distinct possibilities when clipping a triangle. 3 of them result - in another triangle, 3 of them result in a quadrilateral. + Unfortunately we have to copy vertices here, because if we persp-divide a vertex it may + be used in a subsequent triangle in the strip and would end up being double divided. 
+ */ - Assuming you iterate the edges of the triangle in order, and create a new *visible* - vertex when you cross the plane, and discard vertices behind the plane, then the only - difference between the two cases is that the final two vertices that need submitting have - to be reversed. +#define SUBMIT_QUEUED() \ + if(strip_count > 3) { \ + tmp = *(vertex - 2); \ + /* If we had triangles ahead of this one, submit and finalize */ \ + _glPerspectiveDivideVertex(&tmp, h); \ + _glSubmitHeaderOrVertex(d, &tmp); \ + tmp = *(vertex - 1); \ + tmp.flags = GPU_CMD_VERTEX_EOL; \ + _glPerspectiveDivideVertex(&tmp, h); \ + _glSubmitHeaderOrVertex(d, &tmp); \ + } - Unfortunately we have to copy vertices here, because if we persp-divide a vertex it may - be used in a subsequent triangle in the strip and would end up being double divided. - */ -#if CLIP_DEBUG - printf("Clip: %d, SC: %d\n", visible_mask, strip_count); - printf("%d, %d, %d\n", triangle[0].v - (Vertex*) src - 1, triangle[1].v - (Vertex*) src - 1, triangle[2].v - (Vertex*) src - 1); -#endif - Vertex tmp; - if(strip_count > 3) { -#if CLIP_DEBUG - printf("Flush\n"); -#endif - tmp = *(vertex - 2); - /* If we had triangles ahead of this one, submit and finalize */ + bool is_last_in_strip = glIsLastVertex(vertex->flags); + + switch(visible_mask) { + case 1: { + SUBMIT_QUEUED(); + /* 0, 0a, 2a */ + tmp = *triangle[0].v; + tmp.flags = GPU_CMD_VERTEX; _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glSubmitHeaderOrVertex(d, &tmp); - tmp = *(vertex - 1); + _glClipEdge(triangle[0].v, triangle[1].v, &tmp); + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(d, &tmp); + + _glClipEdge(triangle[2].v, triangle[0].v, &tmp); tmp.flags = GPU_CMD_VERTEX_EOL; _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); - } + _glSubmitHeaderOrVertex(d, &tmp); + } break; + case 2: { + SUBMIT_QUEUED(); + /* 0a, 1, 1a */ + _glClipEdge(triangle[0].v, triangle[1].v, 
&tmp); + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(d, &tmp); - switch(visible_mask) { - case 1: { - /* 0, 0a, 2a */ - tmp = *triangle[0].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + tmp = *triangle[1].v; + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(d, &tmp); - _glClipEdge(triangle[0].v, triangle[1].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glClipEdge(triangle[1].v, triangle[2].v, &tmp); + tmp.flags = GPU_CMD_VERTEX_EOL; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(d, &tmp); + } break; + case 3: { + SUBMIT_QUEUED(); + /* 0, 1, 2a, 1a */ + tmp = *triangle[0].v; + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(d, &tmp); - _glClipEdge(triangle[2].v, triangle[0].v, &tmp); - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); - } break; - case 2: { - /* 0a, 1, 1a */ - _glClipEdge(triangle[0].v, triangle[1].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + tmp = *triangle[1].v; + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(d, &tmp); - tmp = *triangle[1].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glClipEdge(triangle[2].v, triangle[0].v, &tmp); + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(d, &tmp); - _glClipEdge(triangle[1].v, triangle[2].v, &tmp); - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); - } break; - case 3: { - /* 0, 1, 2a, 1a */ - tmp = *triangle[0].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - 
_glSubmitHeaderOrVertex(&tmp); + _glClipEdge(triangle[1].v, triangle[2].v, &tmp); + tmp.flags = GPU_CMD_VERTEX_EOL; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(d, &tmp); + } break; + case 4: { + SUBMIT_QUEUED(); + /* 1a, 2, 2a */ + _glClipEdge(triangle[1].v, triangle[2].v, &tmp); + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(d, &tmp); - tmp = *triangle[1].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + tmp = *triangle[2].v; + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(d, &tmp); - _glClipEdge(triangle[2].v, triangle[0].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glClipEdge(triangle[2].v, triangle[0].v, &tmp); + tmp.flags = GPU_CMD_VERTEX_EOL; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(d, &tmp); + } break; + case 5: { + SUBMIT_QUEUED(); + /* 0, 0a, 2, 1a */ + tmp = *triangle[0].v; + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(d, &tmp); - _glClipEdge(triangle[1].v, triangle[2].v, &tmp); - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); - } break; - case 4: { - /* 1a, 2, 2a */ - _glClipEdge(triangle[1].v, triangle[2].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glClipEdge(triangle[0].v, triangle[1].v, &tmp); + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(d, &tmp); - tmp = *triangle[2].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + tmp = *triangle[2].v; + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(d, &tmp); - _glClipEdge(triangle[2].v, triangle[0].v, &tmp); - tmp.flags = 
GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); - } break; - case 5: { - /* 0, 0a, 2, 1a */ - tmp = *triangle[0].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glClipEdge(triangle[1].v, triangle[2].v, &tmp); + tmp.flags = GPU_CMD_VERTEX_EOL; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(d, &tmp); + } break; + case 6: { + SUBMIT_QUEUED(); + /* 0a, 1, 2a, 2 */ + _glClipEdge(triangle[0].v, triangle[1].v, &tmp); + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(d, &tmp); - _glClipEdge(triangle[0].v, triangle[1].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + tmp = *triangle[1].v; + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(d, &tmp); - tmp = *triangle[2].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glClipEdge(triangle[2].v, triangle[0].v, &tmp); + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(d, &tmp); - _glClipEdge(triangle[1].v, triangle[2].v, &tmp); - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); - } break; - case 6: { - /* 0a, 1, 2a, 2 */ - _glClipEdge(triangle[0].v, triangle[1].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + tmp = *triangle[2].v; + tmp.flags = GPU_CMD_VERTEX_EOL; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(d, &tmp); + } break; + case 7: { + /* All the vertices are visible! 
We divide and submit v0, then shift */ + _glPerspectiveDivideVertex(vertex - 2, h); + _glSubmitHeaderOrVertex(d, vertex - 2); - tmp = *triangle[1].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + if(is_last_in_strip) { + _glPerspectiveDivideVertex(vertex - 1, h); + _glSubmitHeaderOrVertex(d, vertex - 1); + _glPerspectiveDivideVertex(vertex, h); + _glSubmitHeaderOrVertex(d, vertex); + tri_count = 0; + strip_count = 0; + } - _glClipEdge(triangle[2].v, triangle[0].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); - - tmp = *triangle[2].v; - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); - } break; - default: - break; - } - - /* If this was the last in the strip, we don't need to - submit anything else, we just wipe the tri_count */ - if(is_last_in_strip) { - tri_count = 0; - strip_count = 0; - } else { ShiftRotateTriangle(); - strip_count = 2; - } - } else { - /* Invisible? 
Move to the next in the strip */ + continue; + } break; + case 0: + default: + break; + } - if(is_last_in_strip) { - tri_count = 0; - strip_count = 0; - } - strip_count = 2; + /* If this was the last in the strip, we don't need to + submit anything else, we just wipe the tri_count */ + if(is_last_in_strip) { + tri_count = 0; + strip_count = 0; + } else { ShiftRotateTriangle(); + strip_count = 2; } } diff --git a/GL/private.h b/GL/private.h index 9fa45cf..f728d05 100644 --- a/GL/private.h +++ b/GL/private.h @@ -354,26 +354,17 @@ void _glSetInternalPaletteFormat(GLenum val); GLboolean _glIsSharedTexturePaletteEnabled(); void _glApplyColorTable(TexturePalette *palette); -extern GLboolean BLEND_ENABLED; -extern GLboolean ALPHA_TEST_ENABLED; -extern GLboolean AUTOSORT_ENABLED; - -GL_FORCE_INLINE GLboolean _glIsBlendingEnabled() { - return BLEND_ENABLED; -} - -GL_FORCE_INLINE GLboolean _glIsAlphaTestEnabled() { - return ALPHA_TEST_ENABLED; -} +GLboolean _glIsBlendingEnabled(); +GLboolean _glIsAlphaTestEnabled(); extern PolyList OP_LIST; extern PolyList PT_LIST; extern PolyList TR_LIST; GL_FORCE_INLINE PolyList* _glActivePolyList() { - if(BLEND_ENABLED) { + if(_glIsBlendingEnabled()) { return &TR_LIST; - } else if(ALPHA_TEST_ENABLED) { + } else if(_glIsAlphaTestEnabled()) { return &PT_LIST; } else { return &OP_LIST; @@ -383,13 +374,9 @@ GL_FORCE_INLINE PolyList* _glActivePolyList() { GLboolean _glIsMipmapComplete(const TextureObject* obj); GLubyte* _glGetMipmapLocation(const TextureObject* obj, GLuint level); GLuint _glGetMipmapLevelCount(const TextureObject* obj); - -extern GLboolean ZNEAR_CLIPPING_ENABLED; - -extern GLboolean LIGHTING_ENABLED; GLboolean _glIsLightingEnabled(); -void _glEnableLight(GLubyte light, unsigned char value); +void _glEnableLight(GLubyte light, GLboolean value); GLboolean _glIsColorMaterialEnabled(); GLboolean _glIsNormalizeEnabled(); @@ -513,10 +500,30 @@ GLuint _glUsedTextureMemory(); GLuint _glFreeContiguousTextureMemory(); void 
_glApplyScissor(bool force); +void _glSetColorMaterialMask(GLenum mask); +void _glSetColorMaterialMode(GLenum mode); +GLenum _glColorMaterialMode(); + +Material* _glActiveMaterial(); +void _glSetLightModelViewerInEyeCoordinates(GLboolean v); +void _glSetLightModelSceneAmbient(const GLfloat* v); +void _glSetLightModelColorControl(GLint v); +GLuint _glEnabledLightCount(); +void _glRecalcEnabledLights(); +GLfloat* _glLightModelSceneAmbient(); +LightSource* _glLightAt(GLuint i); +GLboolean _glNearZClippingEnabled(); #define MAX_GLDC_TEXTURE_UNITS 2 #define MAX_GLDC_LIGHTS 8 +#define AMBIENT_MASK 1 +#define DIFFUSE_MASK 2 +#define EMISSION_MASK 4 +#define SPECULAR_MASK 8 +#define SCENE_AMBIENT_MASK 16 + + /* This is from KOS pvr_buffers.c */ #define PVR_MIN_Z 0.0001f diff --git a/GL/state.c b/GL/state.c index a302157..393aaec 100644 --- a/GL/state.c +++ b/GL/state.c @@ -10,64 +10,176 @@ PolyContext *_glGetPVRContext() { return &GL_CONTEXT; } + +static struct { + GLboolean is_dirty; + /* We can't just use the GL_CONTEXT for this state as the two * GL states are combined, so we store them separately and then * calculate the appropriate PVR state from them. 
*/ -static GLenum CULL_FACE = GL_BACK; -static GLenum FRONT_FACE = GL_CCW; -static GLboolean CULLING_ENABLED = GL_FALSE; -static GLboolean COLOR_MATERIAL_ENABLED = GL_FALSE; + GLenum depth_func; + GLboolean depth_test_enabled; + GLenum cull_face; + GLenum front_face; + GLboolean culling_enabled; + GLboolean color_material_enabled; + GLboolean znear_clipping_enabled; + GLboolean lighting_enabled; + GLboolean shared_palette_enabled; + GLboolean alpha_test_enabled; + GLboolean polygon_offset_enabled; + GLboolean normalize_enabled;; -GLboolean ZNEAR_CLIPPING_ENABLED = GL_TRUE; + struct { + GLint x; + GLint y; + GLsizei width; + GLsizei height; + GLboolean applied; + } scissor_rect; -GLboolean LIGHTING_ENABLED = GL_FALSE; + GLenum blend_sfactor; + GLenum blend_dfactor; + GLboolean blend_enabled; + GLfloat offset_factor; + GLfloat offset_units; -/* Is the shared texture palette enabled? */ -static GLboolean SHARED_PALETTE_ENABLED = GL_FALSE; + GLfloat scene_ambient[4]; + GLboolean viewer_in_eye_coords; + GLenum color_control; + GLenum color_material_mode; + GLenum color_material_mask; -GLboolean ALPHA_TEST_ENABLED = GL_FALSE; - -static GLboolean POLYGON_OFFSET_ENABLED = GL_FALSE; - -static GLboolean NORMALIZE_ENABLED = GL_FALSE; - -static struct { - GLint x; - GLint y; - GLsizei width; - GLsizei height; - GLboolean applied; -} SCISSOR_RECT = { - 0, 0, 640, 480, false + LightSource lights[MAX_GLDC_LIGHTS]; + GLuint enabled_light_count; + Material material; +} GPUState = { + GL_TRUE, + GL_LESS, + GL_FALSE, + GL_BACK, + GL_CCW, + GL_FALSE, + GL_FALSE, + GL_TRUE, + GL_FALSE, + GL_FALSE, + GL_FALSE, + GL_FALSE, + GL_FALSE, + {0, 0, 640, 480, false}, + GL_ONE, + GL_ZERO, + GL_FALSE, + 0.0f, + 0.0f, + {0.2f, 0.2f, 0.2f, 1.0f}, + GL_TRUE, + GL_SINGLE_COLOR, + GL_AMBIENT_AND_DIFFUSE, + AMBIENT_MASK | DIFFUSE_MASK, + {0}, + 0, + {0} }; +Material* _glActiveMaterial() { + return &GPUState.material; +} + +LightSource* _glLightAt(GLuint i) { + assert(i < MAX_GLDC_LIGHTS); + return 
&GPUState.lights[i]; +} + +void _glEnableLight(GLubyte light, GLboolean value) { + GPUState.lights[light].isEnabled = value; +} + +GLuint _glEnabledLightCount() { + return GPUState.enabled_light_count; +} + +GLfloat* _glLightModelSceneAmbient() { + return GPUState.scene_ambient; +} + +GLboolean _glIsBlendingEnabled() { + return GPUState.blend_enabled; +} + +GLboolean _glIsAlphaTestEnabled() { + return GPUState.alpha_test_enabled; +} + +void _glRecalcEnabledLights() { + GLubyte i; + + GPUState.enabled_light_count = 0; + for(int i = 0; i < MAX_GLDC_LIGHTS; ++i) { + if(_glLightAt(i)->isEnabled) { + GPUState.enabled_light_count++; + } + } +} + +void _glSetLightModelViewerInEyeCoordinates(GLboolean v) { + GPUState.viewer_in_eye_coords = v; +} + +void _glSetLightModelSceneAmbient(const GLfloat* v) { + vec4cpy(GPUState.scene_ambient, v); +} + +void _glSetLightModelColorControl(GLint v) { + GPUState.color_control = v; +} + +GLenum _glColorMaterialMask() { + return GPUState.color_material_mask; +} + +void _glSetColorMaterialMask(GLenum mask) { + GPUState.color_material_mask = mask; +} + +void _glSetColorMaterialMode(GLenum mode) { + GPUState.color_material_mode = mode; +} + +GLenum _glColorMaterialMode() { + return GPUState.color_material_mode; +} + GLboolean _glIsSharedTexturePaletteEnabled() { - return SHARED_PALETTE_ENABLED; + return GPUState.shared_palette_enabled; +} + +GLboolean _glNearZClippingEnabled() { + return GPUState.znear_clipping_enabled; } void _glApplyScissor(bool force); static int _calc_pvr_face_culling() { - if(!CULLING_ENABLED) { + if(!GPUState.culling_enabled) { return GPU_CULLING_SMALL; } else { - if(CULL_FACE == GL_BACK) { - return (FRONT_FACE == GL_CW) ? GPU_CULLING_CCW : GPU_CULLING_CW; + if(GPUState.cull_face == GL_BACK) { + return (GPUState.front_face == GL_CW) ? GPU_CULLING_CCW : GPU_CULLING_CW; } else { - return (FRONT_FACE == GL_CCW) ? GPU_CULLING_CCW : GPU_CULLING_CW; + return (GPUState.front_face == GL_CCW) ? 
GPU_CULLING_CCW : GPU_CULLING_CW; } } } -static GLenum DEPTH_FUNC = GL_LESS; -static GLboolean DEPTH_TEST_ENABLED = GL_FALSE; static int _calc_pvr_depth_test() { - if(!DEPTH_TEST_ENABLED) { + if(!GPUState.depth_test_enabled) { return GPU_DEPTHCMP_ALWAYS; } - switch(DEPTH_FUNC) { + switch(GPUState.depth_func) { case GL_NEVER: return GPU_DEPTHCMP_NEVER; case GL_LESS: @@ -89,15 +201,9 @@ static int _calc_pvr_depth_test() { } } -static GLenum BLEND_SFACTOR = GL_ONE; -static GLenum BLEND_DFACTOR = GL_ZERO; -GLboolean BLEND_ENABLED = GL_FALSE; - -static GLfloat OFFSET_FACTOR = 0.0f; -static GLfloat OFFSET_UNITS = 0.0f; GLboolean _glIsNormalizeEnabled() { - return NORMALIZE_ENABLED; + return GPUState.normalize_enabled; } static int _calcPVRBlendFactor(GLenum factor) { @@ -125,14 +231,14 @@ static int _calcPVRBlendFactor(GLenum factor) { } static void _updatePVRBlend(PolyContext* context) { - if(BLEND_ENABLED || ALPHA_TEST_ENABLED) { + if(GPUState.blend_enabled || GPUState.alpha_test_enabled) { context->gen.alpha = GPU_ALPHA_ENABLE; } else { context->gen.alpha = GPU_ALPHA_DISABLE; } - context->blend.src = _calcPVRBlendFactor(BLEND_SFACTOR); - context->blend.dst = _calcPVRBlendFactor(BLEND_DFACTOR); + context->blend.src = _calcPVRBlendFactor(GPUState.blend_sfactor); + context->blend.dst = _calcPVRBlendFactor(GPUState.blend_dfactor); } GLboolean _glCheckValidEnum(GLint param, GLint* values, const char* func) { @@ -167,7 +273,7 @@ void _glUpdatePVRTextureContext(PolyContext *context, GLshort textureUnit) { return; } - context->txr.alpha = (BLEND_ENABLED || ALPHA_TEST_ENABLED) ? GPU_TXRALPHA_ENABLE : GPU_TXRALPHA_DISABLE; + context->txr.alpha = (GPUState.blend_enabled || GPUState.alpha_test_enabled) ? 
GPU_TXRALPHA_ENABLE : GPU_TXRALPHA_DISABLE; GLuint filter = GPU_FILTER_NEAREST; GLboolean enableMipmaps = GL_FALSE; @@ -262,11 +368,11 @@ void _glUpdatePVRTextureContext(PolyContext *context, GLshort textureUnit) { } GLboolean _glIsLightingEnabled() { - return LIGHTING_ENABLED; + return GPUState.lighting_enabled; } GLboolean _glIsColorMaterialEnabled() { - return COLOR_MATERIAL_ENABLED; + return GPUState.color_material_enabled; } static GLfloat CLEAR_COLOUR[3]; @@ -281,10 +387,10 @@ void _glInitContext() { const VideoMode* mode = GetVideoMode(); - SCISSOR_RECT.x = 0; - SCISSOR_RECT.y = 0; - SCISSOR_RECT.width = mode->width; - SCISSOR_RECT.height = mode->height; + GPUState.scissor_rect.x = 0; + GPUState.scissor_rect.y = 0; + GPUState.scissor_rect.width = mode->width; + GPUState.scissor_rect.height = mode->height; glClearDepth(1.0f); glDepthFunc(GL_LESS); @@ -310,20 +416,24 @@ void _glInitContext() { } GLAPI void APIENTRY glEnable(GLenum cap) { + GLboolean was_dirty = GPUState.is_dirty; + + GPUState.is_dirty = GL_TRUE; + switch(cap) { case GL_TEXTURE_2D: TEXTURES_ENABLED[_glGetActiveTexture()] = GL_TRUE; break; case GL_CULL_FACE: { - CULLING_ENABLED = GL_TRUE; + GPUState.cull_face = GL_TRUE; GL_CONTEXT.gen.culling = _calc_pvr_face_culling(); } break; case GL_DEPTH_TEST: { - DEPTH_TEST_ENABLED = GL_TRUE; + GPUState.depth_test_enabled = GL_TRUE; GL_CONTEXT.depth.comparison = _calc_pvr_depth_test(); } break; case GL_BLEND: { - BLEND_ENABLED = GL_TRUE; + GPUState.blend_enabled = GL_TRUE; _updatePVRBlend(&GL_CONTEXT); } break; case GL_SCISSOR_TEST: { @@ -331,20 +441,20 @@ GLAPI void APIENTRY glEnable(GLenum cap) { _glApplyScissor(false); } break; case GL_LIGHTING: { - LIGHTING_ENABLED = GL_TRUE; + GPUState.lighting_enabled = GL_TRUE; } break; case GL_FOG: GL_CONTEXT.gen.fog_type = GPU_FOG_TABLE; break; case GL_COLOR_MATERIAL: - COLOR_MATERIAL_ENABLED = GL_TRUE; + GPUState.color_material_enabled = GL_TRUE; break; case GL_SHARED_TEXTURE_PALETTE_EXT: { - 
SHARED_PALETTE_ENABLED = GL_TRUE; + GPUState.shared_palette_enabled = GL_TRUE; } break; case GL_ALPHA_TEST: { - ALPHA_TEST_ENABLED = GL_TRUE; + GPUState.alpha_test_enabled = GL_TRUE; _updatePVRBlend(&GL_CONTEXT); } break; case GL_LIGHT0: @@ -355,59 +465,64 @@ GLAPI void APIENTRY glEnable(GLenum cap) { case GL_LIGHT5: case GL_LIGHT6: case GL_LIGHT7: - _glEnableLight(cap & 0xF, GL_TRUE); + _glLightAt(cap & 0xF)->isEnabled = GL_TRUE; + _glRecalcEnabledLights(); break; case GL_NEARZ_CLIPPING_KOS: - ZNEAR_CLIPPING_ENABLED = GL_TRUE; + GPUState.znear_clipping_enabled = GL_TRUE; break; case GL_POLYGON_OFFSET_POINT: case GL_POLYGON_OFFSET_LINE: case GL_POLYGON_OFFSET_FILL: - POLYGON_OFFSET_ENABLED = GL_TRUE; + GPUState.polygon_offset_enabled = GL_TRUE; break; case GL_NORMALIZE: - NORMALIZE_ENABLED = GL_TRUE; + GPUState.normalize_enabled = GL_TRUE; break; default: + GPUState.is_dirty = was_dirty; break; } } GLAPI void APIENTRY glDisable(GLenum cap) { + GLboolean was_dirty = GPUState.is_dirty; + GPUState.is_dirty = GL_TRUE; + switch(cap) { case GL_TEXTURE_2D: { TEXTURES_ENABLED[_glGetActiveTexture()] = GL_FALSE; } break; case GL_CULL_FACE: { - CULLING_ENABLED = GL_FALSE; + GPUState.culling_enabled = GL_FALSE; GL_CONTEXT.gen.culling = _calc_pvr_face_culling(); } break; case GL_DEPTH_TEST: { - DEPTH_TEST_ENABLED = GL_FALSE; + GPUState.depth_test_enabled = GL_FALSE; GL_CONTEXT.depth.comparison = _calc_pvr_depth_test(); } break; case GL_BLEND: - BLEND_ENABLED = GL_FALSE; + GPUState.blend_enabled = GL_FALSE; _updatePVRBlend(&GL_CONTEXT); break; case GL_SCISSOR_TEST: { GL_CONTEXT.gen.clip_mode = GPU_USERCLIP_DISABLE; } break; case GL_LIGHTING: { - LIGHTING_ENABLED = GL_FALSE; + GPUState.lighting_enabled = GL_FALSE; } break; case GL_FOG: GL_CONTEXT.gen.fog_type = GPU_FOG_DISABLE; break; case GL_COLOR_MATERIAL: - COLOR_MATERIAL_ENABLED = GL_FALSE; + GPUState.color_material_enabled = GL_FALSE; break; case GL_SHARED_TEXTURE_PALETTE_EXT: { - SHARED_PALETTE_ENABLED = GL_FALSE; + 
GPUState.shared_palette_enabled = GL_FALSE; } break; case GL_ALPHA_TEST: { - ALPHA_TEST_ENABLED = GL_FALSE; + GPUState.alpha_test_enabled = GL_FALSE; } break; case GL_LIGHT0: case GL_LIGHT1: @@ -420,17 +535,18 @@ GLAPI void APIENTRY glDisable(GLenum cap) { _glEnableLight(cap & 0xF, GL_FALSE); break; case GL_NEARZ_CLIPPING_KOS: - ZNEAR_CLIPPING_ENABLED = GL_FALSE; + GPUState.znear_clipping_enabled = GL_FALSE; break; case GL_POLYGON_OFFSET_POINT: case GL_POLYGON_OFFSET_LINE: case GL_POLYGON_OFFSET_FILL: - POLYGON_OFFSET_ENABLED = GL_FALSE; + GPUState.polygon_offset_enabled = GL_FALSE; break; case GL_NORMALIZE: - NORMALIZE_ENABLED = GL_FALSE; + GPUState.normalize_enabled = GL_FALSE; break; default: + GPUState.is_dirty = was_dirty; break; } } @@ -481,7 +597,8 @@ GLAPI void APIENTRY glDepthMask(GLboolean flag) { } GLAPI void APIENTRY glDepthFunc(GLenum func) { - DEPTH_FUNC = func; + GPUState.depth_func = func; + GPUState.is_dirty = GL_TRUE; GL_CONTEXT.depth.comparison = _calc_pvr_depth_test(); } @@ -502,12 +619,14 @@ GLAPI void APIENTRY glPolygonMode(GLenum face, GLenum mode) { /* Culling */ GLAPI void APIENTRY glFrontFace(GLenum mode) { - FRONT_FACE = mode; + GPUState.front_face = mode; + GPUState.is_dirty = GL_TRUE; GL_CONTEXT.gen.culling = _calc_pvr_face_culling(); } GLAPI void APIENTRY glCullFace(GLenum mode) { - CULL_FACE = mode; + GPUState.cull_face = mode; + GPUState.is_dirty = GL_TRUE; GL_CONTEXT.gen.culling = _calc_pvr_face_culling(); } @@ -522,8 +641,9 @@ GLAPI void APIENTRY glShadeModel(GLenum mode) { /* Blending */ GLAPI void APIENTRY glBlendFunc(GLenum sfactor, GLenum dfactor) { - BLEND_SFACTOR = sfactor; - BLEND_DFACTOR = dfactor; + GPUState.blend_sfactor = sfactor; + GPUState.blend_dfactor = dfactor; + GPUState.is_dirty = GL_TRUE; _updatePVRBlend(&GL_CONTEXT); } @@ -547,8 +667,9 @@ void glLineWidth(GLfloat width) { } void glPolygonOffset(GLfloat factor, GLfloat units) { - OFFSET_FACTOR = factor; - OFFSET_UNITS = units; + GPUState.offset_factor = factor; + 
GPUState.offset_units = units; + GPUState.is_dirty = GL_TRUE; } void glGetTexParameterfv(GLenum target, GLenum pname, GLfloat *params) { @@ -577,18 +698,20 @@ void glPixelStorei(GLenum pname, GLint param) { void APIENTRY glScissor(GLint x, GLint y, GLsizei width, GLsizei height) { - if(SCISSOR_RECT.x == x && - SCISSOR_RECT.y == y && - SCISSOR_RECT.width == width && - SCISSOR_RECT.height == height) { + + if(GPUState.scissor_rect.x == x && + GPUState.scissor_rect.y == y && + GPUState.scissor_rect.width == width && + GPUState.scissor_rect.height == height) { return; } - SCISSOR_RECT.x = x; - SCISSOR_RECT.y = y; - SCISSOR_RECT.width = width; - SCISSOR_RECT.height = height; - SCISSOR_RECT.applied = false; + GPUState.scissor_rect.x = x; + GPUState.scissor_rect.y = y; + GPUState.scissor_rect.width = width; + GPUState.scissor_rect.height = height; + GPUState.scissor_rect.applied = false; + GPUState.is_dirty = GL_TRUE; // FIXME: do we need this? _glApplyScissor(false); } @@ -623,7 +746,7 @@ void _glApplyScissor(bool force) { } /* Don't apply if we already applied - nothing changed */ - if(SCISSOR_RECT.applied && !force) { + if(GPUState.scissor_rect.applied && !force) { return; } @@ -633,27 +756,31 @@ void _glApplyScissor(bool force) { const VideoMode* vid_mode = GetVideoMode(); - GLsizei scissor_width = MAX(MIN(SCISSOR_RECT.width, vid_mode->width), 0); - GLsizei scissor_height = MAX(MIN(SCISSOR_RECT.height, vid_mode->height), 0); + GLsizei scissor_width = MAX(MIN(GPUState.scissor_rect.width, vid_mode->width), 0); + GLsizei scissor_height = MAX(MIN(GPUState.scissor_rect.height, vid_mode->height), 0); /* force the origin to the lower left-hand corner of the screen */ - miny = (vid_mode->height - scissor_height) - SCISSOR_RECT.y; - maxx = (scissor_width + SCISSOR_RECT.x); + miny = (vid_mode->height - scissor_height) - GPUState.scissor_rect.y; + maxx = (scissor_width + GPUState.scissor_rect.x); maxy = (scissor_height + miny); /* load command structure while mapping screen 
coords to TA tiles */ c.flags = GPU_CMD_USERCLIP; c.d1 = c.d2 = c.d3 = 0; - c.sx = CLAMP(SCISSOR_RECT.x / 32, 0, vid_mode->width / 32); - c.sy = CLAMP(miny / 32, 0, vid_mode->height / 32); - c.ex = CLAMP((maxx / 32) - 1, 0, vid_mode->width / 32); - c.ey = CLAMP((maxy / 32) - 1, 0, vid_mode->height / 32); + + uint16_t vw = vid_mode->width >> 5; + uint16_t vh = vid_mode->height >> 5; + + c.sx = CLAMP(GPUState.scissor_rect.x >> 5, 0, vw); + c.sy = CLAMP(miny >> 5, 0, vh); + c.ex = CLAMP((maxx >> 5) - 1, 0, vw); + c.ey = CLAMP((maxy >> 5) - 1, 0, vh); aligned_vector_push_back(&_glOpaquePolyList()->vector, &c, 1); aligned_vector_push_back(&_glPunchThruPolyList()->vector, &c, 1); aligned_vector_push_back(&_glTransparentPolyList()->vector, &c, 1); - SCISSOR_RECT.applied = true; + GPUState.scissor_rect.applied = true; } void glStencilFunc(GLenum func, GLint ref, GLuint mask) { @@ -671,19 +798,19 @@ void glStencilOp(GLenum sfail, GLenum dpfail, GLenum dppass) { GLboolean APIENTRY glIsEnabled(GLenum cap) { switch(cap) { case GL_DEPTH_TEST: - return DEPTH_TEST_ENABLED; + return GPUState.depth_test_enabled; case GL_SCISSOR_TEST: return GL_CONTEXT.gen.clip_mode == GPU_USERCLIP_INSIDE; case GL_CULL_FACE: - return CULLING_ENABLED; + return GPUState.culling_enabled; case GL_LIGHTING: - return LIGHTING_ENABLED; + return GPUState.lighting_enabled; case GL_BLEND: - return BLEND_ENABLED; + return GPUState.blend_enabled; case GL_POLYGON_OFFSET_POINT: case GL_POLYGON_OFFSET_LINE: case GL_POLYGON_OFFSET_FILL: - return POLYGON_OFFSET_ENABLED; + return GPUState.polygon_offset_enabled; } return GL_FALSE; @@ -738,10 +865,10 @@ void APIENTRY glGetFloatv(GLenum pname, GLfloat* params) { MEMCPY4(params, _glGetModelViewMatrix(), sizeof(float) * 16); break; case GL_POLYGON_OFFSET_FACTOR: - *params = OFFSET_FACTOR; + *params = GPUState.offset_factor; break; case GL_POLYGON_OFFSET_UNITS: - *params = OFFSET_UNITS; + *params = GPUState.offset_units; break; default: _glKosThrowError(GL_INVALID_ENUM, 
__func__); @@ -758,13 +885,13 @@ void APIENTRY glGetIntegerv(GLenum pname, GLint *params) { *params = (_glGetBoundTexture()) ? _glGetBoundTexture()->index : 0; break; case GL_DEPTH_FUNC: - *params = DEPTH_FUNC; + *params = GPUState.depth_func; break; case GL_BLEND_SRC: - *params = BLEND_SFACTOR; + *params = GPUState.blend_sfactor; break; case GL_BLEND_DST: - *params = BLEND_DFACTOR; + *params = GPUState.blend_dfactor; break; case GL_MAX_TEXTURE_SIZE: *params = MAX_TEXTURE_SIZE; From 8789d6557e0c72bbbbc323cbdbde374d2fc6a6db Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Tue, 7 Mar 2023 21:27:48 +0000 Subject: [PATCH 16/20] Huge refactor of internal state --- GL/draw.c | 226 ++++++++++++++++++++++++++++++++++++------------- GL/private.h | 14 +++- GL/state.c | 233 +++++++++++++++++++++------------------------------ 3 files changed, 275 insertions(+), 198 deletions(-) diff --git a/GL/draw.c b/GL/draw.c index be58241..aea1f34 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -997,40 +997,140 @@ GL_FORCE_INLINE void divide(SubmissionTarget* target) { } } +GL_FORCE_INLINE int _calc_pvr_face_culling() { + if(!_glIsCullingEnabled()) { + return GPU_CULLING_SMALL; + } else { + if(_glGetCullFace() == GL_BACK) { + return (_glGetFrontFace() == GL_CW) ? GPU_CULLING_CCW : GPU_CULLING_CW; + } else { + return (_glGetFrontFace() == GL_CCW) ? 
GPU_CULLING_CCW : GPU_CULLING_CW; + } + } +} + +GL_FORCE_INLINE int _calc_pvr_depth_test() { + if(!_glIsDepthTestEnabled()) { + return GPU_DEPTHCMP_ALWAYS; + } + + switch(_glGetDepthFunc()) { + case GL_NEVER: + return GPU_DEPTHCMP_NEVER; + case GL_LESS: + return GPU_DEPTHCMP_GREATER; + case GL_EQUAL: + return GPU_DEPTHCMP_EQUAL; + case GL_LEQUAL: + return GPU_DEPTHCMP_GEQUAL; + case GL_GREATER: + return GPU_DEPTHCMP_LESS; + case GL_NOTEQUAL: + return GPU_DEPTHCMP_NOTEQUAL; + case GL_GEQUAL: + return GPU_DEPTHCMP_LEQUAL; + break; + case GL_ALWAYS: + default: + return GPU_DEPTHCMP_ALWAYS; + } +} + +GL_FORCE_INLINE int _calcPVRBlendFactor(GLenum factor) { + switch(factor) { + case GL_ZERO: + return GPU_BLEND_ZERO; + case GL_SRC_ALPHA: + return GPU_BLEND_SRCALPHA; + case GL_DST_COLOR: + return GPU_BLEND_DESTCOLOR; + case GL_DST_ALPHA: + return GPU_BLEND_DESTALPHA; + case GL_ONE_MINUS_DST_COLOR: + return GPU_BLEND_INVDESTCOLOR; + case GL_ONE_MINUS_SRC_ALPHA: + return GPU_BLEND_INVSRCALPHA; + case GL_ONE_MINUS_DST_ALPHA: + return GPU_BLEND_INVDESTALPHA; + case GL_ONE: + return GPU_BLEND_ONE; + default: + fprintf(stderr, "Invalid blend mode: %u\n", (unsigned int) factor); + return GPU_BLEND_ONE; + } +} + + +GL_FORCE_INLINE void _updatePVRBlend(PolyContext* context) { + if(_glIsBlendingEnabled() || _glIsAlphaTestEnabled()) { + context->gen.alpha = GPU_ALPHA_ENABLE; + } else { + context->gen.alpha = GPU_ALPHA_DISABLE; + } + + context->blend.src = _calcPVRBlendFactor(_glGetBlendSourceFactor()); + context->blend.dst = _calcPVRBlendFactor(_glGetBlendDestFactor()); +} + GL_FORCE_INLINE void apply_poly_header(PolyHeader* header, GLboolean multiTextureHeader, PolyList* activePolyList, GLshort textureUnit) { TRACE(); // Compile the header - PolyContext cxt = *_glGetPVRContext(); - cxt.list_type = activePolyList->list_type; + PolyContext ctx; + memset(&ctx, 0, sizeof(PolyContext)); - if(cxt.list_type == GPU_LIST_OP_POLY) { - /* Opaque polys are always one/zero */ - cxt.blend.src = 
GPU_BLEND_ONE; - cxt.blend.dst = GPU_BLEND_ZERO; - } else if(cxt.list_type == GPU_LIST_PT_POLY) { - /* Punch-through polys require fixed blending and depth modes */ - cxt.blend.src = GPU_BLEND_SRCALPHA; - cxt.blend.dst = GPU_BLEND_INVSRCALPHA; - cxt.depth.comparison = GPU_DEPTHCMP_LEQUAL; - } else if(cxt.list_type == GPU_LIST_TR_POLY && AUTOSORT_ENABLED) { - /* Autosort mode requires this mode for transparent polys */ - cxt.depth.comparison = GPU_DEPTHCMP_GEQUAL; + ctx.list_type = activePolyList->list_type; + ctx.fmt.color = GPU_CLRFMT_ARGBPACKED; + ctx.fmt.uv = GPU_UVFMT_32BIT; + ctx.gen.color_clamp = GPU_CLRCLAMP_DISABLE; + + ctx.gen.culling = _calc_pvr_face_culling(); + ctx.depth.comparison = _calc_pvr_depth_test(); + ctx.depth.write = _glIsDepthWriteEnabled() ? GPU_DEPTHWRITE_ENABLE : GPU_DEPTHWRITE_DISABLE; + + ctx.gen.shading = (_glGetShadeModel() == GL_SMOOTH) ? GPU_SHADE_GOURAUD : GPU_SHADE_FLAT; + + if(_glIsScissorTestEnabled()) { + ctx.gen.clip_mode = GPU_USERCLIP_INSIDE; + } else { + ctx.gen.clip_mode = GPU_USERCLIP_DISABLE; } - _glUpdatePVRTextureContext(&cxt, textureUnit); + if(_glIsFogEnabled()) { + ctx.gen.fog_type = GPU_FOG_TABLE; + } else { + ctx.gen.fog_type = GPU_FOG_DISABLE; + } + + _updatePVRBlend(&ctx); + + if(ctx.list_type == GPU_LIST_OP_POLY) { + /* Opaque polys are always one/zero */ + ctx.blend.src = GPU_BLEND_ONE; + ctx.blend.dst = GPU_BLEND_ZERO; + } else if(ctx.list_type == GPU_LIST_PT_POLY) { + /* Punch-through polys require fixed blending and depth modes */ + ctx.blend.src = GPU_BLEND_SRCALPHA; + ctx.blend.dst = GPU_BLEND_INVSRCALPHA; + ctx.depth.comparison = GPU_DEPTHCMP_LEQUAL; + } else if(ctx.list_type == GPU_LIST_TR_POLY && AUTOSORT_ENABLED) { + /* Autosort mode requires this mode for transparent polys */ + ctx.depth.comparison = GPU_DEPTHCMP_GEQUAL; + } + + _glUpdatePVRTextureContext(&ctx, textureUnit); if(multiTextureHeader) { - gl_assert(cxt.list_type == GPU_LIST_TR_POLY); + gl_assert(ctx.list_type == GPU_LIST_TR_POLY); - 
cxt.gen.alpha = GPU_ALPHA_ENABLE; - cxt.txr.alpha = GPU_TXRALPHA_ENABLE; - cxt.blend.src = GPU_BLEND_ZERO; - cxt.blend.dst = GPU_BLEND_DESTCOLOR; - cxt.depth.comparison = GPU_DEPTHCMP_EQUAL; + ctx.gen.alpha = GPU_ALPHA_ENABLE; + ctx.txr.alpha = GPU_TXRALPHA_ENABLE; + ctx.blend.src = GPU_BLEND_ZERO; + ctx.blend.dst = GPU_BLEND_DESTCOLOR; + ctx.depth.comparison = GPU_DEPTHCMP_EQUAL; } - CompilePolyHeader(header, &cxt); + CompilePolyHeader(header, &ctx); /* Force bits 18 and 19 on to switch to 6 triangle strips */ header->cmd |= 0xC0000; @@ -1110,13 +1210,16 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL return; } + GLboolean header_required = (target->output->vector.size == 0) || _glGPUStateIsDirty(); + + // We don't handle this any further, so just make sure we never pass it down */ gl_assert(mode != GL_POLYGON); target->output = _glActivePolyList(); target->count = (mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count; target->header_offset = target->output->vector.size; - target->start_offset = target->header_offset + 1; + target->start_offset = target->header_offset + (header_required); gl_assert(target->count); @@ -1124,9 +1227,12 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL aligned_vector_resize(extras, target->count); /* Make room for the vertices and header */ - aligned_vector_extend(&target->output->vector, target->count + 1); + aligned_vector_extend(&target->output->vector, target->count + (header_required)); - apply_poly_header(_glSubmissionTargetHeader(target), GL_FALSE, target->output, 0); + if(header_required) { + apply_poly_header(_glSubmissionTargetHeader(target), GL_FALSE, target->output, 0); + _glGPUStateMarkClean(); + } /* If we're lighting, then we need to do some work in * eye-space, so we only transform vertices by the modelview @@ -1158,48 +1264,48 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL transform(target); } - /* - Now, if 
multitexturing is enabled, we want to send exactly the same vertices again, except: - - We want to enable blending, and send them to the TR list - - We want to set the depth func to GL_EQUAL - - We want to set the second texture ID - - We want to set the uv coordinates to the passed st ones - */ + // /* + // Now, if multitexturing is enabled, we want to send exactly the same vertices again, except: + // - We want to enable blending, and send them to the TR list + // - We want to set the depth func to GL_EQUAL + // - We want to set the second texture ID + // - We want to set the uv coordinates to the passed st ones + // */ - if(!TEXTURES_ENABLED[1]) { - /* Multitexture actively disabled */ - return; - } + // if(!TEXTURES_ENABLED[1]) { + // /* Multitexture actively disabled */ + // return; + // } - TextureObject* texture1 = _glGetTexture1(); + // TextureObject* texture1 = _glGetTexture1(); - /* Multitexture implicitly disabled */ - if(!texture1 || ((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) != ST_ENABLED_FLAG)) { - /* Multitexture actively disabled */ - return; - } + // /* Multitexture implicitly disabled */ + // if(!texture1 || ((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) != ST_ENABLED_FLAG)) { + // /* Multitexture actively disabled */ + // return; + // } - /* Push back a copy of the list to the transparent poly list, including the header - (hence the + 1) - */ - Vertex* vertex = aligned_vector_push_back( - &_glTransparentPolyList()->vector, (Vertex*) _glSubmissionTargetHeader(target), target->count + 1 - ); + // /* Push back a copy of the list to the transparent poly list, including the header + // (hence the + 1) + // */ + // Vertex* vertex = aligned_vector_push_back( + // &_glTransparentPolyList()->vector, (Vertex*) _glSubmissionTargetHeader(target), target->count + 1 + // ); - gl_assert(vertex); + // gl_assert(vertex); - PolyHeader* mtHeader = (PolyHeader*) vertex++; - /* Send the buffer again to the transparent list */ - apply_poly_header(mtHeader, 
GL_TRUE, _glTransparentPolyList(), 1); + // PolyHeader* mtHeader = (PolyHeader*) vertex++; + // /* Send the buffer again to the transparent list */ + // apply_poly_header(mtHeader, GL_TRUE, _glTransparentPolyList(), 1); - /* Replace the UV coordinates with the ST ones */ - VertexExtra* ve = aligned_vector_at(target->extras, 0); - ITERATE(target->count) { - vertex->uv[0] = ve->st[0]; - vertex->uv[1] = ve->st[1]; - ++vertex; - ++ve; - } + // /* Replace the UV coordinates with the ST ones */ + // VertexExtra* ve = aligned_vector_at(target->extras, 0); + // ITERATE(target->count) { + // vertex->uv[0] = ve->st[0]; + // vertex->uv[1] = ve->st[1]; + // ++vertex; + // ++ve; + // } } void APIENTRY glDrawElements(GLenum mode, GLsizei count, GLenum type, const GLvoid* indices) { diff --git a/GL/private.h b/GL/private.h index f728d05..200ffde 100644 --- a/GL/private.h +++ b/GL/private.h @@ -306,7 +306,6 @@ Matrix4x4* _glGetModelViewMatrix(); void _glWipeTextureOnFramebuffers(GLuint texture); -PolyContext* _glGetPVRContext(); GLubyte _glInitTextures(); void _glUpdatePVRTextureContext(PolyContext* context, GLshort textureUnit); @@ -356,6 +355,16 @@ void _glApplyColorTable(TexturePalette *palette); GLboolean _glIsBlendingEnabled(); GLboolean _glIsAlphaTestEnabled(); +GLboolean _glIsCullingEnabled(); +GLboolean _glIsDepthTestEnabled(); +GLboolean _glIsDepthWriteEnabled(); +GLboolean _glIsScissorTestEnabled(); +GLboolean _glIsFogEnabled(); +GLenum _glGetDepthFunc(); +GLenum _glGetCullFace(); +GLenum _glGetFrontFace(); +GLenum _glGetBlendSourceFactor(); +GLenum _glGetBlendDestFactor(); extern PolyList OP_LIST; extern PolyList PT_LIST; @@ -514,6 +523,9 @@ GLfloat* _glLightModelSceneAmbient(); LightSource* _glLightAt(GLuint i); GLboolean _glNearZClippingEnabled(); +GLboolean _glGPUStateIsDirty(); +void _glGPUStateMarkClean(); + #define MAX_GLDC_TEXTURE_UNITS 2 #define MAX_GLDC_LIGHTS 8 diff --git a/GL/state.c b/GL/state.c index 393aaec..38c60ee 100644 --- a/GL/state.c +++ b/GL/state.c 
@@ -4,12 +4,6 @@ #include "private.h" -static PolyContext GL_CONTEXT; - -PolyContext *_glGetPVRContext() { - return &GL_CONTEXT; -} - static struct { GLboolean is_dirty; @@ -28,7 +22,10 @@ static struct { GLboolean shared_palette_enabled; GLboolean alpha_test_enabled; GLboolean polygon_offset_enabled; - GLboolean normalize_enabled;; + GLboolean normalize_enabled; + GLboolean scissor_test_enabled; + GLboolean fog_enabled; + GLboolean depth_mask_enabled; struct { GLint x; @@ -53,36 +50,50 @@ static struct { LightSource lights[MAX_GLDC_LIGHTS]; GLuint enabled_light_count; Material material; + + GLenum shade_model; } GPUState = { - GL_TRUE, - GL_LESS, - GL_FALSE, - GL_BACK, - GL_CCW, - GL_FALSE, - GL_FALSE, - GL_TRUE, - GL_FALSE, - GL_FALSE, - GL_FALSE, - GL_FALSE, - GL_FALSE, - {0, 0, 640, 480, false}, - GL_ONE, - GL_ZERO, - GL_FALSE, - 0.0f, - 0.0f, - {0.2f, 0.2f, 0.2f, 1.0f}, - GL_TRUE, - GL_SINGLE_COLOR, - GL_AMBIENT_AND_DIFFUSE, - AMBIENT_MASK | DIFFUSE_MASK, - {0}, - 0, - {0} + .is_dirty = GL_TRUE, + .depth_func = GL_LESS, + .depth_test_enabled = GL_FALSE, + .cull_face = GL_BACK, + .front_face = GL_CCW, + .culling_enabled = GL_FALSE, + .color_material_enabled = GL_FALSE, + .znear_clipping_enabled = GL_TRUE, + .lighting_enabled = GL_FALSE, + .shared_palette_enabled = GL_FALSE, + .alpha_test_enabled = GL_FALSE, + .polygon_offset_enabled = GL_FALSE, + .normalize_enabled = GL_FALSE, + .scissor_test_enabled = GL_FALSE, + .fog_enabled = GL_FALSE, + .depth_mask_enabled = GL_FALSE, + .scissor_rect = {0, 0, 640, 480, false}, + .blend_sfactor = GL_ONE, + .blend_dfactor = GL_ZERO, + .blend_enabled = GL_FALSE, + .offset_factor = 0.0f, + .offset_units = 0.0f, + .scene_ambient = {0.2f, 0.2f, 0.2f, 1.0f}, + .viewer_in_eye_coords = GL_TRUE, + .color_control = GL_SINGLE_COLOR, + .color_material_mode = GL_AMBIENT_AND_DIFFUSE, + .color_material_mask = AMBIENT_MASK | DIFFUSE_MASK, + .lights = {0}, + .enabled_light_count = 0, + .material = {0}, + .shade_model = GL_SMOOTH }; +void 
_glGPUStateMarkClean() { + GPUState.is_dirty = GL_FALSE; +} + +GLboolean _glGPUStateIsDirty() { + return GPUState.is_dirty; +} + Material* _glActiveMaterial() { return &GPUState.material; } @@ -96,6 +107,22 @@ void _glEnableLight(GLubyte light, GLboolean value) { GPUState.lights[light].isEnabled = value; } +GLboolean _glIsDepthTestEnabled() { + return GPUState.depth_test_enabled; +} + +GLenum _glGetDepthFunc() { + return GPUState.depth_func; +} + +GLboolean _glIsDepthWriteEnabled() { + return GPUState.depth_mask_enabled; +} + +GLenum _glGetShadeModel() { + return GPUState.shade_model; +} + GLuint _glEnabledLightCount() { return GPUState.enabled_light_count; } @@ -112,6 +139,26 @@ GLboolean _glIsAlphaTestEnabled() { return GPUState.alpha_test_enabled; } +GLboolean _glIsCullingEnabled() { + return GPUState.culling_enabled; +} + +GLenum _glGetCullFace() { + return GPUState.cull_face; +} + +GLenum _glGetFrontFace() { + return GPUState.front_face; +} + +GLboolean _glIsFogEnabled() { + return GPUState.fog_enabled; +} + +GLboolean _glIsScissorTestEnabled() { + return GPUState.scissor_test_enabled; +} + void _glRecalcEnabledLights() { GLubyte i; @@ -161,86 +208,19 @@ GLboolean _glNearZClippingEnabled() { void _glApplyScissor(bool force); -static int _calc_pvr_face_culling() { - if(!GPUState.culling_enabled) { - return GPU_CULLING_SMALL; - } else { - if(GPUState.cull_face == GL_BACK) { - return (GPUState.front_face == GL_CW) ? GPU_CULLING_CCW : GPU_CULLING_CW; - } else { - return (GPUState.front_face == GL_CCW) ? 
GPU_CULLING_CCW : GPU_CULLING_CW; - } - } -} - - -static int _calc_pvr_depth_test() { - if(!GPUState.depth_test_enabled) { - return GPU_DEPTHCMP_ALWAYS; - } - - switch(GPUState.depth_func) { - case GL_NEVER: - return GPU_DEPTHCMP_NEVER; - case GL_LESS: - return GPU_DEPTHCMP_GREATER; - case GL_EQUAL: - return GPU_DEPTHCMP_EQUAL; - case GL_LEQUAL: - return GPU_DEPTHCMP_GEQUAL; - case GL_GREATER: - return GPU_DEPTHCMP_LESS; - case GL_NOTEQUAL: - return GPU_DEPTHCMP_NOTEQUAL; - case GL_GEQUAL: - return GPU_DEPTHCMP_LEQUAL; - break; - case GL_ALWAYS: - default: - return GPU_DEPTHCMP_ALWAYS; - } -} - - GLboolean _glIsNormalizeEnabled() { return GPUState.normalize_enabled; } -static int _calcPVRBlendFactor(GLenum factor) { - switch(factor) { - case GL_ZERO: - return GPU_BLEND_ZERO; - case GL_SRC_ALPHA: - return GPU_BLEND_SRCALPHA; - case GL_DST_COLOR: - return GPU_BLEND_DESTCOLOR; - case GL_DST_ALPHA: - return GPU_BLEND_DESTALPHA; - case GL_ONE_MINUS_DST_COLOR: - return GPU_BLEND_INVDESTCOLOR; - case GL_ONE_MINUS_SRC_ALPHA: - return GPU_BLEND_INVSRCALPHA; - case GL_ONE_MINUS_DST_ALPHA: - return GPU_BLEND_INVDESTALPHA; - case GL_ONE: - return GPU_BLEND_ONE; - default: - fprintf(stderr, "Invalid blend mode: %u\n", (unsigned int) factor); - return GPU_BLEND_ONE; - } +GLenum _glGetBlendSourceFactor() { + return GPUState.blend_sfactor; } -static void _updatePVRBlend(PolyContext* context) { - if(GPUState.blend_enabled || GPUState.alpha_test_enabled) { - context->gen.alpha = GPU_ALPHA_ENABLE; - } else { - context->gen.alpha = GPU_ALPHA_DISABLE; - } - - context->blend.src = _calcPVRBlendFactor(GPUState.blend_sfactor); - context->blend.dst = _calcPVRBlendFactor(GPUState.blend_dfactor); +GLenum _glGetBlendDestFactor() { + return GPUState.blend_dfactor; } + GLboolean _glCheckValidEnum(GLint param, GLint* values, const char* func) { GLubyte found = 0; while(*values != 0) { @@ -378,13 +358,6 @@ GLboolean _glIsColorMaterialEnabled() { static GLfloat CLEAR_COLOUR[3]; void 
_glInitContext() { - memset(&GL_CONTEXT, 0, sizeof(PolyContext)); - - GL_CONTEXT.list_type = GPU_LIST_OP_POLY; - GL_CONTEXT.fmt.color = GPU_CLRFMT_ARGBPACKED; - GL_CONTEXT.fmt.uv = GPU_UVFMT_32BIT; - GL_CONTEXT.gen.color_clamp = GPU_CLRCLAMP_DISABLE; - const VideoMode* mode = GetVideoMode(); GPUState.scissor_rect.x = 0; @@ -426,25 +399,21 @@ GLAPI void APIENTRY glEnable(GLenum cap) { break; case GL_CULL_FACE: { GPUState.cull_face = GL_TRUE; - GL_CONTEXT.gen.culling = _calc_pvr_face_culling(); } break; case GL_DEPTH_TEST: { GPUState.depth_test_enabled = GL_TRUE; - GL_CONTEXT.depth.comparison = _calc_pvr_depth_test(); } break; case GL_BLEND: { GPUState.blend_enabled = GL_TRUE; - _updatePVRBlend(&GL_CONTEXT); } break; case GL_SCISSOR_TEST: { - GL_CONTEXT.gen.clip_mode = GPU_USERCLIP_INSIDE; - _glApplyScissor(false); + GPUState.scissor_test_enabled = GL_TRUE; } break; case GL_LIGHTING: { GPUState.lighting_enabled = GL_TRUE; } break; case GL_FOG: - GL_CONTEXT.gen.fog_type = GPU_FOG_TABLE; + GPUState.fog_enabled = GL_TRUE; break; case GL_COLOR_MATERIAL: GPUState.color_material_enabled = GL_TRUE; @@ -455,7 +424,6 @@ GLAPI void APIENTRY glEnable(GLenum cap) { break; case GL_ALPHA_TEST: { GPUState.alpha_test_enabled = GL_TRUE; - _updatePVRBlend(&GL_CONTEXT); } break; case GL_LIGHT0: case GL_LIGHT1: @@ -495,24 +463,21 @@ GLAPI void APIENTRY glDisable(GLenum cap) { } break; case GL_CULL_FACE: { GPUState.culling_enabled = GL_FALSE; - GL_CONTEXT.gen.culling = _calc_pvr_face_culling(); } break; case GL_DEPTH_TEST: { GPUState.depth_test_enabled = GL_FALSE; - GL_CONTEXT.depth.comparison = _calc_pvr_depth_test(); } break; case GL_BLEND: GPUState.blend_enabled = GL_FALSE; - _updatePVRBlend(&GL_CONTEXT); break; case GL_SCISSOR_TEST: { - GL_CONTEXT.gen.clip_mode = GPU_USERCLIP_DISABLE; + GPUState.scissor_test_enabled = GL_FALSE; } break; case GL_LIGHTING: { GPUState.lighting_enabled = GL_FALSE; } break; case GL_FOG: - GL_CONTEXT.gen.fog_type = GPU_FOG_DISABLE; + GPUState.fog_enabled = 
GL_FALSE; break; case GL_COLOR_MATERIAL: GPUState.color_material_enabled = GL_FALSE; @@ -593,13 +558,13 @@ GLAPI void APIENTRY glReadBuffer(GLenum mode) { } GLAPI void APIENTRY glDepthMask(GLboolean flag) { - GL_CONTEXT.depth.write = (flag == GL_TRUE) ? GPU_DEPTHWRITE_ENABLE : GPU_DEPTHWRITE_DISABLE; + GPUState.depth_mask_enabled = flag; + GPUState.is_dirty = GL_TRUE; } GLAPI void APIENTRY glDepthFunc(GLenum func) { GPUState.depth_func = func; GPUState.is_dirty = GL_TRUE; - GL_CONTEXT.depth.comparison = _calc_pvr_depth_test(); } /* Hints */ @@ -621,22 +586,17 @@ GLAPI void APIENTRY glPolygonMode(GLenum face, GLenum mode) { GLAPI void APIENTRY glFrontFace(GLenum mode) { GPUState.front_face = mode; GPUState.is_dirty = GL_TRUE; - GL_CONTEXT.gen.culling = _calc_pvr_face_culling(); } GLAPI void APIENTRY glCullFace(GLenum mode) { GPUState.cull_face = mode; GPUState.is_dirty = GL_TRUE; - GL_CONTEXT.gen.culling = _calc_pvr_face_culling(); -} - -GLenum _glGetShadeModel() { - return (GL_CONTEXT.gen.shading == GPU_SHADE_FLAT) ? GL_FLAT : GL_SMOOTH; } /* Shading - Flat or Goraud */ GLAPI void APIENTRY glShadeModel(GLenum mode) { - GL_CONTEXT.gen.shading = (mode == GL_SMOOTH) ? 
GPU_SHADE_GOURAUD : GPU_SHADE_FLAT; + GPUState.shade_model = mode; + GPUState.is_dirty = GL_TRUE; } /* Blending */ @@ -644,7 +604,6 @@ GLAPI void APIENTRY glBlendFunc(GLenum sfactor, GLenum dfactor) { GPUState.blend_sfactor = sfactor; GPUState.blend_dfactor = dfactor; GPUState.is_dirty = GL_TRUE; - _updatePVRBlend(&GL_CONTEXT); } @@ -741,7 +700,7 @@ void APIENTRY glScissor(GLint x, GLint y, GLsizei width, GLsizei height) { */ void _glApplyScissor(bool force) { /* Don't do anyting if clipping is disabled */ - if(GL_CONTEXT.gen.clip_mode == GPU_USERCLIP_DISABLE) { + if(!GPUState.scissor_test_enabled) { return; } @@ -800,7 +759,7 @@ GLboolean APIENTRY glIsEnabled(GLenum cap) { case GL_DEPTH_TEST: return GPUState.depth_test_enabled; case GL_SCISSOR_TEST: - return GL_CONTEXT.gen.clip_mode == GPU_USERCLIP_INSIDE; + return GPUState.scissor_test_enabled; case GL_CULL_FACE: return GPUState.culling_enabled; case GL_LIGHTING: From e5c6fefcd92cb6e877e8501c4b43cbf75460409d Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Thu, 16 Mar 2023 21:24:12 +0000 Subject: [PATCH 17/20] Optimisations --- GL/platforms/sh4.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index da16cce..dede241 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -47,12 +47,10 @@ void SceneListBegin(GPUList list) { pvr_list_begin(list); } -__attribute__((optimize("O3", "fast-math"))) GL_FORCE_INLINE float _glFastInvert(float x) { return (1.f / __builtin_sqrtf(x * x)); } -__attribute__((optimize("O3", "fast-math"))) GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) { const float f = _glFastInvert(vertex->w); @@ -107,17 +105,16 @@ static int tri_count = 0; static int strip_count = 0; GL_FORCE_INLINE void interpolateColour(const uint8_t* v1, const uint8_t* v2, const float t, uint8_t* out) { - const int MASK1 = 0x00FF00FF; - const int MASK2 = 0xFF00FF00; + const static int MASK1 = 
0x00FF00FF; + const static int MASK2 = 0xFF00FF00; - const int f2 = 256 * t; + const uint32_t* a = (uint32_t*) v1; + const uint32_t* b = (uint32_t*) v2; + const int f2 = 256.0f * t; const int f1 = 256 - f2; - const uint32_t a = *(uint32_t*) v1; - const uint32_t b = *(uint32_t*) v2; - - *((uint32_t*) out) = (((((a & MASK1) * f1) + ((b & MASK1) * f2)) >> 8) & MASK1) | - (((((a & MASK2) * f1) + ((b & MASK2) * f2)) >> 8) & MASK2); + *((uint32_t*) out) = (((((*a & MASK1) * f1) + ((*b & MASK1) * f2)) >> 8) & MASK1) | + (((((*a & MASK2) * f1) + ((*b & MASK2) * f2)) >> 8) & MASK2); } GL_FORCE_INLINE void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) { @@ -135,13 +132,13 @@ GL_FORCE_INLINE void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vou t = (t > 1.0f) ? 1.0f : t; t = (t < 0.0f) ? 0.0f : t; - vout->xyz[0] = __builtin_fmaf(v2->xyz[0] - v1->xyz[0], t, v1->xyz[0]); - vout->xyz[1] = __builtin_fmaf(v2->xyz[1] - v1->xyz[1], t, v1->xyz[1]); - vout->xyz[2] = __builtin_fmaf(v2->xyz[2] - v1->xyz[2], t, v1->xyz[2]); - vout->w = __builtin_fmaf(v2->w - v1->w, t, v1->w); + vout->xyz[0] = fmaf(v2->xyz[0] - v1->xyz[0], t, v1->xyz[0]); + vout->xyz[1] = fmaf(v2->xyz[1] - v1->xyz[1], t, v1->xyz[1]); + vout->xyz[2] = fmaf(v2->xyz[2] - v1->xyz[2], t, v1->xyz[2]); + vout->w = fmaf(v2->w - v1->w, t, v1->w); - vout->uv[0] = __builtin_fmaf(v2->uv[0] - v1->uv[0], t, v1->uv[0]); - vout->uv[1] = __builtin_fmaf(v2->uv[1] - v1->uv[1], t, v1->uv[1]); + vout->uv[0] = fmaf(v2->uv[0] - v1->uv[0], t, v1->uv[0]); + vout->uv[1] = fmaf(v2->uv[1] - v1->uv[1], t, v1->uv[1]); interpolateColour(v1->bgra, v2->bgra, t, vout->bgra); } From 300f2a611e0108bf6ed2c73e39ff594ce84d5c02 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Fri, 17 Mar 2023 20:40:45 +0000 Subject: [PATCH 18/20] Optimise state management --- GL/private.h | 1 + GL/state.c | 210 ++++++++++++++++++++++++++++++++++++--------------- GL/texture.c | 8 ++ 3 files changed, 160 insertions(+), 59 deletions(-) diff --git 
a/GL/private.h b/GL/private.h index 200ffde..7db1b13 100644 --- a/GL/private.h +++ b/GL/private.h @@ -525,6 +525,7 @@ GLboolean _glNearZClippingEnabled(); GLboolean _glGPUStateIsDirty(); void _glGPUStateMarkClean(); +void _glGPUStateMarkDirty(); #define MAX_GLDC_TEXTURE_UNITS 2 #define MAX_GLDC_LIGHTS 8 diff --git a/GL/state.c b/GL/state.c index 38c60ee..ced1cdc 100644 --- a/GL/state.c +++ b/GL/state.c @@ -90,6 +90,10 @@ void _glGPUStateMarkClean() { GPUState.is_dirty = GL_FALSE; } +void _glGPUStateMarkDirty() { + GPUState.is_dirty = GL_TRUE; +} + GLboolean _glGPUStateIsDirty() { return GPUState.is_dirty; } @@ -160,10 +164,8 @@ GLboolean _glIsScissorTestEnabled() { } void _glRecalcEnabledLights() { - GLubyte i; - GPUState.enabled_light_count = 0; - for(int i = 0; i < MAX_GLDC_LIGHTS; ++i) { + for(GLubyte i = 0; i < MAX_GLDC_LIGHTS; ++i) { if(_glLightAt(i)->isEnabled) { GPUState.enabled_light_count++; } @@ -389,41 +391,68 @@ void _glInitContext() { } GLAPI void APIENTRY glEnable(GLenum cap) { - GLboolean was_dirty = GPUState.is_dirty; - - GPUState.is_dirty = GL_TRUE; - switch(cap) { case GL_TEXTURE_2D: - TEXTURES_ENABLED[_glGetActiveTexture()] = GL_TRUE; + if(TEXTURES_ENABLED[_glGetActiveTexture()] != GL_TRUE) { + TEXTURES_ENABLED[_glGetActiveTexture()] = GL_TRUE; + GPUState.is_dirty = GL_TRUE; + } break; case GL_CULL_FACE: { - GPUState.cull_face = GL_TRUE; + if(GPUState.cull_face != GL_TRUE) { + GPUState.cull_face = GL_TRUE; + GPUState.is_dirty = GL_TRUE; + } + } break; case GL_DEPTH_TEST: { - GPUState.depth_test_enabled = GL_TRUE; + if(GPUState.depth_test_enabled != GL_TRUE) { + GPUState.depth_test_enabled = GL_TRUE; + GPUState.is_dirty = GL_TRUE; + } } break; case GL_BLEND: { - GPUState.blend_enabled = GL_TRUE; + if(GPUState.blend_enabled != GL_TRUE) { + GPUState.blend_enabled = GL_TRUE; + GPUState.is_dirty = GL_TRUE; + } } break; case GL_SCISSOR_TEST: { - GPUState.scissor_test_enabled = GL_TRUE; + if(GPUState.scissor_test_enabled != GL_TRUE) { + 
GPUState.scissor_test_enabled = GL_TRUE; + GPUState.is_dirty = GL_TRUE; + } } break; case GL_LIGHTING: { - GPUState.lighting_enabled = GL_TRUE; + if(GPUState.lighting_enabled != GL_TRUE) { + GPUState.lighting_enabled = GL_TRUE; + GPUState.is_dirty = GL_TRUE; + } } break; case GL_FOG: - GPUState.fog_enabled = GL_TRUE; + if(GPUState.fog_enabled != GL_TRUE) { + GPUState.fog_enabled = GL_TRUE; + GPUState.is_dirty = GL_TRUE; + } break; case GL_COLOR_MATERIAL: - GPUState.color_material_enabled = GL_TRUE; + if(GPUState.color_material_enabled != GL_TRUE) { + GPUState.color_material_enabled = GL_TRUE; + GPUState.is_dirty = GL_TRUE; + } break; case GL_SHARED_TEXTURE_PALETTE_EXT: { - GPUState.shared_palette_enabled = GL_TRUE; + if(GPUState.shared_palette_enabled != GL_TRUE) { + GPUState.shared_palette_enabled = GL_TRUE; + GPUState.is_dirty = GL_TRUE; + } } break; case GL_ALPHA_TEST: { - GPUState.alpha_test_enabled = GL_TRUE; + if(GPUState.alpha_test_enabled != GL_TRUE) { + GPUState.alpha_test_enabled = GL_TRUE; + GPUState.is_dirty = GL_TRUE; + } } break; case GL_LIGHT0: case GL_LIGHT1: @@ -432,62 +461,102 @@ GLAPI void APIENTRY glEnable(GLenum cap) { case GL_LIGHT4: case GL_LIGHT5: case GL_LIGHT6: - case GL_LIGHT7: - _glLightAt(cap & 0xF)->isEnabled = GL_TRUE; - _glRecalcEnabledLights(); + case GL_LIGHT7: { + LightSource* ptr = _glLightAt(cap & 0xF); + if(ptr->isEnabled != GL_TRUE) { + ptr->isEnabled = GL_TRUE; + _glRecalcEnabledLights(); + } + } break; case GL_NEARZ_CLIPPING_KOS: - GPUState.znear_clipping_enabled = GL_TRUE; + if(GPUState.znear_clipping_enabled != GL_TRUE) { + GPUState.znear_clipping_enabled = GL_TRUE; + GPUState.is_dirty = GL_TRUE; + } break; case GL_POLYGON_OFFSET_POINT: case GL_POLYGON_OFFSET_LINE: case GL_POLYGON_OFFSET_FILL: - GPUState.polygon_offset_enabled = GL_TRUE; + if(GPUState.polygon_offset_enabled != GL_TRUE) { + GPUState.polygon_offset_enabled = GL_TRUE; + GPUState.is_dirty = GL_TRUE; + } break; case GL_NORMALIZE: - GPUState.normalize_enabled = 
GL_TRUE; + if(GPUState.normalize_enabled != GL_TRUE) { + GPUState.normalize_enabled = GL_TRUE; + GPUState.is_dirty = GL_TRUE; + } break; default: - GPUState.is_dirty = was_dirty; break; } } GLAPI void APIENTRY glDisable(GLenum cap) { - GLboolean was_dirty = GPUState.is_dirty; - GPUState.is_dirty = GL_TRUE; - switch(cap) { - case GL_TEXTURE_2D: { - TEXTURES_ENABLED[_glGetActiveTexture()] = GL_FALSE; - } break; + case GL_TEXTURE_2D: + if(TEXTURES_ENABLED[_glGetActiveTexture()] != GL_FALSE) { + TEXTURES_ENABLED[_glGetActiveTexture()] = GL_FALSE; + GPUState.is_dirty = GL_TRUE; + } + break; case GL_CULL_FACE: { - GPUState.culling_enabled = GL_FALSE; + if(GPUState.cull_face != GL_FALSE) { + GPUState.cull_face = GL_FALSE; + GPUState.is_dirty = GL_TRUE; + } + } break; case GL_DEPTH_TEST: { - GPUState.depth_test_enabled = GL_FALSE; + if(GPUState.depth_test_enabled != GL_FALSE) { + GPUState.depth_test_enabled = GL_FALSE; + GPUState.is_dirty = GL_TRUE; + } + } break; + case GL_BLEND: { + if(GPUState.blend_enabled != GL_FALSE) { + GPUState.blend_enabled = GL_FALSE; + GPUState.is_dirty = GL_TRUE; + } } break; - case GL_BLEND: - GPUState.blend_enabled = GL_FALSE; - break; case GL_SCISSOR_TEST: { - GPUState.scissor_test_enabled = GL_FALSE; + if(GPUState.scissor_test_enabled != GL_FALSE) { + GPUState.scissor_test_enabled = GL_FALSE; + GPUState.is_dirty = GL_TRUE; + } } break; case GL_LIGHTING: { - GPUState.lighting_enabled = GL_FALSE; + if(GPUState.lighting_enabled != GL_FALSE) { + GPUState.lighting_enabled = GL_FALSE; + GPUState.is_dirty = GL_TRUE; + } } break; case GL_FOG: - GPUState.fog_enabled = GL_FALSE; + if(GPUState.fog_enabled != GL_FALSE) { + GPUState.fog_enabled = GL_FALSE; + GPUState.is_dirty = GL_TRUE; + } break; case GL_COLOR_MATERIAL: - GPUState.color_material_enabled = GL_FALSE; + if(GPUState.color_material_enabled != GL_FALSE) { + GPUState.color_material_enabled = GL_FALSE; + GPUState.is_dirty = GL_TRUE; + } break; case GL_SHARED_TEXTURE_PALETTE_EXT: { - 
GPUState.shared_palette_enabled = GL_FALSE; + if(GPUState.shared_palette_enabled != GL_FALSE) { + GPUState.shared_palette_enabled = GL_FALSE; + GPUState.is_dirty = GL_TRUE; + } } break; case GL_ALPHA_TEST: { - GPUState.alpha_test_enabled = GL_FALSE; + if(GPUState.alpha_test_enabled != GL_FALSE) { + GPUState.alpha_test_enabled = GL_FALSE; + GPUState.is_dirty = GL_TRUE; + } } break; case GL_LIGHT0: case GL_LIGHT1: @@ -497,21 +566,32 @@ GLAPI void APIENTRY glDisable(GLenum cap) { case GL_LIGHT5: case GL_LIGHT6: case GL_LIGHT7: - _glEnableLight(cap & 0xF, GL_FALSE); + if(GPUState.lights[cap & 0xF].isEnabled) { + _glEnableLight(cap & 0xF, GL_FALSE); + GPUState.is_dirty = GL_TRUE; + } break; case GL_NEARZ_CLIPPING_KOS: - GPUState.znear_clipping_enabled = GL_FALSE; + if(GPUState.znear_clipping_enabled != GL_FALSE) { + GPUState.znear_clipping_enabled = GL_FALSE; + GPUState.is_dirty = GL_TRUE; + } break; case GL_POLYGON_OFFSET_POINT: case GL_POLYGON_OFFSET_LINE: case GL_POLYGON_OFFSET_FILL: - GPUState.polygon_offset_enabled = GL_FALSE; + if(GPUState.polygon_offset_enabled != GL_FALSE) { + GPUState.polygon_offset_enabled = GL_FALSE; + GPUState.is_dirty = GL_TRUE; + } break; case GL_NORMALIZE: - GPUState.normalize_enabled = GL_FALSE; + if(GPUState.normalize_enabled != GL_FALSE) { + GPUState.normalize_enabled = GL_FALSE; + GPUState.is_dirty = GL_TRUE; + } break; default: - GPUState.is_dirty = was_dirty; break; } } @@ -558,13 +638,17 @@ GLAPI void APIENTRY glReadBuffer(GLenum mode) { } GLAPI void APIENTRY glDepthMask(GLboolean flag) { - GPUState.depth_mask_enabled = flag; - GPUState.is_dirty = GL_TRUE; + if(GPUState.depth_mask_enabled != flag) { + GPUState.depth_mask_enabled = flag; + GPUState.is_dirty = GL_TRUE; + } } GLAPI void APIENTRY glDepthFunc(GLenum func) { - GPUState.depth_func = func; - GPUState.is_dirty = GL_TRUE; + if(GPUState.depth_func != func) { + GPUState.depth_func = func; + GPUState.is_dirty = GL_TRUE; + } } /* Hints */ @@ -584,26 +668,34 @@ GLAPI void 
APIENTRY glPolygonMode(GLenum face, GLenum mode) { /* Culling */ GLAPI void APIENTRY glFrontFace(GLenum mode) { - GPUState.front_face = mode; - GPUState.is_dirty = GL_TRUE; + if(GPUState.front_face != mode) { + GPUState.front_face = mode; + GPUState.is_dirty = GL_TRUE; + } } GLAPI void APIENTRY glCullFace(GLenum mode) { - GPUState.cull_face = mode; - GPUState.is_dirty = GL_TRUE; + if(GPUState.cull_face != mode) { + GPUState.cull_face = mode; + GPUState.is_dirty = GL_TRUE; + } } /* Shading - Flat or Goraud */ GLAPI void APIENTRY glShadeModel(GLenum mode) { - GPUState.shade_model = mode; - GPUState.is_dirty = GL_TRUE; + if(GPUState.shade_model != mode) { + GPUState.shade_model = mode; + GPUState.is_dirty = GL_TRUE; + } } /* Blending */ GLAPI void APIENTRY glBlendFunc(GLenum sfactor, GLenum dfactor) { - GPUState.blend_sfactor = sfactor; - GPUState.blend_dfactor = dfactor; - GPUState.is_dirty = GL_TRUE; + if(GPUState.blend_dfactor != dfactor || GPUState.blend_sfactor != sfactor) { + GPUState.blend_sfactor = sfactor; + GPUState.blend_dfactor = dfactor; + GPUState.is_dirty = GL_TRUE; + } } diff --git a/GL/texture.c b/GL/texture.c index ce2414e..016be45 100644 --- a/GL/texture.c +++ b/GL/texture.c @@ -607,6 +607,8 @@ void APIENTRY glBindTexture(GLenum target, GLuint texture) { } else { TEXTURE_UNITS[ACTIVE_TEXTURE] = NULL; } + + _glGPUStateMarkDirty(); } void APIENTRY glTexEnvi(GLenum target, GLenum pname, GLint param) { @@ -668,6 +670,8 @@ void APIENTRY glTexEnvi(GLenum target, GLenum pname, GLint param) { default: break; } + + _glGPUStateMarkDirty(); } void APIENTRY glTexEnvf(GLenum target, GLenum pname, GLfloat param) { @@ -1626,6 +1630,8 @@ void APIENTRY glTexParameteri(GLenum target, GLenum pname, GLint param) { break; } } + + _glGPUStateMarkDirty(); } void APIENTRY glTexParameterf(GLenum target, GLenum pname, GLfloat param) { @@ -1783,6 +1789,8 @@ GLAPI void APIENTRY glColorTableEXT(GLenum target, GLenum internalFormat, GLsize } _glApplyColorTable(palette); + + 
_glGPUStateMarkDirty(); } GLAPI void APIENTRY glColorSubTableEXT(GLenum target, GLsizei start, GLsizei count, GLenum format, GLenum type, const GLvoid *data) { From 0c5f941098bfb35b0c070803a99a7e3215e131af Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Fri, 17 Mar 2023 20:40:55 +0000 Subject: [PATCH 19/20] Optimise nearz clipping --- GL/platforms/sh4.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index dede241..05a8d6f 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -104,33 +104,26 @@ static struct __attribute__((aligned(32))) { static int tri_count = 0; static int strip_count = 0; -GL_FORCE_INLINE void interpolateColour(const uint8_t* v1, const uint8_t* v2, const float t, uint8_t* out) { - const static int MASK1 = 0x00FF00FF; - const static int MASK2 = 0xFF00FF00; +GL_FORCE_INLINE void interpolateColour(const uint32_t* a, const uint32_t* b, const float t, uint32_t* out) { + const static uint32_t MASK1 = 0x00FF00FF; + const static uint32_t MASK2 = 0xFF00FF00; - const uint32_t* a = (uint32_t*) v1; - const uint32_t* b = (uint32_t*) v2; - const int f2 = 256.0f * t; - const int f1 = 256 - f2; + const uint32_t f2 = 256 * t; + const uint32_t f1 = 256 - f2; - *((uint32_t*) out) = (((((*a & MASK1) * f1) + ((*b & MASK1) * f2)) >> 8) & MASK1) | + *out = (((((*a & MASK1) * f1) + ((*b & MASK1) * f2)) >> 8) & MASK1) | (((((*a & MASK2) * f1) + ((*b & MASK2) * f2)) >> 8) & MASK2); } -GL_FORCE_INLINE void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) { - static const float E [] = { - 0.00001f, -0.00001f - }; - +static inline void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) { /* Clipping time! */ const float d0 = v1->w + v1->xyz[2]; const float d1 = v2->w + v2->xyz[2]; - const float epsilon = E[d0 < d1]; - - float t = MATH_Fast_Divide(d0, (d0 - d1)) + epsilon; - - t = (t > 1.0f) ? 1.0f : t; - t = (t < 0.0f) ? 
0.0f : t; + const float sign = ((2.0f * (d1 < d0)) - 1.0f); + const float epsilon = -0.00001f * sign; + const float n = (d0 - d1); + const float r = (1.f / sqrtf(n * n)) * sign; + float t = fmaf(r, d0, epsilon); vout->xyz[0] = fmaf(v2->xyz[0] - v1->xyz[0], t, v1->xyz[0]); vout->xyz[1] = fmaf(v2->xyz[1] - v1->xyz[1], t, v1->xyz[1]); @@ -140,7 +133,7 @@ GL_FORCE_INLINE void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vou vout->uv[0] = fmaf(v2->uv[0] - v1->uv[0], t, v1->uv[0]); vout->uv[1] = fmaf(v2->uv[1] - v1->uv[1], t, v1->uv[1]); - interpolateColour(v1->bgra, v2->bgra, t, vout->bgra); + interpolateColour((uint32_t*) v1->bgra, (uint32_t*) v2->bgra, t, (uint32_t*) vout->bgra); } GL_FORCE_INLINE void ClearTriangle() { From 6ee9a823c170eb32d1253a8ea9ad446998e24338 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Thu, 23 Mar 2023 20:01:41 +0000 Subject: [PATCH 20/20] Don't update lights unnecessarily --- GL/lighting.c | 82 ++++++++++++++++++++++++++++++++++++++------------- GL/private.h | 1 + GL/state.c | 4 +++ 3 files changed, 66 insertions(+), 21 deletions(-) diff --git a/GL/lighting.c b/GL/lighting.c index c63defd..b675a92 100644 --- a/GL/lighting.c +++ b/GL/lighting.c @@ -124,8 +124,10 @@ void APIENTRY glLightModeli(GLenum pname, const GLint param) { void APIENTRY glLightModelfv(GLenum pname, const GLfloat *params) { switch(pname) { case GL_LIGHT_MODEL_AMBIENT: { - _glSetLightModelSceneAmbient(params); - _glPrecalcLightingValues(SCENE_AMBIENT_MASK); + if(memcmp(_glGetLightModelSceneAmbient(), params, sizeof(float) * 4) != 0) { + _glSetLightModelSceneAmbient(params); + _glPrecalcLightingValues(SCENE_AMBIENT_MASK); + } } break; case GL_LIGHT_MODEL_LOCAL_VIEWER: _glSetLightModelViewerInEyeCoordinates((*params) ? 
GL_TRUE : GL_FALSE); @@ -164,18 +166,28 @@ void APIENTRY glLightfv(GLenum light, GLenum pname, const GLfloat *params) { LightSource* l = _glLightAt(idx); + GLboolean rebuild = GL_FALSE; + switch(pname) { case GL_AMBIENT: - memcpy(l->ambient, params, sizeof(GLfloat) * 4); + rebuild = memcmp(l->ambient, params, sizeof(GLfloat) * 4) != 0; + if(rebuild) { + memcpy(l->ambient, params, sizeof(GLfloat) * 4); + } break; case GL_DIFFUSE: - memcpy(l->diffuse, params, sizeof(GLfloat) * 4); + rebuild = memcmp(l->diffuse, params, sizeof(GLfloat) * 4) != 0; + if(rebuild) { + memcpy(l->diffuse, params, sizeof(GLfloat) * 4); + } break; case GL_SPECULAR: - memcpy(l->specular, params, sizeof(GLfloat) * 4); + rebuild = memcmp(l->specular, params, sizeof(GLfloat) * 4) != 0; + if(rebuild) { + memcpy(l->specular, params, sizeof(GLfloat) * 4); + } break; case GL_POSITION: { - _glMatrixLoadModelView(); memcpy(l->position, params, sizeof(GLfloat) * 4); l->isDirectional = params[3] == 0.0f; @@ -183,6 +195,7 @@ void APIENTRY glLightfv(GLenum light, GLenum pname, const GLfloat *params) { if(l->isDirectional) { //FIXME: Do we need to rotate directional lights? 
} else { + _glMatrixLoadModelView(); TransformVec3(l->position); } } @@ -204,7 +217,10 @@ void APIENTRY glLightfv(GLenum light, GLenum pname, const GLfloat *params) { return; } - _glPrecalcLightingValues(mask); + if(rebuild) { + _glPrecalcLightingValues(mask); + } + } void APIENTRY glLightf(GLenum light, GLenum pname, GLfloat param) { @@ -258,25 +274,47 @@ void APIENTRY glMaterialfv(GLenum face, GLenum pname, const GLfloat *params) { Material* material = _glActiveMaterial(); + GLboolean rebuild = GL_FALSE; + switch(pname) { case GL_SHININESS: glMaterialf(face, pname, *params); + rebuild = GL_TRUE; break; - case GL_AMBIENT: - vec4cpy(material->ambient, params); - break; + case GL_AMBIENT: { + if(memcmp(material->ambient, params, sizeof(float) * 4) != 0) { + vec4cpy(material->ambient, params); + rebuild = GL_TRUE; + } + } break; case GL_DIFFUSE: - vec4cpy(material->diffuse, params); + if(memcmp(material->diffuse, params, sizeof(float) * 4) != 0) { + vec4cpy(material->diffuse, params); + rebuild = GL_TRUE; + } break; case GL_SPECULAR: - vec4cpy(material->specular, params); + if(memcmp(material->specular, params, sizeof(float) * 4) != 0) { + vec4cpy(material->specular, params); + rebuild = GL_TRUE; + } break; case GL_EMISSION: - vec4cpy(material->emissive, params); + if(memcmp(material->emissive, params, sizeof(float) * 4) != 0) { + vec4cpy(material->emissive, params); + rebuild = GL_TRUE; + } break; case GL_AMBIENT_AND_DIFFUSE: { - vec4cpy(material->ambient, params); - vec4cpy(material->diffuse, params); + rebuild = ( + memcmp(material->ambient, params, sizeof(float) * 4) != 0 || + memcmp(material->diffuse, params, sizeof(float) * 4) != 0 + ); + + if(rebuild) { + vec4cpy(material->ambient, params); + vec4cpy(material->diffuse, params); + } } break; case GL_COLOR_INDEXES: default: { @@ -285,13 +323,15 @@ void APIENTRY glMaterialfv(GLenum face, GLenum pname, const GLfloat *params) { } } - GLuint updateMask = (pname == GL_AMBIENT) ? 
AMBIENT_MASK: - (pname == GL_DIFFUSE) ? DIFFUSE_MASK: - (pname == GL_SPECULAR) ? SPECULAR_MASK: - (pname == GL_EMISSION) ? EMISSION_MASK: - (pname == GL_AMBIENT_AND_DIFFUSE) ? AMBIENT_MASK | DIFFUSE_MASK : 0; + if(rebuild) { + GLuint updateMask = (pname == GL_AMBIENT) ? AMBIENT_MASK: + (pname == GL_DIFFUSE) ? DIFFUSE_MASK: + (pname == GL_SPECULAR) ? SPECULAR_MASK: + (pname == GL_EMISSION) ? EMISSION_MASK: + (pname == GL_AMBIENT_AND_DIFFUSE) ? AMBIENT_MASK | DIFFUSE_MASK : 0; - _glPrecalcLightingValues(updateMask); + _glPrecalcLightingValues(updateMask); + } } void APIENTRY glColorMaterial(GLenum face, GLenum mode) { diff --git a/GL/private.h b/GL/private.h index 7db1b13..f309571 100644 --- a/GL/private.h +++ b/GL/private.h @@ -520,6 +520,7 @@ void _glSetLightModelColorControl(GLint v); GLuint _glEnabledLightCount(); void _glRecalcEnabledLights(); GLfloat* _glLightModelSceneAmbient(); +GLfloat* _glGetLightModelSceneAmbient(); LightSource* _glLightAt(GLuint i); GLboolean _glNearZClippingEnabled(); diff --git a/GL/state.c b/GL/state.c index ced1cdc..618eefd 100644 --- a/GL/state.c +++ b/GL/state.c @@ -180,6 +180,10 @@ void _glSetLightModelSceneAmbient(const GLfloat* v) { vec4cpy(GPUState.scene_ambient, v); } +GLfloat* _glGetLightModelSceneAmbient() { + return GPUState.scene_ambient; +} + void _glSetLightModelColorControl(GLint v) { GPUState.color_control = v; }