From 49f2f0917bc95a279d5822c14b7b7e530ffc7129 Mon Sep 17 00:00:00 2001 From: Falco Girgis Date: Wed, 31 Jul 2024 15:36:19 -0500 Subject: [PATCH 1/6] Nehe20 Build Fix + Warnings cleanup for GCC14.1.0. Newest toolchain is bitchier, so I wanted to go ahead and clear up all of the warnings from building GLdc and the various examples... 1) Set CMake CXX standard to 14 instead of 11, since CXXFLAGS were enforcing that language standard anyway. 2) Fixed a bunch of strict aliasing violations in immediate.c: glVertex3f. 3) Removed or commented out lots of unused variables. 4) Fixed some "suggested inner braces on initializer" crap. 5) Fixed a bunch of signed vs unsigned pointer assignments. 6) Fixed several printf() warnings from using %d with int32_t (needs to be %ld for long int). 7) Fixed build issue with Nehe20 from not including kos.h for the KOS_ROMDISK macro. 8) Fixed some signed vs unsigned comparison mismatches in C++ template instantiations within clipping tests. 9) --- CMakeLists.txt | 2 +- GL/immediate.c | 32 ++++++++++++++-------- GL/state.c | 2 -- samples/blend_test/main.c | 1 - samples/depth_funcs_alpha_testing/gl_png.c | 1 - samples/lerabot01/main.c | 16 +++-------- samples/lights/main.c | 3 +- samples/loadbmp.c | 4 +-- samples/mipmap/main.c | 4 +-- samples/nehe06_4444twid/main.c | 6 ++-- samples/nehe06_vq/main.c | 8 +++--- samples/nehe20/main.c | 7 +++-- samples/paletted/main.c | 3 +- samples/paletted_pcx/main.c | 8 +++--- samples/profiler.c | 2 +- tests/zclip/main.cpp | 14 +++++----- 16 files changed, 54 insertions(+), 59 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0f7f87a..048f894 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,7 +22,7 @@ string(TOUPPER ${BACKEND} BACKEND_UPPER) add_definitions(-DBACKEND_${BACKEND_UPPER}) set(CMAKE_C_STANDARD 99) -set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD 14) include_directories(include) diff --git a/GL/immediate.c b/GL/immediate.c index afe80c5..d80e194 100644 --- a/GL/immediate.c +++ b/GL/immediate.c @@ -158,6 +158,14 @@ void APIENTRY glColor3fv(const GLfloat* v) { COLOR[B8IDX] = (GLubyte)(v[2] * 255); } +typedef union punned { + GLubyte* byte; + GLfloat* flt; + uint32_t* u32; + void* vptr; + uintptr_t uptr; +} punned_t; + void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) { IM_ENABLED_VERTEX_ATTRIBUTES |= VERTEX_ENABLED_FLAG; @@ -170,18 +178,18 @@ void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) { IM_ATTRIBS.colour.ptr = IM_ATTRIBS.st.ptr + 8; IM_ATTRIBS.normal.ptr = IM_ATTRIBS.colour.ptr + 4; - uint32_t* dest = (uint32_t*) &vert->x; - *(dest++) = *((uint32_t*) &x); - *(dest++) = *((uint32_t*) &y); - *(dest++) = *((uint32_t*) &z); - *(dest++) = *((uint32_t*) &UV_COORD[0]); - *(dest++) = *((uint32_t*) &UV_COORD[1]); - *(dest++) = *((uint32_t*) &ST_COORD[0]); - *(dest++) = *((uint32_t*) &ST_COORD[1]); - *(dest++) = *((uint32_t*) COLOR); - *(dest++) = *((uint32_t*) &NORMAL[0]); - *(dest++) = *((uint32_t*) &NORMAL[1]); - *(dest++) = *((uint32_t*) &NORMAL[2]); + punned_t dest = { .flt = &vert->x }; + *(dest.flt++) = x; + *(dest.flt++) = y; + *(dest.flt++) = z; + *(dest.flt++) = UV_COORD[0]; + *(dest.flt++) = UV_COORD[1]; + *(dest.flt++) = ST_COORD[0]; + *(dest.flt++) = ST_COORD[1]; + *(dest.u32++) = *((uint32_t*)(void*) COLOR); + *(dest.flt++) = NORMAL[0]; + *(dest.flt++) = NORMAL[1]; + *(dest.flt++) = NORMAL[2]; } void APIENTRY glVertex3fv(const GLfloat* v) { diff --git a/GL/state.c b/GL/state.c index 52f2656..d8b89f2 100644 --- a/GL/state.c +++ b/GL/state.c @@ -80,9 +80,7 @@ static struct { .color_control = GL_SINGLE_COLOR, .color_material_mode = GL_AMBIENT_AND_DIFFUSE, .color_material_mask = AMBIENT_MASK | DIFFUSE_MASK, - .lights = {0}, .enabled_light_count = 0, - .material = {0}, .shade_model = GL_SMOOTH }; diff --git a/samples/blend_test/main.c b/samples/blend_test/main.c index 6fbb795..1303f7f 100644 --- a/samples/blend_test/main.c +++ b/samples/blend_test/main.c @@ -80,7 +80,6 @@ void DrawGLScene() { const float RED [] = {1.0, 0, 0, 0.5}; const float BLUE [] = {0.0, 0, 1, 0.5}; - const float NONE [] = {0, 0, 0, 0}; glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // Clear The Screen And The Depth Buffer diff --git a/samples/depth_funcs_alpha_testing/gl_png.c b/samples/depth_funcs_alpha_testing/gl_png.c index 686035c..5415638 100644 --- a/samples/depth_funcs_alpha_testing/gl_png.c +++ b/samples/depth_funcs_alpha_testing/gl_png.c @@ -48,7 +48,6 @@ int dtex_to_gl_texture(texture *tex, char* filename) { GLboolean twiddled = (header.type & (1 << 26)) < 1; GLboolean compressed = (header.type & (1 << 30)) > 0; GLboolean mipmapped = (header.type & (1 << 31)) > 0; - GLboolean strided = (header.type & (1 << 25)) > 0; GLuint format = (header.type >> 27) & 0b111; image->data = (char *) malloc (header.size); diff --git a/samples/lerabot01/main.c b/samples/lerabot01/main.c index 71ca463..d16c0a5 100644 --- a/samples/lerabot01/main.c +++ b/samples/lerabot01/main.c @@ -26,7 +26,7 @@ KOS_INIT_ROMDISK(romdisk); float xrot, yrot, zrot; /* storage for one texture */ -int texture[1]; +GLuint texture[1]; // Load Bitmaps And Convert To Textures void LoadGLTextures() { @@ -81,7 +81,7 @@ void InitGL(int Width, int Height) // We call this right after our OpenG GLfloat l1_pos[] = {5.0, 0.0, 1.0, 1.0}; GLfloat l1_diff[] = {1.0, 0.0, 0.0, 1.0}; - GLfloat l1_amb[] = {0.5, 0.5, 0.5, 1.0}; + //GLfloat l1_amb[] = {0.5, 0.5, 0.5, 1.0}; //glLightfv(GL_LIGHT1, GL_AMBIENT, l1_amb); glLightfv(GL_LIGHT1, GL_DIFFUSE, l1_diff); @@ -93,7 +93,7 @@ void InitGL(int Width, int Height) // We call this right after our OpenG GLfloat l2_pos[] = {0.0, 15.0, 1.0, 1.0}; GLfloat l2_dir[] = {0.0, -1.0, 0.0}; GLfloat l2_diff[] = {0.5, 0.5, 0.0, 1.0}; - GLfloat l2_amb[] = {0.5, 0.5, 0.5, 1.0}; + //GLfloat l2_amb[] = {0.5, 0.5, 0.5, 1.0}; glEnable(GL_LIGHT2); glLightfv(GL_LIGHT2, GL_DIFFUSE, l2_diff); @@ -145,7 +145,7 @@ void DrawTexturedQuad(int tex, float x, float y, float z) GLfloat y0 = y - texH / 2; GLfloat x1 = x + texW / 2; GLfloat y1 = y + texH / 2; - GLfloat color[] = {1.0f, 1.0f, 1.0f, 1.0f}; + //GLfloat color[] = {1.0f, 1.0f, 1.0f, 1.0f}; GLfloat mat_ambient[] = {1.0f, 1.0f, 1.0f, 1.0f}; GLfloat vertex_data[] = { @@ -172,14 +172,6 @@ void DrawTexturedQuad(int tex, float x, float y, float z) 0.0, 0.0, 1.0 }; - GLfloat color_data[] = { - /* 2D Coordinate, texture coordinate */ - color[0], color[1], color[2], color[3], - color[0], color[1], color[2], color[3], - color[0], color[1], color[2], color[3], - color[0], color[1], color[2], color[3] - }; - //GLint indices[] = {0,1,2,3,2,3}; glEnable(GL_TEXTURE_2D); diff --git a/samples/lights/main.c b/samples/lights/main.c index afd048d..223bd86 100644 --- a/samples/lights/main.c +++ b/samples/lights/main.c @@ -24,8 +24,7 @@ KOS_INIT_ROMDISK(romdisk); #include "../loadbmp.h" float xrot, yrot, zrot; - -int texture[1]; +GLuint texture[1]; void LoadGLTextures() { diff --git a/samples/loadbmp.c b/samples/loadbmp.c index 65bd571..936ba06 100644 --- a/samples/loadbmp.c +++ b/samples/loadbmp.c @@ -35,7 +35,7 @@ int ImageLoad(char *filename, Image *image) { return 0; } image->sizeX = sizeX; - printf("Width of %s: %d\n", filename, sizeX); + printf("Width of %s: %ld\n", filename, sizeX); // read the height if ((i = fread(&sizeY, 4, 1, file)) != 1) { @@ -43,7 +43,7 @@ int ImageLoad(char *filename, Image *image) { return 0; } image->sizeY = sizeY; - printf("Height of %s: %d\n", filename, sizeY); + printf("Height of %s: %ld\n", filename, sizeY); // calculate the size (assuming 24 bits or 3 bytes per pixel). size = image->sizeX * image->sizeY * 3; diff --git a/samples/mipmap/main.c b/samples/mipmap/main.c index ba65d5f..8dc5fca 100644 --- a/samples/mipmap/main.c +++ b/samples/mipmap/main.c @@ -20,9 +20,7 @@ KOS_INIT_ROMDISK(romdisk); #endif #include "../loadbmp.h" - -/* storage for one texture */ -int texture[1]; +GLuint texture[1]; // Load Bitmaps And Convert To Textures void LoadGLTextures() { diff --git a/samples/nehe06_4444twid/main.c b/samples/nehe06_4444twid/main.c index 3cc58d5..f290c6f 100644 --- a/samples/nehe06_4444twid/main.c +++ b/samples/nehe06_4444twid/main.c @@ -62,7 +62,7 @@ int ImageLoad(char *filename, Image *image) { GLboolean twiddled = (header.type & (1 << 26)) < 1; GLboolean compressed = (header.type & (1 << 30)) > 0; GLboolean mipmapped = (header.type & (1 << 31)) > 0; - GLboolean strided = (header.type & (1 << 25)) > 0; + //GLboolean strided = (header.type & (1 << 25)) > 0; GLuint format = (header.type >> 27) & 0b111; image->data = (char *) malloc (header.size); @@ -70,8 +70,8 @@ int ImageLoad(char *filename, Image *image) { image->sizeY = header.height; image->dataSize = header.size; - GLuint expected = 2 * header.width * header.height; - GLuint ratio = (GLuint) (((GLfloat) expected) / ((GLfloat) header.size)); + //GLuint expected = 2 * header.width * header.height; + //GLuint ratio = (GLuint) (((GLfloat) expected) / ((GLfloat) header.size)); fread(image->data, image->dataSize, 1, file); fclose(file); diff --git a/samples/nehe06_vq/main.c b/samples/nehe06_vq/main.c index cf2156e..754f458 100644 --- a/samples/nehe06_vq/main.c +++ b/samples/nehe06_vq/main.c @@ -22,7 +22,7 @@ KOS_INIT_ROMDISK(romdisk); float xrot, yrot, zrot; /* storage for one texture */ -int texture[1]; +GLuint texture[1]; /* Image type - contains height, width, and data */ struct Image { @@ -59,7 +59,7 @@ int ImageLoad(char *filename, Image *image) { GLboolean twiddled = (header.type & (1 << 26)) < 1; GLboolean compressed = (header.type & (1 << 30)) > 0; GLboolean mipmapped = (header.type & (1 << 31)) > 0; - GLboolean strided = (header.type & (1 << 25)) > 0; + //GLboolean strided = (header.type & (1 << 25)) > 0; GLuint format = (header.type >> 27) & 0b111; image->data = (char *) malloc (header.size); @@ -67,8 +67,8 @@ int ImageLoad(char *filename, Image *image) { image->sizeY = header.height; image->dataSize = header.size; - GLuint expected = 2 * header.width * header.height; - GLuint ratio = (GLuint) (((GLfloat) expected) / ((GLfloat) header.size)); + //GLuint expected = 2 * header.width * header.height; + //GLuint ratio = (GLuint) (((GLfloat) expected) / ((GLfloat) header.size)); fread(image->data, image->dataSize, 1, file); fclose(file); diff --git a/samples/nehe20/main.c b/samples/nehe20/main.c index 8050dbd..54c7825 100644 --- a/samples/nehe20/main.c +++ b/samples/nehe20/main.c @@ -11,6 +11,11 @@ #include #include + +#ifdef __DREAMCAST__ +#include +#endif + #define FPS 60 uint32_t waittime = 1000.0f/FPS; uint32_t framestarttime = 0; @@ -227,8 +232,6 @@ int DrawGLScene(GLvoid) // Here's Where We Do All The Drawing int main(int argc, char *argv[]) { - BOOL done=FALSE; // Bool Variable To Exit Loop - glKosInit(); InitGL(); diff --git a/samples/paletted/main.c b/samples/paletted/main.c index 7b88cf3..50fbdf3 100644 --- a/samples/paletted/main.c +++ b/samples/paletted/main.c @@ -22,8 +22,7 @@ KOS_INIT_ROMDISK(romdisk); /* floats for x rotation, y rotation, z rotation */ float xrot, yrot, zrot; -/* storage for one texture */ -int texture[1]; +GLuint texture[1]; typedef struct { unsigned int height; diff --git a/samples/paletted_pcx/main.c b/samples/paletted_pcx/main.c index e397ab7..a01744a 100644 --- a/samples/paletted_pcx/main.c +++ b/samples/paletted_pcx/main.c @@ -41,7 +41,7 @@ /* floats for x rotation, y rotation, z rotation */ float xrot, yrot, zrot; -int textures[3]; +GLuint textures[3]; typedef struct { uint32_t height; @@ -272,7 +272,7 @@ int BMP_GetPalette(FILE *pFile) bitCount = BmpInfoHeader.ClrImportant * sizeof(RGB_QUAD); if (fread(BmpRgbQuad, 1, bitCount, pFile) != bitCount){ - fprintf(stderr, "Failed to read palette: %d\n", bitCount); + fprintf(stderr, "Failed to read palette: %ld\n", bitCount); return 0; } @@ -293,7 +293,7 @@ int BMP_GetPalette(FILE *pFile) int BMP_Depack(FILE *pFile,char *pZone) { char PadRead[4]; - int32_t i, j, Offset, PadSize, pix, c; + int32_t i, j, Offset, PadSize, c; if (BmpInfoHeader.Compression != BMP_BI_RGB) return 0; @@ -356,7 +356,7 @@ int LoadPalettedBMP(const char* filename, Image* image) } /* store palette information */ - image->palette = BmpPal; + image->palette = (char*)BmpPal; image->palette_width = 16; diff --git a/samples/profiler.c b/samples/profiler.c index c44c3c9..cefc81a 100644 --- a/samples/profiler.c +++ b/samples/profiler.c @@ -287,7 +287,7 @@ static bool write_samples(const char* path) { root = ARCS; for(int i = 0; i < BUCKET_SIZE; ++i) { if(root->pc) { - printf("Incrementing %d for %x. ", (root->pc - lowest_address) / bin_size, (unsigned int) root->pc); + printf("Incrementing %ld for %x. ", (root->pc - lowest_address) / bin_size, (unsigned int) root->pc); bins[(root->pc - lowest_address) / bin_size]++; printf("Now: %d\n", (int) bins[(root->pc - lowest_address) / bin_size]); diff --git a/tests/zclip/main.cpp b/tests/zclip/main.cpp index adada72..42febee 100644 --- a/tests/zclip/main.cpp +++ b/tests/zclip/main.cpp @@ -435,7 +435,7 @@ bool test_clip_case_001() { SceneListSubmit(&data[0], data.size()); - check_equal(sent.size(), 5); + check_equal(sent.size(), 5u); check_equal(sent[0].flags, GPU_CMD_POLYHDR); check_equal(sent[1].flags, GPU_CMD_VERTEX); check_equal(sent[2].flags, GPU_CMD_VERTEX); @@ -461,7 +461,7 @@ bool test_clip_case_010() { SceneListSubmit(&data[0], data.size()); - check_equal(sent.size(), 4); + check_equal(sent.size(), 4u); check_equal(sent[0].flags, GPU_CMD_POLYHDR); check_equal(sent[1].flags, GPU_CMD_VERTEX); check_equal(sent[2].flags, GPU_CMD_VERTEX); @@ -481,7 +481,7 @@ bool test_clip_case_100() { SceneListSubmit(&data[0], data.size()); - check_equal(sent.size(), 5); + check_equal(sent.size(), 5u); check_equal(sent[0].flags, GPU_CMD_POLYHDR); check_equal(sent[1].flags, GPU_CMD_VERTEX); check_equal(sent[2].flags, GPU_CMD_VERTEX); @@ -507,7 +507,7 @@ bool test_clip_case_110() { SceneListSubmit(&data[0], data.size()); - check_equal(sent.size(), 6); + check_equal(sent.size(), 6u); check_equal(sent[0].flags, GPU_CMD_POLYHDR); check_equal(sent[1].flags, GPU_CMD_VERTEX); check_equal(sent[2].flags, GPU_CMD_VERTEX); @@ -530,7 +530,7 @@ bool test_clip_case_011() { SceneListSubmit(&data[0], data.size()); - check_equal(sent.size(), 6); + check_equal(sent.size(), 6u); check_equal(sent[0].flags, GPU_CMD_POLYHDR); check_equal(sent[1].flags, GPU_CMD_VERTEX); check_equal(sent[2].flags, GPU_CMD_VERTEX); @@ -553,7 +553,7 @@ bool test_clip_case_101() { SceneListSubmit(&data[0], data.size()); - check_equal(sent.size(), 6); + check_equal(sent.size(), 6u); check_equal(sent[0].flags, GPU_CMD_POLYHDR); check_equal(sent[1].flags, GPU_CMD_VERTEX); check_equal(sent[2].flags, GPU_CMD_VERTEX); @@ -576,7 +576,7 @@ bool test_clip_case_111() { SceneListSubmit(&data[0], data.size()); - check_equal(sent.size(), 4); + check_equal(sent.size(), 4u); check_equal(sent[0].flags, GPU_CMD_POLYHDR); check_equal(sent[1].flags, GPU_CMD_VERTEX); check_equal(sent[2].flags, GPU_CMD_VERTEX); From b920855b572f84bb6c282b258b3d0532661d6fd7 Mon Sep 17 00:00:00 2001 From: Falco Girgis Date: Wed, 31 Jul 2024 23:08:37 -0500 Subject: [PATCH 2/6] Micro optimizations and clean-up. --- GL/platforms/sh4.c | 95 +++++++++++++++------------------------------- 1 file changed, 30 insertions(+), 65 deletions(-) diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index 13bb99d..d7c19de 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -13,8 +13,6 @@ #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) -#define SQ_BASE_ADDRESS (void*) 0xe0000000 - GL_FORCE_INLINE bool glIsVertex(const float flags) { return flags == GPU_CMD_VERTEX_EOL || flags == GPU_CMD_VERTEX; @@ -75,30 +73,29 @@ GL_FORCE_INLINE float _glFastInvert(float x) { return (1.0f / __builtin_sqrtf(x * x)); } -GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) { +GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h, int count) { TRACE(); - const float f = _glFastInvert(vertex->w); + for(int v = 0; v < count; ++v) { + const float f = _glFastInvert(vertex[v].w); - /* Convert to NDC and apply viewport */ - vertex->xyz[0] = (vertex->xyz[0] * f * 320) + 320; - vertex->xyz[1] = (vertex->xyz[1] * f * -240) + 240; + /* Convert to NDC and apply viewport */ + vertex[v].xyz[0] = (vertex[v].xyz[0] * f * 320) + 320; + vertex[v].xyz[1] = (vertex[v].xyz[1] * f * -240) + 240; - /* Orthographic projections need to use invZ otherwise we lose - the depth information. As w == 1, and clip-space range is -w to +w - we add 1.0 to the Z to bring it into range. We add a little extra to - avoid a divide by zero. - */ - if(vertex->w == 1.0f) { - vertex->xyz[2] = _glFastInvert(1.0001f + vertex->xyz[2]); - } else { - vertex->xyz[2] = f; + /* Orthographic projections need to use invZ otherwise we lose + the depth information. As w == 1, and clip-space range is -w to +w + we add 1.0 to the Z to bring it into range. We add a little extra to + avoid a divide by zero. + */ + if(vertex[v].w == 1.0f) { + vertex[v].xyz[2] = _glFastInvert(1.0001f + vertex[v].xyz[2]); + } else { + vertex[v].xyz[2] = f; + } } } - -volatile uint32_t *sq = SQ_BASE_ADDRESS; - static inline void _glFlushBuffer() { TRACE(); @@ -142,7 +139,6 @@ static inline void _glClipEdge(const Vertex* const v1, const Vertex* const v2, V #define SPAN_SORT_CFG 0x005F8030 static volatile uint32_t* PVR_LMMODE0 = (uint32_t*) 0xA05F6884; static volatile uint32_t *PVR_LMMODE1 = (uint32_t*) 0xA05F6888; -static volatile uint32_t *QACR = (uint32_t*) 0xFF000038; enum Visible { NONE_VISIBLE = 0, @@ -175,9 +171,6 @@ void SceneListSubmit(Vertex* vertices, int n) { *PVR_LMMODE0 = 0; *PVR_LMMODE1 = 0; - //Set QACR registers - QACR[1] = QACR[0] = 0x11; - #if CLIP_DEBUG fprintf(stderr, "----\n"); @@ -206,7 +199,6 @@ void SceneListSubmit(Vertex* vertices, int n) { int visible_mask = 0; - sq = SQ_BASE_ADDRESS; sq_dest_addr = (uintptr_t)SQ_MASK_DEST(PVR_TA_INPUT); sq_lock((void *)PVR_TA_INPUT); @@ -237,13 +229,8 @@ void SceneListSubmit(Vertex* vertices, int n) { if(visible_mask == ALL_VISIBLE) { SUBMIT_QUEUED_VERTEX(qv.flags); - _glPerspectiveDivideVertex(v0, h); - //_glPushHeaderOrVertex(v0); - + _glPerspectiveDivideVertex(v0, h, 2); v1->flags = GPU_CMD_VERTEX_EOL; - - _glPerspectiveDivideVertex(v1, h); - //_glPushHeaderOrVertex(v1); _glPushHeaderOrVertex(v0, 2); } else { // If the previous triangle wasn't all visible, and we @@ -280,7 +267,7 @@ void SceneListSubmit(Vertex* vertices, int n) { switch(visible_mask) { case ALL_VISIBLE: - _glPerspectiveDivideVertex(v0, h); + _glPerspectiveDivideVertex(v0, h, 1); QUEUE_VERTEX(v0); break; case NONE_VISIBLE: @@ -293,14 +280,10 @@ void SceneListSubmit(Vertex* vertices, int n) { _glClipEdge(v2, v0, b); b->flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(v0, h); + _glPerspectiveDivideVertex(v0, h, 1); _glPushHeaderOrVertex(v0, 1); - _glPerspectiveDivideVertex(a, h); - //_glPushHeaderOrVertex(a); - - _glPerspectiveDivideVertex(b, h); - //_glPushHeaderOrVertex(b); + _glPerspectiveDivideVertex(a, h, 2); _glPushHeaderOrVertex(a, 2); QUEUE_VERTEX(b); @@ -314,13 +297,11 @@ void SceneListSubmit(Vertex* vertices, int n) { _glClipEdge(v1, v2, b); b->flags = v2->flags; - _glPerspectiveDivideVertex(a, h); + _glPerspectiveDivideVertex(a, h, 3); _glPushHeaderOrVertex(a, 1); - _glPerspectiveDivideVertex(c, h); _glPushHeaderOrVertex(c, 1); - _glPerspectiveDivideVertex(b, h); QUEUE_VERTEX(b); break; case THIRD_VISIBLE: @@ -332,15 +313,9 @@ void SceneListSubmit(Vertex* vertices, int n) { _glClipEdge(v1, v2, b); b->flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(a, h); - //_glPushHeaderOrVertex(a); - //_glPushHeaderOrVertex(a); - - _glPerspectiveDivideVertex(b, h); - //_glPushHeaderOrVertex(b); + _glPerspectiveDivideVertex(a, h, 3); _glPushHeaderOrVertex(a, 2); - _glPerspectiveDivideVertex(c, h); QUEUE_VERTEX(c); break; case FIRST_AND_SECOND_VISIBLE: @@ -349,20 +324,16 @@ void SceneListSubmit(Vertex* vertices, int n) { _glClipEdge(v2, v0, b); b->flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(v0, h); + _glPerspectiveDivideVertex(v0, h, 1); _glPushHeaderOrVertex(v0, 1); _glClipEdge(v1, v2, a); a->flags = v2->flags; - _glPerspectiveDivideVertex(c, h); + _glPerspectiveDivideVertex(a, h, 3); + _glPushHeaderOrVertex(c, 1); - _glPerspectiveDivideVertex(b, h); - //_glPushHeaderOrVertex(b); - - _glPerspectiveDivideVertex(a, h); - //_glPushHeaderOrVertex(c); _glPushHeaderOrVertex(b, 2); QUEUE_VERTEX(a); @@ -377,18 +348,13 @@ void SceneListSubmit(Vertex* vertices, int n) { _glClipEdge(v2, v0, b); b->flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(a, h); + _glPerspectiveDivideVertex(a, h, 4); _glPushHeaderOrVertex(a, 1); - _glPerspectiveDivideVertex(c, h); _glPushHeaderOrVertex(c, 1); - _glPerspectiveDivideVertex(b, h); - //_glPushHeaderOrVertex(b); - //_glPushHeaderOrVertex(c); _glPushHeaderOrVertex(b, 2); - _glPerspectiveDivideVertex(d, h); QUEUE_VERTEX(d); break; case FIRST_AND_THIRD_VISIBLE: @@ -401,16 +367,16 @@ void SceneListSubmit(Vertex* vertices, int n) { _glClipEdge(v1, v2, b); b->flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(v0, h); + _glPerspectiveDivideVertex(v0, h, 1); _glPushHeaderOrVertex(v0, 1); - _glPerspectiveDivideVertex(a, h); + _glPerspectiveDivideVertex(a, h, 3); _glPushHeaderOrVertex(a, 1); - _glPerspectiveDivideVertex(c, h); _glPushHeaderOrVertex(c, 1); - _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b, 1); + QUEUE_VERTEX(c); break; default: @@ -421,7 +387,6 @@ void SceneListSubmit(Vertex* vertices, int n) { SUBMIT_QUEUED_VERTEX(GPU_CMD_VERTEX_EOL); _glFlushBuffer(); - sq_unlock(); } From 812ed4a1ee58fecf12013e0f1194a728fada51f8 Mon Sep 17 00:00:00 2001 From: Falco Girgis Date: Wed, 31 Jul 2024 23:29:09 -0500 Subject: [PATCH 3/6] Fixing PC build. --- GL/matrix.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/GL/matrix.c b/GL/matrix.c index bfa2f12..d70a104 100644 --- a/GL/matrix.c +++ b/GL/matrix.c @@ -174,8 +174,13 @@ void APIENTRY glRotatef(GLfloat angle, GLfloat x, GLfloat y, GLfloat z) { }; float r = DEG2RAD * angle; - float c = fcos(r); - float s = fsin(r); +#ifdef __DREAMCAST__ + float s, c; + fsincos(r, &s, &c); +#else + float c = cosf(r); + float s = sinf(r); +#endif VEC3_NORMALIZE(x, y, z); From 2f26574a4476bd1c57926ac853ecdf3ce08e25b6 Mon Sep 17 00:00:00 2001 From: UnknownShadow200 Date: Sat, 3 Aug 2024 15:52:56 +1000 Subject: [PATCH 4/6] Combine projection transform and viewport transform together --- GL/matrix.c | 80 ++++++++++++++++++++--------------------- GL/platforms/sh4.c | 7 ++-- GL/platforms/software.c | 80 +++++++++++++++++++---------------------- GL/private.h | 14 -------- 4 files changed, 79 insertions(+), 102 deletions(-) diff --git a/GL/matrix.c b/GL/matrix.c index 2744cb9..5c1fac3 100644 --- a/GL/matrix.c +++ b/GL/matrix.c @@ -15,10 +15,7 @@ GLfloat DEPTH_RANGE_MULTIPLIER_H = (0 + 1) / 2; static Stack __attribute__((aligned(32))) MATRIX_STACKS[4]; // modelview, projection, texture static Matrix4x4 __attribute__((aligned(32))) NORMAL_MATRIX; - -Viewport VIEWPORT = { - 0, 0, 640, 480, 320.0f, 240.0f, 320.0f, 240.0f -}; +static Matrix4x4 __attribute__((aligned(32))) VIEWPORT_MATRIX; static GLenum MATRIX_MODE = GL_MODELVIEW; static GLubyte MATRIX_IDX = 0; @@ -94,12 +91,29 @@ static void transpose(GLfloat* m) { swap(m[11], m[14]); } -static void recalculateNormalMatrix() { +/* When projection matrix changes, need to pre-multiply with viewport transform matrix */ +static void OnProjectionChanged() { + UploadMatrix4x4(&VIEWPORT_MATRIX); + MultiplyMatrix4x4(stack_top(MATRIX_STACKS + (GL_PROJECTION & 0xF))); + DownloadMatrix4x4(stack_top(MATRIX_STACKS + (GL_PROJECTION & 0xF))); +} + +/* When modelview matrix changes, need to re-compute normal matrix */ +static void OnModelviewChanged() { MEMCPY4(NORMAL_MATRIX, stack_top(MATRIX_STACKS + (GL_MODELVIEW & 0xF)), sizeof(Matrix4x4)); inverse((GLfloat*) NORMAL_MATRIX); transpose((GLfloat*) NORMAL_MATRIX); } +static void OnMatrixChanged() { + if(MATRIX_MODE == GL_MODELVIEW) { + OnModelviewChanged(); + } else if(MATRIX_MODE == GL_PROJECTION) { + OnProjectionChanged(); + } +} + + void APIENTRY glMatrixMode(GLenum mode) { MATRIX_MODE = mode; MATRIX_IDX = mode & 0xF; @@ -116,12 +130,13 @@ void APIENTRY glPushMatrix() { void APIENTRY glPopMatrix() { stack_pop(MATRIX_STACKS + MATRIX_IDX); if(MATRIX_MODE == GL_MODELVIEW) { - recalculateNormalMatrix(); + OnModelviewChanged(); } } void APIENTRY glLoadIdentity() { stack_replace(MATRIX_STACKS + MATRIX_IDX, IDENTITY); + OnMatrixChanged(); } void APIENTRY glTranslatef(GLfloat x, GLfloat y, GLfloat z) { @@ -141,10 +156,7 @@ void APIENTRY glTranslatef(GLfloat x, GLfloat y, GLfloat z) { assert(top); DownloadMatrix4x4(top); - - if(MATRIX_MODE == GL_MODELVIEW) { - recalculateNormalMatrix(); - } + OnMatrixChanged(); } @@ -159,10 +171,7 @@ void APIENTRY glScalef(GLfloat x, GLfloat y, GLfloat z) { UploadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX)); MultiplyMatrix4x4(&scale); DownloadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX)); - - if(MATRIX_MODE == GL_MODELVIEW) { - recalculateNormalMatrix(); - } + OnMatrixChanged(); } void APIENTRY glRotatef(GLfloat angle, GLfloat x, GLfloat y, GLfloat z) { @@ -202,10 +211,7 @@ void APIENTRY glRotatef(GLfloat angle, GLfloat x, GLfloat y, GLfloat z) { UploadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX)); MultiplyMatrix4x4((const Matrix4x4*) &rotate); DownloadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX)); - - if(MATRIX_MODE == GL_MODELVIEW) { - recalculateNormalMatrix(); - } + OnMatrixChanged(); } /* Load an arbitrary matrix */ @@ -214,10 +220,7 @@ void APIENTRY glLoadMatrixf(const GLfloat *m) { memcpy(TEMP, m, sizeof(float) * 16); stack_replace(MATRIX_STACKS + MATRIX_IDX, TEMP); - - if(MATRIX_MODE == GL_MODELVIEW) { - recalculateNormalMatrix(); - } + OnMatrixChanged(); } /* Ortho */ @@ -243,6 +246,7 @@ void APIENTRY glOrtho(GLfloat left, GLfloat right, UploadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX)); MultiplyMatrix4x4((const Matrix4x4*) &OrthoMatrix); DownloadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX)); + OnMatrixChanged(); } @@ -274,6 +278,7 @@ void APIENTRY glFrustum(GLfloat left, GLfloat right, UploadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX)); MultiplyMatrix4x4((const Matrix4x4*) &FrustumMatrix); DownloadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX)); + OnMatrixChanged(); } @@ -285,10 +290,7 @@ void glMultMatrixf(const GLfloat *m) { UploadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX)); MultiplyMatrix4x4(&TEMP); DownloadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX)); - - if(MATRIX_MODE == GL_MODELVIEW) { - recalculateNormalMatrix(); - } + OnMatrixChanged(); } /* Load an arbitrary transposed matrix */ @@ -319,10 +321,7 @@ void glLoadTransposeMatrixf(const GLfloat *m) { TEMP[M15] = m[15]; stack_replace(MATRIX_STACKS + MATRIX_IDX, TEMP); - - if(MATRIX_MODE == GL_MODELVIEW) { - recalculateNormalMatrix(); - } + OnMatrixChanged(); } /* Multiply the current matrix by an arbitrary transposed matrix */ @@ -352,22 +351,19 @@ void glMultTransposeMatrixf(const GLfloat *m) { UploadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX)); MultiplyMatrix4x4((const Matrix4x4*) &TEMP); DownloadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX)); - - if(MATRIX_MODE == GL_MODELVIEW) { - recalculateNormalMatrix(); - } + OnMatrixChanged(); } /* Set the GL viewport */ void APIENTRY glViewport(GLint x, GLint y, GLsizei width, GLsizei height) { - VIEWPORT.x = x; - VIEWPORT.y = y; - VIEWPORT.width = width; - VIEWPORT.height = height; - VIEWPORT.hwidth = ((GLfloat) VIEWPORT.width) * 0.5f; - VIEWPORT.hheight = ((GLfloat) VIEWPORT.height) * 0.5f; - VIEWPORT.x_plus_hwidth = VIEWPORT.x + VIEWPORT.hwidth; - VIEWPORT.y_plus_hheight = VIEWPORT.y + VIEWPORT.hheight; + VIEWPORT_MATRIX[0][0] = width * 0.5f; + VIEWPORT_MATRIX[1][1] = -height * 0.5f; + VIEWPORT_MATRIX[2][2] = 1.0f; + VIEWPORT_MATRIX[3][3] = 1.0f; + + VIEWPORT_MATRIX[3][0] = x + width * 0.5f; + VIEWPORT_MATRIX[3][1] = GetVideoMode()->height - (y + height * 0.5f); + OnProjectionChanged(); } /* Set the depth range */ diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index ba00148..cceab78 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -78,9 +78,10 @@ GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) { const float f = _glFastInvert(vertex->w); - /* Convert to NDC and apply viewport */ - vertex->xyz[0] = (vertex->xyz[0] * f * 320) + 320; - vertex->xyz[1] = (vertex->xyz[1] * f * -240) + 240; + /* Convert to screenspace */ + /* (note that vertices have already been viewport transformed) */ + vertex->xyz[0] = vertex->xyz[0] * f; + vertex->xyz[1] = vertex->xyz[1] * f; /* Orthographic projections need to use invZ otherwise we lose the depth information. As w == 1, and clip-space range is -w to +w diff --git a/GL/platforms/software.c b/GL/platforms/software.c index 4245930..bf9ba47 100644 --- a/GL/platforms/software.c +++ b/GL/platforms/software.c @@ -75,17 +75,13 @@ void SceneListBegin(GPUList list) { vertex_counter = 0; } -GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) { +GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex) { const float f = 1.0f / (vertex->w); - /* Convert to NDC and apply viewport */ - vertex->xyz[0] = __builtin_fmaf( - VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth - ); - - vertex->xyz[1] = h - __builtin_fmaf( - VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight - ); + /* Convert to screenspace */ + /* (note that vertices have already been viewport transformed) */ + vertex->xyz[0] = vertex->xyz[0] * f; + vertex->xyz[1] = vertex->xyz[1] * f; if(vertex->w == 1.0f) { vertex->xyz[2] = 1.0f / (1.0001f + vertex->xyz[2]); @@ -143,8 +139,6 @@ void SceneListSubmit(Vertex* v2, int n) { return; } - const float h = GetVideoMode()->height; - uint8_t visible_mask = 0; uint8_t counter = 0; @@ -182,19 +176,19 @@ void SceneListSubmit(Vertex* v2, int n) { switch(visible_mask) { case 15: /* All visible, but final vertex in strip */ { - _glPerspectiveDivideVertex(v0, h); + _glPerspectiveDivideVertex(v0); _glPushHeaderOrVertex(v0); - _glPerspectiveDivideVertex(v1, h); + _glPerspectiveDivideVertex(v1); _glPushHeaderOrVertex(v1); - _glPerspectiveDivideVertex(v2, h); + _glPerspectiveDivideVertex(v2); _glPushHeaderOrVertex(v2); } break; case 7: /* All visible, push the first vertex and move on */ - _glPerspectiveDivideVertex(v0, h); + _glPerspectiveDivideVertex(v0); _glPushHeaderOrVertex(v0); break; case 9: @@ -210,13 +204,13 @@ void SceneListSubmit(Vertex* v2, int n) { _glClipEdge(v2, v0, b); b->flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(v0, h); + _glPerspectiveDivideVertex(v0); _glPushHeaderOrVertex(v0); - _glPerspectiveDivideVertex(a, h); + _glPerspectiveDivideVertex(a); _glPushHeaderOrVertex(a); - _glPerspectiveDivideVertex(b, h); + _glPerspectiveDivideVertex(b); _glPushHeaderOrVertex(b); } break; @@ -233,13 +227,13 @@ void SceneListSubmit(Vertex* v2, int n) { _glClipEdge(v2, v0, b); b->flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(v0, h); + _glPerspectiveDivideVertex(v0); _glPushHeaderOrVertex(v0); - _glPerspectiveDivideVertex(a, h); + _glPerspectiveDivideVertex(a); _glPushHeaderOrVertex(a); - _glPerspectiveDivideVertex(b, h); + _glPerspectiveDivideVertex(b); _glPushHeaderOrVertex(b); _glPushHeaderOrVertex(b); } @@ -262,13 +256,13 @@ void SceneListSubmit(Vertex* v2, int n) { _glClipEdge(v1, v2, b); b->flags = v2->flags; - _glPerspectiveDivideVertex(a, h); + _glPerspectiveDivideVertex(a); _glPushHeaderOrVertex(a); - _glPerspectiveDivideVertex(c, h); + _glPerspectiveDivideVertex(c); _glPushHeaderOrVertex(c); - _glPerspectiveDivideVertex(b, h); + _glPerspectiveDivideVertex(b); _glPushHeaderOrVertex(b); } break; @@ -285,19 +279,19 @@ void SceneListSubmit(Vertex* v2, int n) { _glClipEdge(v2, v0, b); b->flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(v0, h); + _glPerspectiveDivideVertex(v0); _glPushHeaderOrVertex(v0); _glClipEdge(v1, v2, a); a->flags = v2->flags; - _glPerspectiveDivideVertex(c, h); + _glPerspectiveDivideVertex(c); _glPushHeaderOrVertex(c); - _glPerspectiveDivideVertex(b, h); + _glPerspectiveDivideVertex(b); _glPushHeaderOrVertex(b); - _glPerspectiveDivideVertex(a, h); + _glPerspectiveDivideVertex(a); _glPushHeaderOrVertex(c); _glPushHeaderOrVertex(a); } @@ -319,17 +313,17 @@ void SceneListSubmit(Vertex* v2, int n) { _glClipEdge(v1, v2, b); b->flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(a, h); + _glPerspectiveDivideVertex(a); _glPushHeaderOrVertex(a); if(counter % 2 == 1) { _glPushHeaderOrVertex(a); } - _glPerspectiveDivideVertex(b, h); + _glPerspectiveDivideVertex(b); _glPushHeaderOrVertex(b); - _glPerspectiveDivideVertex(c, h); + _glPerspectiveDivideVertex(c); _glPushHeaderOrVertex(c); } break; @@ -349,15 +343,15 @@ void SceneListSubmit(Vertex* v2, int n) { _glClipEdge(v1, v2, b); b->flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(v0, h); + _glPerspectiveDivideVertex(v0); _glPushHeaderOrVertex(v0); - _glPerspectiveDivideVertex(a, h); + _glPerspectiveDivideVertex(a); _glPushHeaderOrVertex(a); - _glPerspectiveDivideVertex(c, h); + _glPerspectiveDivideVertex(c); _glPushHeaderOrVertex(c); - _glPerspectiveDivideVertex(b, h); + _glPerspectiveDivideVertex(b); _glPushHeaderOrVertex(b); c->flags = GPU_CMD_VERTEX_EOL; @@ -380,15 +374,15 @@ void SceneListSubmit(Vertex* v2, int n) { _glClipEdge(v1, v2, b); b->flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(v0, h); + _glPerspectiveDivideVertex(v0); _glPushHeaderOrVertex(v0); - _glPerspectiveDivideVertex(a, h); + _glPerspectiveDivideVertex(a); _glPushHeaderOrVertex(a); - _glPerspectiveDivideVertex(c, h); + _glPerspectiveDivideVertex(c); _glPushHeaderOrVertex(c); - _glPerspectiveDivideVertex(b, h); + _glPerspectiveDivideVertex(b); _glPushHeaderOrVertex(b); _glPushHeaderOrVertex(c); } @@ -411,17 +405,17 @@ void SceneListSubmit(Vertex* v2, int n) { _glClipEdge(v2, v0, b); b->flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(a, h); + _glPerspectiveDivideVertex(a); _glPushHeaderOrVertex(a); - _glPerspectiveDivideVertex(c, h); + _glPerspectiveDivideVertex(c); _glPushHeaderOrVertex(c); - _glPerspectiveDivideVertex(b, h); + _glPerspectiveDivideVertex(b); _glPushHeaderOrVertex(b); _glPushHeaderOrVertex(c); - _glPerspectiveDivideVertex(d, h); + _glPerspectiveDivideVertex(d); _glPushHeaderOrVertex(d); } break; diff --git a/GL/private.h b/GL/private.h index 5b11c52..758c636 100644 --- a/GL/private.h +++ b/GL/private.h @@ -104,20 +104,6 @@ typedef struct { AlignedVector vector; } PolyList; -typedef struct { - GLint x; - GLint y; - GLint width; - GLint height; - - float x_plus_hwidth; - float y_plus_hheight; - float hwidth; /* width * 0.5f */ - float hheight; /* height * 0.5f */ -} Viewport; - -extern Viewport VIEWPORT; - typedef struct { /* Palette data is always stored in RAM as RGBA8888 and packed as ARGB8888 * when uploaded to the PVR */ From d4fc57cab2894b35113d2e9a57fa1f9dbe4aa60b Mon Sep 17 00:00:00 2001 From: UnknownShadow200 Date: Sat, 3 Aug 2024 16:40:23 +1000 Subject: [PATCH 5/6] Fix not working properly, defer matrix calculation until needed --- GL/draw.c | 29 ----------------------------- GL/matrix.c | 43 +++++++++++++++++++++++-------------------- 2 files changed, 23 insertions(+), 49 deletions(-) diff --git a/GL/draw.c b/GL/draw.c index 01d9cf7..4e7d6ce 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -946,35 +946,6 @@ static void light(SubmissionTarget* target) { _glPerformLighting(vertex, ES, target->count); } -GL_FORCE_INLINE void divide(SubmissionTarget* target) { - TRACE(); - - /* Perform perspective divide on each vertex */ - Vertex* vertex = _glSubmissionTargetStart(target); - - const float h = GetVideoMode()->height; - - ITERATE(target->count) { - const float f = MATH_Fast_Invert(vertex->w); - - /* Convert to NDC and apply viewport */ - vertex->xyz[0] = MATH_fmac( - VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth - ); - vertex->xyz[1] = h - MATH_fmac( - VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight - ); - - /* Apply depth range */ - vertex->xyz[2] = MAX( - 1.0f - MATH_fmac(vertex->xyz[2] * f, 0.5f, 0.5f), - PVR_MIN_Z - ); - - ++vertex; - } -} - GL_FORCE_INLINE int _calc_pvr_face_culling() { if(!_glIsCullingEnabled()) { return GPU_CULLING_SMALL; diff --git a/GL/matrix.c b/GL/matrix.c index 5c1fac3..6863258 100644 --- a/GL/matrix.c +++ b/GL/matrix.c @@ -16,9 +16,11 @@ GLfloat DEPTH_RANGE_MULTIPLIER_H = (0 + 1) / 2; static Stack __attribute__((aligned(32))) MATRIX_STACKS[4]; // modelview, projection, texture static Matrix4x4 __attribute__((aligned(32))) NORMAL_MATRIX; static Matrix4x4 __attribute__((aligned(32))) VIEWPORT_MATRIX; +static Matrix4x4 __attribute__((aligned(32))) PROJECTION_MATRIX; static GLenum MATRIX_MODE = GL_MODELVIEW; static GLubyte MATRIX_IDX = 0; +static GLboolean NORMAL_DIRTY, PROJECTION_DIRTY; static const Matrix4x4 __attribute__((aligned(32))) IDENTITY = { 1.0f, 0.0f, 0.0f, 0.0f, @@ -92,25 +94,24 @@ static void transpose(GLfloat* m) { } /* When projection matrix changes, need to pre-multiply with viewport transform matrix */ -static void OnProjectionChanged() { +static void UpdateProjectionMatrix() { + PROJECTION_DIRTY = false; UploadMatrix4x4(&VIEWPORT_MATRIX); MultiplyMatrix4x4(stack_top(MATRIX_STACKS + (GL_PROJECTION & 0xF))); - DownloadMatrix4x4(stack_top(MATRIX_STACKS + (GL_PROJECTION & 0xF))); + DownloadMatrix4x4(&PROJECTION_MATRIX); } /* When modelview matrix changes, need to re-compute normal matrix */ -static void OnModelviewChanged() { +static void UpdateNormalMatrix() { + NORMAL_DIRTY = false; MEMCPY4(NORMAL_MATRIX, stack_top(MATRIX_STACKS + (GL_MODELVIEW & 0xF)), sizeof(Matrix4x4)); inverse((GLfloat*) NORMAL_MATRIX); transpose((GLfloat*) NORMAL_MATRIX); } static void OnMatrixChanged() { - if(MATRIX_MODE == GL_MODELVIEW) { - OnModelviewChanged(); - } else if(MATRIX_MODE == GL_PROJECTION) { - OnProjectionChanged(); - } + if(MATRIX_MODE == GL_MODELVIEW) NORMAL_DIRTY = true; + if(MATRIX_MODE == GL_PROJECTION) PROJECTION_DIRTY = true; } @@ -125,13 +126,12 @@ void APIENTRY glPushMatrix() { void* ret = stack_push(MATRIX_STACKS + MATRIX_IDX, top); (void) ret; assert(ret); + OnMatrixChanged(); } void APIENTRY glPopMatrix() { stack_pop(MATRIX_STACKS + MATRIX_IDX); - if(MATRIX_MODE == GL_MODELVIEW) { - OnModelviewChanged(); - } + OnMatrixChanged(); } void APIENTRY glLoadIdentity() { @@ -356,14 +356,14 @@ void glMultTransposeMatrixf(const GLfloat *m) { /* Set the GL viewport */ void APIENTRY glViewport(GLint x, GLint y, GLsizei width, GLsizei height) { - VIEWPORT_MATRIX[0][0] = width * 0.5f; - VIEWPORT_MATRIX[1][1] = -height * 0.5f; - VIEWPORT_MATRIX[2][2] = 1.0f; - VIEWPORT_MATRIX[3][3] = 1.0f; + VIEWPORT_MATRIX[M0] = width * 0.5f; + VIEWPORT_MATRIX[M5] = height * -0.5f; + VIEWPORT_MATRIX[M10] = 1.0f; + VIEWPORT_MATRIX[M15] = 1.0f; - VIEWPORT_MATRIX[3][0] = x + width * 0.5f; - VIEWPORT_MATRIX[3][1] = GetVideoMode()->height - (y + height * 0.5f); - OnProjectionChanged(); + VIEWPORT_MATRIX[M12] = x + width * 0.5f; + VIEWPORT_MATRIX[M13] = GetVideoMode()->height - (y + height * 0.5f); + PROJECTION_DIRTY = true; } /* Set the depth range */ @@ -455,14 +455,17 @@ void _glMatrixLoadModelView() { } void _glMatrixLoadProjection() { - UploadMatrix4x4((const Matrix4x4*) stack_top(MATRIX_STACKS + (GL_PROJECTION & 0xF))); + if (PROJECTION_DIRTY) UpdateProjectionMatrix(); + UploadMatrix4x4(&PROJECTION_MATRIX); } void _glMatrixLoadModelViewProjection() { - UploadMatrix4x4((const Matrix4x4*) stack_top(MATRIX_STACKS + (GL_PROJECTION & 0xF))); + if (PROJECTION_DIRTY) UpdateProjectionMatrix(); + UploadMatrix4x4(&PROJECTION_MATRIX); MultiplyMatrix4x4((const Matrix4x4*) stack_top(MATRIX_STACKS + (GL_MODELVIEW & 0xF))); } void _glMatrixLoadNormal() { + if (NORMAL_DIRTY) UpdateNormalMatrix(); UploadMatrix4x4((const Matrix4x4*) &NORMAL_MATRIX); } From 92ec1db6431701e42f8e72307b75fcf389463cbe Mon Sep 17 00:00:00 2001 From: UnknownShadow200 Date: Sun, 11 Aug 2024 19:39:27 +1000 Subject: [PATCH 6/6] Only update attribute pointers at end instead of per vertex --- GL/immediate.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/GL/immediate.c b/GL/immediate.c index afe80c5..fd32f82 100644 --- a/GL/immediate.c +++ b/GL/immediate.c @@ -163,13 +163,6 @@ void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) { IMVertex* vert = aligned_vector_extend(&VERTICES, 1); - /* Resizing could've invalidated the pointers */ - IM_ATTRIBS.vertex.ptr = VERTICES.data; - IM_ATTRIBS.uv.ptr = IM_ATTRIBS.vertex.ptr + 12; - IM_ATTRIBS.st.ptr = IM_ATTRIBS.uv.ptr + 8; - IM_ATTRIBS.colour.ptr = IM_ATTRIBS.st.ptr + 8; - IM_ATTRIBS.normal.ptr = IM_ATTRIBS.colour.ptr + 4; - uint32_t* dest = (uint32_t*) &vert->x; *(dest++) = *((uint32_t*) &x); *(dest++) = *((uint32_t*) &y); @@ -254,6 +247,13 @@ void APIENTRY glNormal3fv(const GLfloat* v) { void APIENTRY glEnd() { IMMEDIATE_MODE_ACTIVE = GL_FALSE; + /* Resizing could've invalidated the pointers */ + IM_ATTRIBS.vertex.ptr = VERTICES.data; + IM_ATTRIBS.uv.ptr = IM_ATTRIBS.vertex.ptr + 12; + IM_ATTRIBS.st.ptr = IM_ATTRIBS.uv.ptr + 8; + IM_ATTRIBS.colour.ptr = IM_ATTRIBS.st.ptr + 8; + IM_ATTRIBS.normal.ptr = IM_ATTRIBS.colour.ptr + 4; + GLuint* attrs = &ENABLED_VERTEX_ATTRIBUTES; /* Redirect attrib pointers */