From 709942e69dde948e5f52fe3cfbaac3b9038904ca Mon Sep 17 00:00:00 2001 From: Falco Girgis Date: Tue, 30 Jan 2024 19:40:40 -0600 Subject: [PATCH 01/17] test. --- GL/platforms/sh4.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index acaf692..6fa971c 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -1,6 +1,8 @@ #include "../platform.h" #include "sh4.h" +#include + #define CLIP_DEBUG 0 @@ -85,14 +87,12 @@ volatile uint32_t *sq = SQ_BASE_ADDRESS; static inline void _glFlushBuffer() { TRACE(); - /* Wait for both store queues to complete */ - sq = (uint32_t*) 0xe0000000; - sq[0] = sq[8] = 0; + sq_wait(); } static inline void _glPushHeaderOrVertex(Vertex* v) { TRACE(); - +#if 0 uint32_t* s = (uint32_t*) v; sq[0] = *(s++); sq[1] = *(s++); @@ -104,6 +104,8 @@ static inline void _glPushHeaderOrVertex(Vertex* v) { sq[7] = *(s++); __asm__("pref @%0" : : "r"(sq)); sq += 8; +#endif + pvr_sq_load(NULL, v, sizeof(Vertex), PVR_TA_INPUT); } static inline void _glClipEdge(const Vertex* const v1, const Vertex* const v2, Vertex* vout) { From 32ecb843a2efc657d39697a666eb128dcca0b001 Mon Sep 17 00:00:00 2001 From: Falco Girgis Date: Tue, 30 Jan 2024 19:44:40 -0600 Subject: [PATCH 02/17] next try. 
--- GL/platforms/sh4.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index 6fa971c..19947ef 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -11,7 +11,7 @@ #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) -#define SQ_BASE_ADDRESS (void*) 0xe0000000 +//#define SQ_BASE_ADDRESS (void*) 0xe0000000 GL_FORCE_INLINE bool glIsVertex(const float flags) { @@ -105,7 +105,7 @@ static inline void _glPushHeaderOrVertex(Vertex* v) { __asm__("pref @%0" : : "r"(sq)); sq += 8; #endif - pvr_sq_load(NULL, v, sizeof(Vertex), PVR_TA_INPUT); + pvr_sq_load(NULL, v, sizeof(Vertex), PVR_DMA_TA); } static inline void _glClipEdge(const Vertex* const v1, const Vertex* const v2, Vertex* vout) { @@ -136,7 +136,7 @@ static inline void _glClipEdge(const Vertex* const v1, const Vertex* const v2, V #define SPAN_SORT_CFG 0x005F8030 static volatile uint32_t* PVR_LMMODE0 = (uint32_t*) 0xA05F6884; static volatile uint32_t *PVR_LMMODE1 = (uint32_t*) 0xA05F6888; -static volatile uint32_t *QACR = (uint32_t*) 0xFF000038; +//static volatile uint32_t *QACR = (uint32_t*) 0xFF000038; void SceneListSubmit(Vertex* v2, int n) { TRACE(); @@ -155,7 +155,7 @@ void SceneListSubmit(Vertex* v2, int n) { *PVR_LMMODE1 = 0; //Set QACR registers - QACR[1] = QACR[0] = 0x11; + // QACR[1] = QACR[0] = 0x11; #if CLIP_DEBUG Vertex* vertex = (Vertex*) src; @@ -168,7 +168,7 @@ void SceneListSubmit(Vertex* v2, int n) { uint8_t visible_mask = 0; uint8_t counter = 0; - sq = SQ_BASE_ADDRESS; + //sq = SQ_BASE_ADDRESS; for(int i = 0; i < n; ++i, ++v2) { PREFETCH(v2 + 1); From b9cdfb2e35446ad9e4b967087a287afadaa195be Mon Sep 17 00:00:00 2001 From: Falco Girgis Date: Tue, 30 Jan 2024 19:45:17 -0600 Subject: [PATCH 03/17] next try --- GL/platforms/sh4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index 19947ef..c21f799 100644 --- a/GL/platforms/sh4.c +++ 
b/GL/platforms/sh4.c @@ -82,7 +82,7 @@ GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) { } -volatile uint32_t *sq = SQ_BASE_ADDRESS; +//volatile uint32_t *sq = SQ_BASE_ADDRESS; static inline void _glFlushBuffer() { TRACE(); From c25a5cddcb8e62a19049f03d85c6eec7b063e5b5 Mon Sep 17 00:00:00 2001 From: Falco Girgis Date: Tue, 30 Jan 2024 19:52:56 -0600 Subject: [PATCH 04/17] Next attempt. --- GL/platforms/sh4.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index c21f799..feaa930 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -11,7 +11,7 @@ #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) -//#define SQ_BASE_ADDRESS (void*) 0xe0000000 +#define SQ_BASE_ADDRESS (void*) 0xe0000000 GL_FORCE_INLINE bool glIsVertex(const float flags) { @@ -82,7 +82,7 @@ GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) { } -//volatile uint32_t *sq = SQ_BASE_ADDRESS; +volatile uint32_t *sq = SQ_BASE_ADDRESS; static inline void _glFlushBuffer() { TRACE(); @@ -105,7 +105,7 @@ static inline void _glPushHeaderOrVertex(Vertex* v) { __asm__("pref @%0" : : "r"(sq)); sq += 8; #endif - pvr_sq_load(NULL, v, sizeof(Vertex), PVR_DMA_TA); + sq_fast_cpy(SQ_MASK_DEST(PVR_TA_INPUT), v, 1); } static inline void _glClipEdge(const Vertex* const v1, const Vertex* const v2, Vertex* vout) { @@ -136,7 +136,7 @@ static inline void _glClipEdge(const Vertex* const v1, const Vertex* const v2, V #define SPAN_SORT_CFG 0x005F8030 static volatile uint32_t* PVR_LMMODE0 = (uint32_t*) 0xA05F6884; static volatile uint32_t *PVR_LMMODE1 = (uint32_t*) 0xA05F6888; -//static volatile uint32_t *QACR = (uint32_t*) 0xFF000038; +static volatile uint32_t *QACR = (uint32_t*) 0xFF000038; void SceneListSubmit(Vertex* v2, int n) { TRACE(); @@ -155,7 +155,7 @@ void SceneListSubmit(Vertex* v2, int n) { *PVR_LMMODE1 = 0; //Set QACR registers - // QACR[1] = 
QACR[0] = 0x11; + QACR[1] = QACR[0] = 0x11; #if CLIP_DEBUG Vertex* vertex = (Vertex*) src; @@ -168,7 +168,8 @@ void SceneListSubmit(Vertex* v2, int n) { uint8_t visible_mask = 0; uint8_t counter = 0; - //sq = SQ_BASE_ADDRESS; + sq = SQ_BASE_ADDRESS; + sq_lock(); for(int i = 0; i < n; ++i, ++v2) { PREFETCH(v2 + 1); @@ -454,6 +455,8 @@ void SceneListSubmit(Vertex* v2, int n) { } _glFlushBuffer(); + + sq_unlock(); } void SceneListFinish() { From 9586eefee991c7e7b335668fd4f72520859d3735 Mon Sep 17 00:00:00 2001 From: Falco Girgis Date: Tue, 30 Jan 2024 19:55:05 -0600 Subject: [PATCH 05/17] Next attempt. --- GL/platforms/sh4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index feaa930..da6c544 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -169,7 +169,7 @@ void SceneListSubmit(Vertex* v2, int n) { uint8_t counter = 0; sq = SQ_BASE_ADDRESS; - sq_lock(); + sq_lock((void*)PVR_TA_INPUT); for(int i = 0; i < n; ++i, ++v2) { PREFETCH(v2 + 1); From 0be2911e0b4969b45f259af6263f39e2f896f3b2 Mon Sep 17 00:00:00 2001 From: Falco Girgis Date: Tue, 30 Jan 2024 20:41:34 -0600 Subject: [PATCH 06/17] Trying to batch shit better. 
--- GL/platforms/sh4.c | 112 ++++++++++++++++++++++----------------------- 1 file changed, 54 insertions(+), 58 deletions(-) diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index da6c544..2ac192b 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -90,8 +90,9 @@ static inline void _glFlushBuffer() { sq_wait(); } -static inline void _glPushHeaderOrVertex(Vertex* v) { - TRACE(); +static uintptr_t sq_dest_addr = 0; + +static inline void _glPushHeaderOrVertex(Vertex* v, size_t count) { #if 0 uint32_t* s = (uint32_t*) v; sq[0] = *(s++); @@ -105,7 +106,7 @@ static inline void _glPushHeaderOrVertex(Vertex* v) { __asm__("pref @%0" : : "r"(sq)); sq += 8; #endif - sq_fast_cpy(SQ_MASK_DEST(PVR_TA_INPUT), v, 1); + sq_fast_cpy((void*)sq_dest_addr, v, count * sizeof(Vertex));; } static inline void _glClipEdge(const Vertex* const v1, const Vertex* const v2, Vertex* vout) { @@ -169,6 +170,7 @@ void SceneListSubmit(Vertex* v2, int n) { uint8_t counter = 0; sq = SQ_BASE_ADDRESS; + sq_dest_addr = SQ_MASK_DEST(PVR_TA_INPUT) sq_lock((void*)PVR_TA_INPUT); for(int i = 0; i < n; ++i, ++v2) { @@ -206,19 +208,15 @@ void SceneListSubmit(Vertex* v2, int n) { case 15: /* All visible, but final vertex in strip */ { _glPerspectiveDivideVertex(v0, h); - _glPushHeaderOrVertex(v0); - _glPerspectiveDivideVertex(v1, h); - _glPushHeaderOrVertex(v1); - _glPerspectiveDivideVertex(v2, h); - _glPushHeaderOrVertex(v2); + _glPushHeaderOrVertex(v0, 3); } break; case 7: /* All visible, push the first vertex and move on */ _glPerspectiveDivideVertex(v0, h); - _glPushHeaderOrVertex(v0); + _glPushHeaderOrVertex(v0, 1); break; case 9: /* First vertex was visible, last in strip */ @@ -234,13 +232,12 @@ void SceneListSubmit(Vertex* v2, int n) { b->flags = GPU_CMD_VERTEX_EOL; _glPerspectiveDivideVertex(v0, h); - _glPushHeaderOrVertex(v0); + _glPushHeaderOrVertex(v0, 1); _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(a); - _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); + 
_glPushHeaderOrVertex(a, 2); + // _glPushHeaderOrVertex(b); } break; case 1: @@ -257,13 +254,12 @@ void SceneListSubmit(Vertex* v2, int n) { b->flags = GPU_CMD_VERTEX; _glPerspectiveDivideVertex(v0, h); - _glPushHeaderOrVertex(v0); + _glPushHeaderOrVertex(v0, 1); _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(a); - _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); + + _glPushHeaderOrVertex(a, 2); _glPushHeaderOrVertex(b); } break; @@ -274,8 +270,8 @@ void SceneListSubmit(Vertex* v2, int n) { { Vertex __attribute__((aligned(32))) scratch[3]; Vertex* a = &scratch[0]; - Vertex* b = &scratch[1]; - Vertex* c = &scratch[2]; + Vertex* c = &scratch[1]; + Vertex* b = &scratch[2]; memcpy_vertex(c, v1); @@ -286,13 +282,13 @@ void SceneListSubmit(Vertex* v2, int n) { b->flags = v2->flags; _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(a); - _glPerspectiveDivideVertex(c, h); - _glPushHeaderOrVertex(c); - _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); + + + _glPushHeaderOrVertex(a, 3); + //_glPushHeaderOrVertex(c); + //_glPushHeaderOrVertex(b); } break; case 11: @@ -309,20 +305,20 @@ void SceneListSubmit(Vertex* v2, int n) { b->flags = GPU_CMD_VERTEX; _glPerspectiveDivideVertex(v0, h); - _glPushHeaderOrVertex(v0); + _glPushHeaderOrVertex(v0, 1); _glClipEdge(v1, v2, a); a->flags = v2->flags; _glPerspectiveDivideVertex(c, h); - _glPushHeaderOrVertex(c); + _glPushHeaderOrVertex(c, 1); _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); + _glPushHeaderOrVertex(b, 1); _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(c); - _glPushHeaderOrVertex(a); + _glPushHeaderOrVertex(c, 1); + _glPushHeaderOrVertex(a, 1); } break; case 12: @@ -342,26 +338,25 @@ void SceneListSubmit(Vertex* v2, int n) { _glClipEdge(v1, v2, b); b->flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(a); + _glPerspectiveDivideVertex(a, h); + _glPerspectiveDivideVertex(b, h); + _glPerspectiveDivideVertex(c, h); 
if(counter % 2 == 1) { _glPushHeaderOrVertex(a); } + _glPushHeaderOrVertex(a, 3); - _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); - - _glPerspectiveDivideVertex(c, h); - _glPushHeaderOrVertex(c); + // _glPushHeaderOrVertex(b); + //_glPushHeaderOrVertex(c); } break; case 13: { Vertex __attribute__((aligned(32))) scratch[3]; Vertex* a = &scratch[0]; - Vertex* b = &scratch[1]; - Vertex* c = &scratch[2]; + Vertex* c = &scratch[1]; + Vertex* b = &scratch[2]; memcpy_vertex(c, v2); c->flags = GPU_CMD_VERTEX; @@ -373,26 +368,25 @@ void SceneListSubmit(Vertex* v2, int n) { b->flags = GPU_CMD_VERTEX; _glPerspectiveDivideVertex(v0, h); - _glPushHeaderOrVertex(v0); + _glPushHeaderOrVertex(v0, 1); _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(a); - _glPerspectiveDivideVertex(c, h); - _glPushHeaderOrVertex(c); _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); + _glPushHeaderOrVertex(a, 3); + //_glPushHeaderOrVertex(c); + //_glPushHeaderOrVertex(b); c->flags = GPU_CMD_VERTEX_EOL; - _glPushHeaderOrVertex(c); + _glPushHeaderOrVertex(c, 1); } break; case 5: /* First and third vertex were visible */ { Vertex __attribute__((aligned(32))) scratch[3]; Vertex* a = &scratch[0]; - Vertex* b = &scratch[1]; - Vertex* c = &scratch[2]; + Vertex* c = &scratch[1]; + Vertex* b = &scratch[2]; memcpy_vertex(c, v2); c->flags = GPU_CMD_VERTEX; @@ -404,16 +398,17 @@ void SceneListSubmit(Vertex* v2, int n) { b->flags = GPU_CMD_VERTEX; _glPerspectiveDivideVertex(v0, h); - _glPushHeaderOrVertex(v0); + _glPushHeaderOrVertex(v0, 1); _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(a); - _glPerspectiveDivideVertex(c, h); - _glPushHeaderOrVertex(c); _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); - _glPushHeaderOrVertex(c); + _glPushHeaderOrVertex(a, 3); + + //_glPushHeaderOrVertex(c); + + //_glPushHeaderOrVertex(b); + _glPushHeaderOrVertex(c, 1); } break; case 14: @@ -435,17 +430,18 @@ void SceneListSubmit(Vertex* v2, int n) { b->flags = 
GPU_CMD_VERTEX; _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(a); + _glPushHeaderOrVertex(a, 1); _glPerspectiveDivideVertex(c, h); - _glPushHeaderOrVertex(c); + _glPushHeaderOrVertex(c, 1); _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); - _glPushHeaderOrVertex(c); - _glPerspectiveDivideVertex(d, h); - _glPushHeaderOrVertex(d); + _glPushHeaderOrVertex(b, 3); + //_glPushHeaderOrVertex(c); + + + //_glPushHeaderOrVertex(d); } break; case 8: From e8a60bc94ac340f1b5dec3cfb3d588127ce69443 Mon Sep 17 00:00:00 2001 From: Falco Girgis Date: Tue, 30 Jan 2024 20:43:30 -0600 Subject: [PATCH 07/17] Fixing build issues --- GL/platforms/sh4.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index 2ac192b..06058b4 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -170,7 +170,7 @@ void SceneListSubmit(Vertex* v2, int n) { uint8_t counter = 0; sq = SQ_BASE_ADDRESS; - sq_dest_addr = SQ_MASK_DEST(PVR_TA_INPUT) + sq_dest_addr = SQ_MASK_DEST(PVR_TA_INPUT); sq_lock((void*)PVR_TA_INPUT); for(int i = 0; i < n; ++i, ++v2) { @@ -189,7 +189,7 @@ void SceneListSubmit(Vertex* v2, int n) { } break; default: - _glPushHeaderOrVertex(v2); + _glPushHeaderOrVertex(v2, 1); counter = 0; continue; }; @@ -260,7 +260,7 @@ void SceneListSubmit(Vertex* v2, int n) { _glPerspectiveDivideVertex(b, h); _glPushHeaderOrVertex(a, 2); - _glPushHeaderOrVertex(b); + //_glPushHeaderOrVertex(b); } break; case 10: @@ -343,7 +343,7 @@ void SceneListSubmit(Vertex* v2, int n) { _glPerspectiveDivideVertex(c, h); if(counter % 2 == 1) { - _glPushHeaderOrVertex(a); + _glPushHeaderOrVertex(a, 1); } _glPushHeaderOrVertex(a, 3); From 3420f0d0c6619a7403dd37f64aa4caaef7ab20c2 Mon Sep 17 00:00:00 2001 From: Falco Girgis Date: Wed, 31 Jan 2024 01:56:01 -0600 Subject: [PATCH 08/17] Adjusting vertex buffers. 
--- GL/platforms/sh4.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index 06058b4..f145173 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -6,7 +6,7 @@ #define CLIP_DEBUG 0 -#define PVR_VERTEX_BUF_SIZE 2560 * 256 +#define PVR_VERTEX_BUF_SIZE 2048 * 256 * 2 #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) @@ -29,7 +29,8 @@ void InitGPU(_Bool autosort, _Bool fsaa) { PVR_VERTEX_BUF_SIZE, /* Vertex buffer size */ 0, /* No DMA */ fsaa, /* No FSAA */ - (autosort) ? 0 : 1 /* Disable translucent auto-sorting to match traditional GL */ + (autosort) ? 0 : 1 /* Disable translucent auto-sorting to match traditional GL */, + .opb_overflow_count = 1 }; pvr_init(&params); From 4033e9c23aba17949c8b65ae6af1ff1a5824ce11 Mon Sep 17 00:00:00 2001 From: Falco Girgis Date: Wed, 31 Jan 2024 01:59:46 -0600 Subject: [PATCH 09/17] Submitted wrong size to sq_fast_cpy() --- GL/platforms/sh4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index f145173..4cdfc25 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -107,7 +107,7 @@ static inline void _glPushHeaderOrVertex(Vertex* v, size_t count) { __asm__("pref @%0" : : "r"(sq)); sq += 8; #endif - sq_fast_cpy((void*)sq_dest_addr, v, count * sizeof(Vertex));; + sq_fast_cpy((void*)sq_dest_addr, v, count); } static inline void _glClipEdge(const Vertex* const v1, const Vertex* const v2, Vertex* vout) { From 313341f93df64c7b9a396db2c1a0f4596451bc1e Mon Sep 17 00:00:00 2001 From: Falco Girgis Date: Wed, 31 Jan 2024 02:50:01 -0600 Subject: [PATCH 10/17] Trying without prefetching. 
--- GL/platforms/sh4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GL/platforms/sh4.h b/GL/platforms/sh4.h index 46c3513..698a8c9 100644 --- a/GL/platforms/sh4.h +++ b/GL/platforms/sh4.h @@ -24,7 +24,7 @@ #define GL_FORCE_INLINE static GL_INLINE_DEBUG #endif -#define PREFETCH(addr) __builtin_prefetch((addr)) +#define PREFETCH(addr) ((void)addr)//__builtin_prefetch((addr)) GL_FORCE_INLINE void* memcpy_fast(void *dest, const void *src, size_t len) { if(!len) { From 8d1ce547f562162f6115c1ee66a0147a65ae1d27 Mon Sep 17 00:00:00 2001 From: Falco Girgis Date: Wed, 31 Jan 2024 02:51:51 -0600 Subject: [PATCH 11/17] Screwed up prefetch macro. --- GL/platforms/sh4.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GL/platforms/sh4.h b/GL/platforms/sh4.h index 698a8c9..9e51f78 100644 --- a/GL/platforms/sh4.h +++ b/GL/platforms/sh4.h @@ -24,7 +24,8 @@ #define GL_FORCE_INLINE static GL_INLINE_DEBUG #endif -#define PREFETCH(addr) ((void)addr)//__builtin_prefetch((addr)) +#define PREFETCH(addr) (void)0 +//((void)addr)//__builtin_prefetch((addr)) GL_FORCE_INLINE void* memcpy_fast(void *dest, const void *src, size_t len) { if(!len) { From d4a83ff23501559d489d35e42c27e4c6836909f5 Mon Sep 17 00:00:00 2001 From: Falco Girgis Date: Wed, 31 Jan 2024 14:46:49 -0600 Subject: [PATCH 12/17] Fixed a double promotion, put back prefetching. 
--- GL/matrix.c | 6 +++--- GL/platforms/sh4.h | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/GL/matrix.c b/GL/matrix.c index d95cd49..bfa2f12 100644 --- a/GL/matrix.c +++ b/GL/matrix.c @@ -6,7 +6,7 @@ #include "../containers/stack.h" -#define DEG2RAD (0.01745329251994329576923690768489) +#define DEG2RAD (0.01745329251994329576923690768489f) /* Depth range */ @@ -174,8 +174,8 @@ void APIENTRY glRotatef(GLfloat angle, GLfloat x, GLfloat y, GLfloat z) { }; float r = DEG2RAD * angle; - float c = cos(r); - float s = sin(r); + float c = fcos(r); + float s = fsin(r); VEC3_NORMALIZE(x, y, z); diff --git a/GL/platforms/sh4.h b/GL/platforms/sh4.h index 9e51f78..46c3513 100644 --- a/GL/platforms/sh4.h +++ b/GL/platforms/sh4.h @@ -24,8 +24,7 @@ #define GL_FORCE_INLINE static GL_INLINE_DEBUG #endif -#define PREFETCH(addr) (void)0 -//((void)addr)//__builtin_prefetch((addr)) +#define PREFETCH(addr) __builtin_prefetch((addr)) GL_FORCE_INLINE void* memcpy_fast(void *dest, const void *src, size_t len) { if(!len) { From 04c2fcceaee4b02f8e4896422bf512b2e5e9d96a Mon Sep 17 00:00:00 2001 From: Falco Girgis Date: Wed, 31 Jan 2024 14:59:58 -0600 Subject: [PATCH 13/17] Getting rid of prefetching. --- GL/platforms/sh4.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GL/platforms/sh4.h b/GL/platforms/sh4.h index 46c3513..4cdf85a 100644 --- a/GL/platforms/sh4.h +++ b/GL/platforms/sh4.h @@ -24,7 +24,8 @@ #define GL_FORCE_INLINE static GL_INLINE_DEBUG #endif -#define PREFETCH(addr) __builtin_prefetch((addr)) +#define PREFETCH(addr) (void) 0 +//__builtin_prefetch((addr)) GL_FORCE_INLINE void* memcpy_fast(void *dest, const void *src, size_t len) { if(!len) { From 5fd6e510286c9cc55ea2a2b5784f5d000dc95d2e Mon Sep 17 00:00:00 2001 From: Falco Girgis Date: Wed, 10 Apr 2024 01:09:29 -0500 Subject: [PATCH 14/17] Readded prefetching. 
--- GL/platforms/sh4.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/GL/platforms/sh4.h b/GL/platforms/sh4.h index 4cdf85a..46c3513 100644 --- a/GL/platforms/sh4.h +++ b/GL/platforms/sh4.h @@ -24,8 +24,7 @@ #define GL_FORCE_INLINE static GL_INLINE_DEBUG #endif -#define PREFETCH(addr) (void) 0 -//__builtin_prefetch((addr)) +#define PREFETCH(addr) __builtin_prefetch((addr)) GL_FORCE_INLINE void* memcpy_fast(void *dest, const void *src, size_t len) { if(!len) { From 49f2f0917bc95a279d5822c14b7b7e530ffc7129 Mon Sep 17 00:00:00 2001 From: Falco Girgis Date: Wed, 31 Jul 2024 15:36:19 -0500 Subject: [PATCH 15/17] Nehe20 Build Fix + Warnings cleanup for GCC14.1.0. Newest toolchain is bitchier, so I wanted to go ahead and clear up all of the warnings from building GLdc and the various examples... 1) Set CMake CXX standard to 14 instead of 11, since CXXFLAGS were enforcing that language standard anyway. 2) Fixed a bunch of strict aliasing violations in immediate.c: glVertex3f. 3) Removed or commented out lots of unused variables. 4) Fixed some "suggested inner braces on initializer" crap. 5) Fixed a bunch of signed vs unsigned pointer assignments. 6) Fixed several printf() warnings from using %d with int32_t (needs to be %ld for long int). 7) Fixed build issue with Nehe20 from not including kos.h for the KOS_ROMDISK macro. 8) Fixed some signed vs unsigned comparison mismatches in C++ template instantiations within clipping tests. 
9) --- CMakeLists.txt | 2 +- GL/immediate.c | 32 ++++++++++++++-------- GL/state.c | 2 -- samples/blend_test/main.c | 1 - samples/depth_funcs_alpha_testing/gl_png.c | 1 - samples/lerabot01/main.c | 16 +++-------- samples/lights/main.c | 3 +- samples/loadbmp.c | 4 +-- samples/mipmap/main.c | 4 +-- samples/nehe06_4444twid/main.c | 6 ++-- samples/nehe06_vq/main.c | 8 +++--- samples/nehe20/main.c | 7 +++-- samples/paletted/main.c | 3 +- samples/paletted_pcx/main.c | 8 +++--- samples/profiler.c | 2 +- tests/zclip/main.cpp | 14 +++++----- 16 files changed, 54 insertions(+), 59 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0f7f87a..048f894 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,7 +22,7 @@ string(TOUPPER ${BACKEND} BACKEND_UPPER) add_definitions(-DBACKEND_${BACKEND_UPPER}) set(CMAKE_C_STANDARD 99) -set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD 14) include_directories(include) diff --git a/GL/immediate.c b/GL/immediate.c index afe80c5..d80e194 100644 --- a/GL/immediate.c +++ b/GL/immediate.c @@ -158,6 +158,14 @@ void APIENTRY glColor3fv(const GLfloat* v) { COLOR[B8IDX] = (GLubyte)(v[2] * 255); } +typedef union punned { + GLubyte* byte; + GLfloat* flt; + uint32_t* u32; + void* vptr; + uintptr_t uptr; +} punned_t; + void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) { IM_ENABLED_VERTEX_ATTRIBUTES |= VERTEX_ENABLED_FLAG; @@ -170,18 +178,18 @@ void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) { IM_ATTRIBS.colour.ptr = IM_ATTRIBS.st.ptr + 8; IM_ATTRIBS.normal.ptr = IM_ATTRIBS.colour.ptr + 4; - uint32_t* dest = (uint32_t*) &vert->x; - *(dest++) = *((uint32_t*) &x); - *(dest++) = *((uint32_t*) &y); - *(dest++) = *((uint32_t*) &z); - *(dest++) = *((uint32_t*) &UV_COORD[0]); - *(dest++) = *((uint32_t*) &UV_COORD[1]); - *(dest++) = *((uint32_t*) &ST_COORD[0]); - *(dest++) = *((uint32_t*) &ST_COORD[1]); - *(dest++) = *((uint32_t*) COLOR); - *(dest++) = *((uint32_t*) &NORMAL[0]); - *(dest++) = *((uint32_t*) &NORMAL[1]); - *(dest++) 
= *((uint32_t*) &NORMAL[2]); + punned_t dest = { .flt = &vert->x }; + *(dest.flt++) = x; + *(dest.flt++) = y; + *(dest.flt++) = z; + *(dest.flt++) = UV_COORD[0]; + *(dest.flt++) = UV_COORD[1]; + *(dest.flt++) = ST_COORD[0]; + *(dest.flt++) = ST_COORD[1]; + *(dest.u32++) = *((uint32_t*)(void*) COLOR); + *(dest.flt++) = NORMAL[0]; + *(dest.flt++) = NORMAL[1]; + *(dest.flt++) = NORMAL[2]; } void APIENTRY glVertex3fv(const GLfloat* v) { diff --git a/GL/state.c b/GL/state.c index 52f2656..d8b89f2 100644 --- a/GL/state.c +++ b/GL/state.c @@ -80,9 +80,7 @@ static struct { .color_control = GL_SINGLE_COLOR, .color_material_mode = GL_AMBIENT_AND_DIFFUSE, .color_material_mask = AMBIENT_MASK | DIFFUSE_MASK, - .lights = {0}, .enabled_light_count = 0, - .material = {0}, .shade_model = GL_SMOOTH }; diff --git a/samples/blend_test/main.c b/samples/blend_test/main.c index 6fbb795..1303f7f 100644 --- a/samples/blend_test/main.c +++ b/samples/blend_test/main.c @@ -80,7 +80,6 @@ void DrawGLScene() { const float RED [] = {1.0, 0, 0, 0.5}; const float BLUE [] = {0.0, 0, 1, 0.5}; - const float NONE [] = {0, 0, 0, 0}; glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // Clear The Screen And The Depth Buffer diff --git a/samples/depth_funcs_alpha_testing/gl_png.c b/samples/depth_funcs_alpha_testing/gl_png.c index 686035c..5415638 100644 --- a/samples/depth_funcs_alpha_testing/gl_png.c +++ b/samples/depth_funcs_alpha_testing/gl_png.c @@ -48,7 +48,6 @@ int dtex_to_gl_texture(texture *tex, char* filename) { GLboolean twiddled = (header.type & (1 << 26)) < 1; GLboolean compressed = (header.type & (1 << 30)) > 0; GLboolean mipmapped = (header.type & (1 << 31)) > 0; - GLboolean strided = (header.type & (1 << 25)) > 0; GLuint format = (header.type >> 27) & 0b111; image->data = (char *) malloc (header.size); diff --git a/samples/lerabot01/main.c b/samples/lerabot01/main.c index 71ca463..d16c0a5 100644 --- a/samples/lerabot01/main.c +++ b/samples/lerabot01/main.c @@ -26,7 +26,7 @@ 
KOS_INIT_ROMDISK(romdisk); float xrot, yrot, zrot; /* storage for one texture */ -int texture[1]; +GLuint texture[1]; // Load Bitmaps And Convert To Textures void LoadGLTextures() { @@ -81,7 +81,7 @@ void InitGL(int Width, int Height) // We call this right after our OpenG GLfloat l1_pos[] = {5.0, 0.0, 1.0, 1.0}; GLfloat l1_diff[] = {1.0, 0.0, 0.0, 1.0}; - GLfloat l1_amb[] = {0.5, 0.5, 0.5, 1.0}; + //GLfloat l1_amb[] = {0.5, 0.5, 0.5, 1.0}; //glLightfv(GL_LIGHT1, GL_AMBIENT, l1_amb); glLightfv(GL_LIGHT1, GL_DIFFUSE, l1_diff); @@ -93,7 +93,7 @@ void InitGL(int Width, int Height) // We call this right after our OpenG GLfloat l2_pos[] = {0.0, 15.0, 1.0, 1.0}; GLfloat l2_dir[] = {0.0, -1.0, 0.0}; GLfloat l2_diff[] = {0.5, 0.5, 0.0, 1.0}; - GLfloat l2_amb[] = {0.5, 0.5, 0.5, 1.0}; + //GLfloat l2_amb[] = {0.5, 0.5, 0.5, 1.0}; glEnable(GL_LIGHT2); glLightfv(GL_LIGHT2, GL_DIFFUSE, l2_diff); @@ -145,7 +145,7 @@ void DrawTexturedQuad(int tex, float x, float y, float z) GLfloat y0 = y - texH / 2; GLfloat x1 = x + texW / 2; GLfloat y1 = y + texH / 2; - GLfloat color[] = {1.0f, 1.0f, 1.0f, 1.0f}; + //GLfloat color[] = {1.0f, 1.0f, 1.0f, 1.0f}; GLfloat mat_ambient[] = {1.0f, 1.0f, 1.0f, 1.0f}; GLfloat vertex_data[] = { @@ -172,14 +172,6 @@ void DrawTexturedQuad(int tex, float x, float y, float z) 0.0, 0.0, 1.0 }; - GLfloat color_data[] = { - /* 2D Coordinate, texture coordinate */ - color[0], color[1], color[2], color[3], - color[0], color[1], color[2], color[3], - color[0], color[1], color[2], color[3], - color[0], color[1], color[2], color[3] - }; - //GLint indices[] = {0,1,2,3,2,3}; glEnable(GL_TEXTURE_2D); diff --git a/samples/lights/main.c b/samples/lights/main.c index afd048d..223bd86 100644 --- a/samples/lights/main.c +++ b/samples/lights/main.c @@ -24,8 +24,7 @@ KOS_INIT_ROMDISK(romdisk); #include "../loadbmp.h" float xrot, yrot, zrot; - -int texture[1]; +GLuint texture[1]; void LoadGLTextures() { diff --git a/samples/loadbmp.c b/samples/loadbmp.c index 65bd571..936ba06 
100644 --- a/samples/loadbmp.c +++ b/samples/loadbmp.c @@ -35,7 +35,7 @@ int ImageLoad(char *filename, Image *image) { return 0; } image->sizeX = sizeX; - printf("Width of %s: %d\n", filename, sizeX); + printf("Width of %s: %ld\n", filename, sizeX); // read the height if ((i = fread(&sizeY, 4, 1, file)) != 1) { @@ -43,7 +43,7 @@ int ImageLoad(char *filename, Image *image) { return 0; } image->sizeY = sizeY; - printf("Height of %s: %d\n", filename, sizeY); + printf("Height of %s: %ld\n", filename, sizeY); // calculate the size (assuming 24 bits or 3 bytes per pixel). size = image->sizeX * image->sizeY * 3; diff --git a/samples/mipmap/main.c b/samples/mipmap/main.c index ba65d5f..8dc5fca 100644 --- a/samples/mipmap/main.c +++ b/samples/mipmap/main.c @@ -20,9 +20,7 @@ KOS_INIT_ROMDISK(romdisk); #endif #include "../loadbmp.h" - -/* storage for one texture */ -int texture[1]; +GLuint texture[1]; // Load Bitmaps And Convert To Textures void LoadGLTextures() { diff --git a/samples/nehe06_4444twid/main.c b/samples/nehe06_4444twid/main.c index 3cc58d5..f290c6f 100644 --- a/samples/nehe06_4444twid/main.c +++ b/samples/nehe06_4444twid/main.c @@ -62,7 +62,7 @@ int ImageLoad(char *filename, Image *image) { GLboolean twiddled = (header.type & (1 << 26)) < 1; GLboolean compressed = (header.type & (1 << 30)) > 0; GLboolean mipmapped = (header.type & (1 << 31)) > 0; - GLboolean strided = (header.type & (1 << 25)) > 0; + //GLboolean strided = (header.type & (1 << 25)) > 0; GLuint format = (header.type >> 27) & 0b111; image->data = (char *) malloc (header.size); @@ -70,8 +70,8 @@ int ImageLoad(char *filename, Image *image) { image->sizeY = header.height; image->dataSize = header.size; - GLuint expected = 2 * header.width * header.height; - GLuint ratio = (GLuint) (((GLfloat) expected) / ((GLfloat) header.size)); + //GLuint expected = 2 * header.width * header.height; + //GLuint ratio = (GLuint) (((GLfloat) expected) / ((GLfloat) header.size)); fread(image->data, image->dataSize, 1, 
file); fclose(file); diff --git a/samples/nehe06_vq/main.c b/samples/nehe06_vq/main.c index cf2156e..754f458 100644 --- a/samples/nehe06_vq/main.c +++ b/samples/nehe06_vq/main.c @@ -22,7 +22,7 @@ KOS_INIT_ROMDISK(romdisk); float xrot, yrot, zrot; /* storage for one texture */ -int texture[1]; +GLuint texture[1]; /* Image type - contains height, width, and data */ struct Image { @@ -59,7 +59,7 @@ int ImageLoad(char *filename, Image *image) { GLboolean twiddled = (header.type & (1 << 26)) < 1; GLboolean compressed = (header.type & (1 << 30)) > 0; GLboolean mipmapped = (header.type & (1 << 31)) > 0; - GLboolean strided = (header.type & (1 << 25)) > 0; + //GLboolean strided = (header.type & (1 << 25)) > 0; GLuint format = (header.type >> 27) & 0b111; image->data = (char *) malloc (header.size); @@ -67,8 +67,8 @@ int ImageLoad(char *filename, Image *image) { image->sizeY = header.height; image->dataSize = header.size; - GLuint expected = 2 * header.width * header.height; - GLuint ratio = (GLuint) (((GLfloat) expected) / ((GLfloat) header.size)); + //GLuint expected = 2 * header.width * header.height; + //GLuint ratio = (GLuint) (((GLfloat) expected) / ((GLfloat) header.size)); fread(image->data, image->dataSize, 1, file); fclose(file); diff --git a/samples/nehe20/main.c b/samples/nehe20/main.c index 8050dbd..54c7825 100644 --- a/samples/nehe20/main.c +++ b/samples/nehe20/main.c @@ -11,6 +11,11 @@ #include #include + +#ifdef __DREAMCAST__ +#include <kos.h> +#endif + #define FPS 60 uint32_t waittime = 1000.0f/FPS; uint32_t framestarttime = 0; @@ -227,8 +232,6 @@ int DrawGLScene(GLvoid) // Here's Where We Do All The Drawing int main(int argc, char *argv[]) { - BOOL done=FALSE; // Bool Variable To Exit Loop - glKosInit(); InitGL(); diff --git a/samples/paletted/main.c b/samples/paletted/main.c index 7b88cf3..50fbdf3 100644 --- a/samples/paletted/main.c +++ b/samples/paletted/main.c @@ -22,8 +22,7 @@ KOS_INIT_ROMDISK(romdisk); /* floats for x rotation, y rotation, z rotation */ 
float xrot, yrot, zrot; -/* storage for one texture */ -int texture[1]; +GLuint texture[1]; typedef struct { unsigned int height; diff --git a/samples/paletted_pcx/main.c b/samples/paletted_pcx/main.c index e397ab7..a01744a 100644 --- a/samples/paletted_pcx/main.c +++ b/samples/paletted_pcx/main.c @@ -41,7 +41,7 @@ /* floats for x rotation, y rotation, z rotation */ float xrot, yrot, zrot; -int textures[3]; +GLuint textures[3]; typedef struct { uint32_t height; @@ -272,7 +272,7 @@ int BMP_GetPalette(FILE *pFile) bitCount = BmpInfoHeader.ClrImportant * sizeof(RGB_QUAD); if (fread(BmpRgbQuad, 1, bitCount, pFile) != bitCount){ - fprintf(stderr, "Failed to read palette: %d\n", bitCount); + fprintf(stderr, "Failed to read palette: %ld\n", bitCount); return 0; } @@ -293,7 +293,7 @@ int BMP_GetPalette(FILE *pFile) int BMP_Depack(FILE *pFile,char *pZone) { char PadRead[4]; - int32_t i, j, Offset, PadSize, pix, c; + int32_t i, j, Offset, PadSize, c; if (BmpInfoHeader.Compression != BMP_BI_RGB) return 0; @@ -356,7 +356,7 @@ int LoadPalettedBMP(const char* filename, Image* image) } /* store palette information */ - image->palette = BmpPal; + image->palette = (char*)BmpPal; image->palette_width = 16; diff --git a/samples/profiler.c b/samples/profiler.c index c44c3c9..cefc81a 100644 --- a/samples/profiler.c +++ b/samples/profiler.c @@ -287,7 +287,7 @@ static bool write_samples(const char* path) { root = ARCS; for(int i = 0; i < BUCKET_SIZE; ++i) { if(root->pc) { - printf("Incrementing %d for %x. ", (root->pc - lowest_address) / bin_size, (unsigned int) root->pc); + printf("Incrementing %ld for %x. 
", (root->pc - lowest_address) / bin_size, (unsigned int) root->pc); bins[(root->pc - lowest_address) / bin_size]++; printf("Now: %d\n", (int) bins[(root->pc - lowest_address) / bin_size]); diff --git a/tests/zclip/main.cpp b/tests/zclip/main.cpp index adada72..42febee 100644 --- a/tests/zclip/main.cpp +++ b/tests/zclip/main.cpp @@ -435,7 +435,7 @@ bool test_clip_case_001() { SceneListSubmit(&data[0], data.size()); - check_equal(sent.size(), 5); + check_equal(sent.size(), 5u); check_equal(sent[0].flags, GPU_CMD_POLYHDR); check_equal(sent[1].flags, GPU_CMD_VERTEX); check_equal(sent[2].flags, GPU_CMD_VERTEX); @@ -461,7 +461,7 @@ bool test_clip_case_010() { SceneListSubmit(&data[0], data.size()); - check_equal(sent.size(), 4); + check_equal(sent.size(), 4u); check_equal(sent[0].flags, GPU_CMD_POLYHDR); check_equal(sent[1].flags, GPU_CMD_VERTEX); check_equal(sent[2].flags, GPU_CMD_VERTEX); @@ -481,7 +481,7 @@ bool test_clip_case_100() { SceneListSubmit(&data[0], data.size()); - check_equal(sent.size(), 5); + check_equal(sent.size(), 5u); check_equal(sent[0].flags, GPU_CMD_POLYHDR); check_equal(sent[1].flags, GPU_CMD_VERTEX); check_equal(sent[2].flags, GPU_CMD_VERTEX); @@ -507,7 +507,7 @@ bool test_clip_case_110() { SceneListSubmit(&data[0], data.size()); - check_equal(sent.size(), 6); + check_equal(sent.size(), 6u); check_equal(sent[0].flags, GPU_CMD_POLYHDR); check_equal(sent[1].flags, GPU_CMD_VERTEX); check_equal(sent[2].flags, GPU_CMD_VERTEX); @@ -530,7 +530,7 @@ bool test_clip_case_011() { SceneListSubmit(&data[0], data.size()); - check_equal(sent.size(), 6); + check_equal(sent.size(), 6u); check_equal(sent[0].flags, GPU_CMD_POLYHDR); check_equal(sent[1].flags, GPU_CMD_VERTEX); check_equal(sent[2].flags, GPU_CMD_VERTEX); @@ -553,7 +553,7 @@ bool test_clip_case_101() { SceneListSubmit(&data[0], data.size()); - check_equal(sent.size(), 6); + check_equal(sent.size(), 6u); check_equal(sent[0].flags, GPU_CMD_POLYHDR); check_equal(sent[1].flags, GPU_CMD_VERTEX); 
check_equal(sent[2].flags, GPU_CMD_VERTEX); @@ -576,7 +576,7 @@ bool test_clip_case_111() { SceneListSubmit(&data[0], data.size()); - check_equal(sent.size(), 4); + check_equal(sent.size(), 4u); check_equal(sent[0].flags, GPU_CMD_POLYHDR); check_equal(sent[1].flags, GPU_CMD_VERTEX); check_equal(sent[2].flags, GPU_CMD_VERTEX); From b920855b572f84bb6c282b258b3d0532661d6fd7 Mon Sep 17 00:00:00 2001 From: Falco Girgis Date: Wed, 31 Jul 2024 23:08:37 -0500 Subject: [PATCH 16/17] Micro optimizations and clean-up. --- GL/platforms/sh4.c | 95 +++++++++++++++------------------------------- 1 file changed, 30 insertions(+), 65 deletions(-) diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index 13bb99d..d7c19de 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -13,8 +13,6 @@ #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) -#define SQ_BASE_ADDRESS (void*) 0xe0000000 - GL_FORCE_INLINE bool glIsVertex(const float flags) { return flags == GPU_CMD_VERTEX_EOL || flags == GPU_CMD_VERTEX; @@ -75,30 +73,29 @@ GL_FORCE_INLINE float _glFastInvert(float x) { return (1.0f / __builtin_sqrtf(x * x)); } -GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) { +GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h, int count) { TRACE(); - const float f = _glFastInvert(vertex->w); + for(int v = 0; v < count; ++v) { + const float f = _glFastInvert(vertex[v].w); - /* Convert to NDC and apply viewport */ - vertex->xyz[0] = (vertex->xyz[0] * f * 320) + 320; - vertex->xyz[1] = (vertex->xyz[1] * f * -240) + 240; + /* Convert to NDC and apply viewport */ + vertex[v].xyz[0] = (vertex[v].xyz[0] * f * 320) + 320; + vertex[v].xyz[1] = (vertex[v].xyz[1] * f * -240) + 240; - /* Orthographic projections need to use invZ otherwise we lose - the depth information. As w == 1, and clip-space range is -w to +w - we add 1.0 to the Z to bring it into range. We add a little extra to - avoid a divide by zero. 
- */ - if(vertex->w == 1.0f) { - vertex->xyz[2] = _glFastInvert(1.0001f + vertex->xyz[2]); - } else { - vertex->xyz[2] = f; + /* Orthographic projections need to use invZ otherwise we lose + the depth information. As w == 1, and clip-space range is -w to +w + we add 1.0 to the Z to bring it into range. We add a little extra to + avoid a divide by zero. + */ + if(vertex[v].w == 1.0f) { + vertex[v].xyz[2] = _glFastInvert(1.0001f + vertex[v].xyz[2]); + } else { + vertex[v].xyz[2] = f; + } } } - -volatile uint32_t *sq = SQ_BASE_ADDRESS; - static inline void _glFlushBuffer() { TRACE(); @@ -142,7 +139,6 @@ static inline void _glClipEdge(const Vertex* const v1, const Vertex* const v2, V #define SPAN_SORT_CFG 0x005F8030 static volatile uint32_t* PVR_LMMODE0 = (uint32_t*) 0xA05F6884; static volatile uint32_t *PVR_LMMODE1 = (uint32_t*) 0xA05F6888; -static volatile uint32_t *QACR = (uint32_t*) 0xFF000038; enum Visible { NONE_VISIBLE = 0, @@ -175,9 +171,6 @@ void SceneListSubmit(Vertex* vertices, int n) { *PVR_LMMODE0 = 0; *PVR_LMMODE1 = 0; - //Set QACR registers - QACR[1] = QACR[0] = 0x11; - #if CLIP_DEBUG fprintf(stderr, "----\n"); @@ -206,7 +199,6 @@ void SceneListSubmit(Vertex* vertices, int n) { int visible_mask = 0; - sq = SQ_BASE_ADDRESS; sq_dest_addr = (uintptr_t)SQ_MASK_DEST(PVR_TA_INPUT); sq_lock((void *)PVR_TA_INPUT); @@ -237,13 +229,8 @@ void SceneListSubmit(Vertex* vertices, int n) { if(visible_mask == ALL_VISIBLE) { SUBMIT_QUEUED_VERTEX(qv.flags); - _glPerspectiveDivideVertex(v0, h); - //_glPushHeaderOrVertex(v0); - + _glPerspectiveDivideVertex(v0, h, 2); v1->flags = GPU_CMD_VERTEX_EOL; - - _glPerspectiveDivideVertex(v1, h); - //_glPushHeaderOrVertex(v1); _glPushHeaderOrVertex(v0, 2); } else { // If the previous triangle wasn't all visible, and we @@ -280,7 +267,7 @@ void SceneListSubmit(Vertex* vertices, int n) { switch(visible_mask) { case ALL_VISIBLE: - _glPerspectiveDivideVertex(v0, h); + _glPerspectiveDivideVertex(v0, h, 1); QUEUE_VERTEX(v0); break; case 
NONE_VISIBLE: @@ -293,14 +280,10 @@ void SceneListSubmit(Vertex* vertices, int n) { _glClipEdge(v2, v0, b); b->flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(v0, h); + _glPerspectiveDivideVertex(v0, h, 1); _glPushHeaderOrVertex(v0, 1); - _glPerspectiveDivideVertex(a, h); - //_glPushHeaderOrVertex(a); - - _glPerspectiveDivideVertex(b, h); - //_glPushHeaderOrVertex(b); + _glPerspectiveDivideVertex(a, h, 2); _glPushHeaderOrVertex(a, 2); QUEUE_VERTEX(b); @@ -314,13 +297,11 @@ void SceneListSubmit(Vertex* vertices, int n) { _glClipEdge(v1, v2, b); b->flags = v2->flags; - _glPerspectiveDivideVertex(a, h); + _glPerspectiveDivideVertex(a, h, 3); _glPushHeaderOrVertex(a, 1); - _glPerspectiveDivideVertex(c, h); _glPushHeaderOrVertex(c, 1); - _glPerspectiveDivideVertex(b, h); QUEUE_VERTEX(b); break; case THIRD_VISIBLE: @@ -332,15 +313,9 @@ void SceneListSubmit(Vertex* vertices, int n) { _glClipEdge(v1, v2, b); b->flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(a, h); - //_glPushHeaderOrVertex(a); - //_glPushHeaderOrVertex(a); - - _glPerspectiveDivideVertex(b, h); - //_glPushHeaderOrVertex(b); + _glPerspectiveDivideVertex(a, h, 3); _glPushHeaderOrVertex(a, 2); - _glPerspectiveDivideVertex(c, h); QUEUE_VERTEX(c); break; case FIRST_AND_SECOND_VISIBLE: @@ -349,20 +324,16 @@ void SceneListSubmit(Vertex* vertices, int n) { _glClipEdge(v2, v0, b); b->flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(v0, h); + _glPerspectiveDivideVertex(v0, h, 1); _glPushHeaderOrVertex(v0, 1); _glClipEdge(v1, v2, a); a->flags = v2->flags; - _glPerspectiveDivideVertex(c, h); + _glPerspectiveDivideVertex(a, h, 3); + _glPushHeaderOrVertex(c, 1); - _glPerspectiveDivideVertex(b, h); - //_glPushHeaderOrVertex(b); - - _glPerspectiveDivideVertex(a, h); - //_glPushHeaderOrVertex(c); _glPushHeaderOrVertex(b, 2); QUEUE_VERTEX(a); @@ -377,18 +348,13 @@ void SceneListSubmit(Vertex* vertices, int n) { _glClipEdge(v2, v0, b); b->flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(a, h); + 
_glPerspectiveDivideVertex(a, h, 4); _glPushHeaderOrVertex(a, 1); - _glPerspectiveDivideVertex(c, h); _glPushHeaderOrVertex(c, 1); - _glPerspectiveDivideVertex(b, h); - //_glPushHeaderOrVertex(b); - //_glPushHeaderOrVertex(c); _glPushHeaderOrVertex(b, 2); - _glPerspectiveDivideVertex(d, h); QUEUE_VERTEX(d); break; case FIRST_AND_THIRD_VISIBLE: @@ -401,16 +367,16 @@ void SceneListSubmit(Vertex* vertices, int n) { _glClipEdge(v1, v2, b); b->flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(v0, h); + _glPerspectiveDivideVertex(v0, h, 1); _glPushHeaderOrVertex(v0, 1); - _glPerspectiveDivideVertex(a, h); + _glPerspectiveDivideVertex(a, h, 3); _glPushHeaderOrVertex(a, 1); - _glPerspectiveDivideVertex(c, h); _glPushHeaderOrVertex(c, 1); - _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b, 1); + QUEUE_VERTEX(c); break; default: @@ -421,7 +387,6 @@ void SceneListSubmit(Vertex* vertices, int n) { SUBMIT_QUEUED_VERTEX(GPU_CMD_VERTEX_EOL); _glFlushBuffer(); - sq_unlock(); } From 812ed4a1ee58fecf12013e0f1194a728fada51f8 Mon Sep 17 00:00:00 2001 From: Falco Girgis Date: Wed, 31 Jul 2024 23:29:09 -0500 Subject: [PATCH 17/17] Fixing PC build. --- GL/matrix.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/GL/matrix.c b/GL/matrix.c index bfa2f12..d70a104 100644 --- a/GL/matrix.c +++ b/GL/matrix.c @@ -174,8 +174,13 @@ void APIENTRY glRotatef(GLfloat angle, GLfloat x, GLfloat y, GLfloat z) { }; float r = DEG2RAD * angle; - float c = fcos(r); - float s = fsin(r); +#ifdef __DREAMCAST__ + float s, c; + fsincos(r, &s, &c); +#else + float c = cosf(r); + float s = sinf(r); +#endif VEC3_NORMALIZE(x, y, z);