From c5ce81a38dc7cced99b834bd7855ad3c294c999b Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Wed, 19 Apr 2023 20:57:44 +0100 Subject: [PATCH 01/10] WIP: Restructure clipping to be much MUCH faster in the visible case This currently only works with triangles, anything more and it crashes due to me not queuing subsequent vertices in the strip correctly --- CMakeLists.txt | 1 + GL/platforms/sh4.c | 519 ++++++++++++++-------------- GL/private.h | 44 ++- samples/zclip_triangle/main.c | 3 +- tests/zclip/main.cpp | 625 ++++++++++++++++++++++++++++++++++ 5 files changed, 916 insertions(+), 276 deletions(-) create mode 100644 tests/zclip/main.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 13ad28d..fb48c72 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -172,6 +172,7 @@ gen_sample(scissor samples/scissor/main.c) gen_sample(polymark samples/polymark/main.c) gen_sample(cubes samples/cubes/main.cpp) +gen_sample(zclip_test tests/zclip/main.cpp) if(PLATFORM_DREAMCAST) gen_sample(trimark samples/trimark/main.c) diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index 0d2a35c..dee2b40 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -71,7 +71,7 @@ GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) { vertex->xyz[2] = (vertex->w == 1.0f) ? _glFastInvert(1.0001f + vertex->xyz[2]) : f; } -GL_FORCE_INLINE void _glSubmitHeaderOrVertex(uint32_t* d, const Vertex* v) { +GL_FORCE_INLINE void _glSubmitHeaderOrVertex(volatile uint32_t* d, const Vertex* v) { #ifndef NDEBUG gl_assert(!isnan(v->xyz[2])); gl_assert(!isnan(v->w)); @@ -94,13 +94,6 @@ GL_FORCE_INLINE void _glSubmitHeaderOrVertex(uint32_t* d, const Vertex* v) { d += 8; } -static struct __attribute__((aligned(32))) { - Vertex* v; - int visible; -} triangle[3]; - -static int tri_count = 0; -static int strip_count = 0; static inline void interpolateColour(const uint32_t* a, const uint32_t* b, const float t, uint32_t* out) { const static uint32_t MASK1 = 0x00FF00FF; @@ -134,296 +127,284 @@ static inline void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) interpolateColour((uint32_t*) v1->bgra, (uint32_t*) v2->bgra, t, (uint32_t*) vout->bgra); } -GL_FORCE_INLINE void ClearTriangle() { - tri_count = 0; -} - -static inline void ShiftTriangle() { - if(!tri_count) { - return; - } - - tri_count--; - triangle[0] = triangle[1]; - triangle[1] = triangle[2]; - -#ifndef NDEBUG - triangle[2].v = NULL; - triangle[2].visible = false; -#endif -} - - -static inline void ShiftRotateTriangle() { - if(!tri_count) { - return; - } - - if(triangle[0].v < triangle[1].v) { - triangle[0] = triangle[2]; - } else { - triangle[1] = triangle[2]; - } - - tri_count--; -} - #define SPAN_SORT_CFG 0x005F8030 +static volatile int *pvrdmacfg = (int*)0xA05F6888; +static volatile int *qacr = (int*)0xFF000038; void SceneListSubmit(void* src, int n) { + /* You need at least a header, and 3 vertices to render anything */ + if(n < 4) { + return; + } + const float h = GetVideoMode()->height; PVR_SET(SPAN_SORT_CFG, 0x0); //Set PVR DMA registers - volatile int *pvrdmacfg = (int*)0xA05F6888; pvrdmacfg[0] = 1; pvrdmacfg[1] = 0; //Set QACR registers - volatile int *qacr = (int*)0xFF000038; qacr[1] = qacr[0] = 0x11; - uint32_t *d = SQ_BASE_ADDRESS; + volatile uint32_t *d = SQ_BASE_ADDRESS; - Vertex __attribute__((aligned(32))) tmp; + int8_t queue_head = 0; + int8_t queue_tail = 0; + + Vertex __attribute__((aligned(32))) queue[3]; + const int queue_capacity = sizeof(queue) / sizeof(Vertex); - /* Perform perspective divide on each vertex */ Vertex* vertex = (Vertex*) src; + uint32_t visible_mask = 0; - if(!_glNearZClippingEnabled()) { - /* Prep store queues */ - - while(n--) { - if(glIsVertex(vertex->flags)) { - _glPerspectiveDivideVertex(vertex, h); - } - - _glSubmitHeaderOrVertex(d, vertex); - ++vertex; - } - - return; + for(int i = 0; i < n; ++i) { + Vertex* v = vertex + i; + fprintf(stderr, "{%f, %f, %f, %f},\n", v->xyz[0], v->xyz[1], v->xyz[2], v->w); } - tri_count = 0; - strip_count = 0; + /* Assume first entry is a header */ + _glSubmitHeaderOrVertex(d, vertex++); -#if CLIP_DEBUG - printf("----\n"); -#endif + /* Push first 2 vertices of the strip */ + memcpy_vertex(&queue[0], vertex++); + memcpy_vertex(&queue[1], vertex++); + visible_mask = ((queue[0].xyz[2] >= -queue[0].w) << 1) | ((queue[1].xyz[2] >= -queue[1].w) << 2); + queue_tail = 2; + n -= 3; - for(int i = 0; i < n; ++i, ++vertex) { - PREFETCH(vertex + 1); - PREFETCH(vertex + 2); - /* Wait until we fill the triangle */ - if(tri_count < 3) { - if(glIsVertex(vertex->flags)) { - ++strip_count; - triangle[tri_count].v = vertex; - triangle[tri_count].visible = vertex->xyz[2] >= -vertex->w; - if(++tri_count < 3) { - continue; - } - } else { - /* We hit a header */ - tri_count = 0; - strip_count = 0; - _glSubmitHeaderOrVertex(d, vertex); - continue; - } - } - -#if CLIP_DEBUG - printf("SC: %d\n", strip_count); -#endif - - /* If we got here, then triangle contains 3 vertices */ - int visible_mask = triangle[0].visible | (triangle[1].visible << 1) | (triangle[2].visible << 2); - - /* Clipping time! - - There are 6 distinct possibilities when clipping a triangle. 3 of them result - in another triangle, 3 of them result in a quadrilateral. - - Assuming you iterate the edges of the triangle in order, and create a new *visible* - vertex when you cross the plane, and discard vertices behind the plane, then the only - difference between the two cases is that the final two vertices that need submitting have - to be reversed. - - Unfortunately we have to copy vertices here, because if we persp-divide a vertex it may - be used in a subsequent triangle in the strip and would end up being double divided. - */ - -#define SUBMIT_QUEUED() \ - if(strip_count > 3) { \ - tmp = *(vertex - 2); \ - /* If we had triangles ahead of this one, submit and finalize */ \ - _glPerspectiveDivideVertex(&tmp, h); \ - _glSubmitHeaderOrVertex(d, &tmp); \ - tmp = *(vertex - 1); \ - tmp.flags = GPU_CMD_VERTEX_EOL; \ - _glPerspectiveDivideVertex(&tmp, h); \ - _glSubmitHeaderOrVertex(d, &tmp); \ - } - - bool is_last_in_strip = glIsLastVertex(vertex->flags); + while(n--) { + Vertex* self = &queue[queue_tail]; + memcpy_vertex(self, vertex++); + visible_mask = (visible_mask >> 1) | ((self->xyz[2] >= -self->w) << 2); // Push new vertex + queue_tail = (queue_tail + 1) % queue_capacity; switch(visible_mask) { - case 1: { - SUBMIT_QUEUED(); - /* 0, 0a, 2a */ - tmp = *triangle[0].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - _glClipEdge(triangle[0].v, triangle[1].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - _glClipEdge(triangle[2].v, triangle[0].v, &tmp); - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - } break; - case 2: { - SUBMIT_QUEUED(); - /* 0a, 1, 1a */ - _glClipEdge(triangle[0].v, triangle[1].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - tmp = *triangle[1].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - _glClipEdge(triangle[1].v, triangle[2].v, &tmp); - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - } break; - case 3: { - SUBMIT_QUEUED(); - /* 0, 1, 2a, 1a */ - tmp = *triangle[0].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - tmp = *triangle[1].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - _glClipEdge(triangle[2].v, triangle[0].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - _glClipEdge(triangle[1].v, triangle[2].v, &tmp); - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - } break; - case 4: { - SUBMIT_QUEUED(); - /* 1a, 2, 2a */ - _glClipEdge(triangle[1].v, triangle[2].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - tmp = *triangle[2].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - _glClipEdge(triangle[2].v, triangle[0].v, &tmp); - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - } break; - case 5: { - SUBMIT_QUEUED(); - /* 0, 0a, 2, 1a */ - tmp = *triangle[0].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - _glClipEdge(triangle[0].v, triangle[1].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - tmp = *triangle[2].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - _glClipEdge(triangle[1].v, triangle[2].v, &tmp); - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - } break; - case 6: { - SUBMIT_QUEUED(); - /* 0a, 1, 2a, 2 */ - _glClipEdge(triangle[0].v, triangle[1].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - tmp = *triangle[1].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - _glClipEdge(triangle[2].v, triangle[0].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - tmp = *triangle[2].v; - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - } break; - case 7: { - /* All the vertices are visible! We divide and submit v0, then shift */ - _glPerspectiveDivideVertex(vertex - 2, h); - _glSubmitHeaderOrVertex(d, vertex - 2); - - if(is_last_in_strip) { - _glPerspectiveDivideVertex(vertex - 1, h); - _glSubmitHeaderOrVertex(d, vertex - 1); - _glPerspectiveDivideVertex(vertex, h); - _glSubmitHeaderOrVertex(d, vertex); - tri_count = 0; - strip_count = 0; - } - - ShiftRotateTriangle(); - continue; - } break; case 0: - default: + queue_head = (queue_head + 1) % queue_capacity; + continue; break; + case 7: + /* All visible, push the first vertex and move on */ + _glPerspectiveDivideVertex(&queue[queue_head], h); + _glSubmitHeaderOrVertex(d, &queue[queue_head]); + queue_head = (queue_head + 1) % queue_capacity; + + if(glIsLastVertex(self->flags)) { + /* If this was the last vertex in the strip, we clear the + * triangle out */ + while(queue_head != queue_tail) { + _glPerspectiveDivideVertex(&queue[queue_head], h); + _glSubmitHeaderOrVertex(d, &queue[queue_head]); + queue_head = (queue_head + 1) % queue_capacity; + } + + visible_mask = 0; + } + break; + case 1: + /* First vertex was visible */ + { + Vertex __attribute__((aligned(32))) a, b; // Scratch vertices + Vertex* v0 = &queue[queue_head]; + Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + + _glClipEdge(v0, v1, &a); + _glClipEdge(v2, v0, &b); + a.flags = GPU_CMD_VERTEX; + + /* If v2 was the last in the strip, then b should be. If it wasn't + we'll create a degenerate triangle by adding b twice in a row so that the + strip processing will continue correctly after crossing the plane so it can + cross back*/ + b.flags = v2->flags; + + _glPerspectiveDivideVertex(v0, h); + _glSubmitHeaderOrVertex(d, v0); + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(d, &a); + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(d, &b); + _glSubmitHeaderOrVertex(d, &b); + + /* But skip the vertices that are already there */ + queue_head = (queue_head + 3) % queue_capacity; + visible_mask = 0; + } + break; + case 2: + /* Second vertex was visible. In self case we need to create a triangle and produce + two new vertices: 1-2, and 2-3. */ + { + Vertex __attribute__((aligned(32))) a, b; // Scratch vertices + Vertex* v0 = &queue[queue_head]; + Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + + _glClipEdge(v0, v1, &a); + _glClipEdge(v1, v2, &b); + a.flags = GPU_CMD_VERTEX; + b.flags = GPU_CMD_VERTEX_EOL; + + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(d, &a); + + _glPerspectiveDivideVertex(v1, h); + _glSubmitHeaderOrVertex(d, v1); + + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(d, &b); + + /* But skip the vertices that are already there */ + queue_head = (queue_head + 3) % queue_capacity; + visible_mask = 0; + } + break; + case 3: /* First and second vertex were visible */ + { + Vertex __attribute__((aligned(32))) a, b; // Scratch vertices + Vertex* v0 = &queue[queue_head]; + Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + + _glClipEdge(v1, v2, &a); + _glClipEdge(v2, v0, &b); + + a.flags = v2->flags; + b.flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(v0, h); + _glSubmitHeaderOrVertex(d, v0); + + _glPerspectiveDivideVertex(v1, h); + _glSubmitHeaderOrVertex(d, v1); + + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(d, &b); + + _glSubmitHeaderOrVertex(d, v1); + + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(d, &a); + + /* But skip the vertices that are already there */ + queue_head = (queue_head + 3) % queue_capacity; + visible_mask = 0; + } + break; + case 4: + /* Third vertex was visible. */ + { + Vertex __attribute__((aligned(32))) a, b; // Scratch vertices + Vertex* v0 = &queue[queue_head]; + Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex v2 = queue[(queue_head + 2) % queue_capacity]; + + _glClipEdge(&v2, v0, &a); + _glClipEdge(v1, &v2, &b); + a.flags = GPU_CMD_VERTEX; + b.flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(d, &a); + + _glSubmitHeaderOrVertex(d, &a); + + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(d, &b); + + _glPerspectiveDivideVertex(&v2, h); + _glSubmitHeaderOrVertex(d, &v2); + + queue_head = (queue_head + 3) % queue_capacity; + visible_mask = 0; + } + break; + case 5: /* First and third vertex were visible */ + { + Vertex __attribute__((aligned(32))) a, b, c; // Scratch vertices + Vertex* v0 = &queue[queue_head]; + Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + + _glClipEdge(v0, v1, &a); + _glClipEdge(v1, v2, &b); + a.flags = GPU_CMD_VERTEX; + b.flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(v0, h); + _glSubmitHeaderOrVertex(d, v0); + + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(d, &a); + + uint32_t v2_flags = v2->flags; + v2->flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(v2, h); + _glSubmitHeaderOrVertex(d, v2); + + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(d, &b); + + v2->flags = v2_flags; + _glSubmitHeaderOrVertex(d, v2); + + queue_head = (queue_head + 3) % queue_capacity; + visible_mask = 0; + } + break; + case 6: /* Second and third vertex were visible */ + { + Vertex __attribute__((aligned(32))) a, b; // Scratch vertices + Vertex* v0 = &queue[queue_head]; + Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + + _glClipEdge(v0, v1, &a); + _glClipEdge(v2, v0, &b); + + a.flags = GPU_CMD_VERTEX; + b.flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(d, &a); + + _glPerspectiveDivideVertex(v1, h); + _glSubmitHeaderOrVertex(d, v1); + + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(d, &b); + + _glSubmitHeaderOrVertex(d, v1); + + _glPerspectiveDivideVertex(v2, h); + _glSubmitHeaderOrVertex(d, v2); + + queue_head = (queue_head + 3) % queue_capacity; + visible_mask = 0; + } + break; + default: + break; } - /* If this was the last in the strip, we don't need to - submit anything else, we just wipe the tri_count */ - if(is_last_in_strip) { - tri_count = 0; - strip_count = 0; - } else { - ShiftRotateTriangle(); - strip_count = 2; + /* Submit the beginning of the next strip (2 verts, maybe a header) */ + int8_t v = 0; + while(v < 2 && n > 1) { + if(!glIsVertex(vertex->flags)) { + _glSubmitHeaderOrVertex(d, vertex); + } else { + memcpy_vertex(&queue[queue_tail], vertex++); + visible_mask = (visible_mask >> 1) | ((queue[queue_tail].xyz[2] >= -queue[queue_tail].w) << 2); // Push new vertex + queue_tail = (queue_tail + 1) % queue_capacity; + ++v; + } + --n; } + } } diff --git a/GL/private.h b/GL/private.h index f309571..bfbfebb 100644 --- a/GL/private.h +++ b/GL/private.h @@ -221,23 +221,55 @@ typedef struct { } _glvec4; #define vec2cpy(dst, src) \ - *((_glvec2*) dst) = *((_glvec2*) src) + *((uint64_t*) dst) = *((uint64_t*) src); #define vec3cpy(dst, src) \ - *((_glvec3*) dst) = *((_glvec3*) src) + *((uint64_t*) dst) = *((uint64_t*) src); \ + dst[2] = src[2]; #define vec4cpy(dst, src) \ - *((_glvec4*) dst) = *((_glvec4*) src) + *((uint64_t*) dst) = *((uint64_t*) src); \ + *((uint64_t*) dst + 2) = *((uint64_t*) src + 2); GL_FORCE_INLINE float clamp(float d, float min, float max) { return (d < min) ? min : (d > max) ? max : d; } +GL_FORCE_INLINE void memcpy_vertex(Vertex *dest, const Vertex *src) { +#ifdef __DREAMCAST__ + _Complex float double_scratch; + + asm volatile ( + "fschg\n\t" + "clrs\n" + ".align 2\n" + "fmov.d @%[in]+, %[scratch]\n\t" + "fmov.d %[scratch], @%[out]\n\t" + "fmov.d @%[in]+, %[scratch]\n\t" + "add #8, %[out]\n\t" + "fmov.d %[scratch], @%[out]\n\t" + "fmov.d @%[in]+, %[scratch]\n\t" + "add #8, %[out]\n\t" + "fmov.d %[scratch], @%[out]\n\t" + "fmov.d @%[in], %[scratch]\n\t" + "add #8, %[out]\n\t" + "fmov.d %[scratch], @%[out]\n\t" + "fschg\n" + : [in] "+&r" ((uint32_t) src), [scratch] "=&d" (double_scratch), [out] "+&r" ((uint32_t) dest) + : + : "t", "memory" // clobbers + ); +#else + *dest = *src; +#endif +} + #define swapVertex(a, b) \ do { \ - Vertex c = *a; \ - *a = *b; \ - *b = c; \ + Vertex __attribute__((aligned(32))) c; \ + memcpy_vertex(&c, a); \ + memcpy_vertex(a, b); \ + memcpy_vertex(b, &c); \ } while(0) /* ClipVertex doesn't have room for these, so we need to parse them diff --git a/samples/zclip_triangle/main.c b/samples/zclip_triangle/main.c index 17f4887..92482ab 100644 --- a/samples/zclip_triangle/main.c +++ b/samples/zclip_triangle/main.c @@ -86,12 +86,13 @@ void DrawGLScene() rotation = (rotation > 360.0f) ? rotation - 360.0f : rotation; glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // Clear The Screen And The Depth Buffer + glClearColor(0.5f, 0.5f, 0.5f, 0.5f); glLoadIdentity(); // Reset The View glDisable(GL_CULL_FACE); glPushMatrix(); - glTranslatef(0.0f, -1.0f, movement); + glTranslatef(0.0f, -1.0f, -movement); glRotatef(rotation, 0.0f, 1.0f, 0.0f); glBegin(GL_TRIANGLES); diff --git a/tests/zclip/main.cpp b/tests/zclip/main.cpp new file mode 100644 index 0000000..9cd2037 --- /dev/null +++ b/tests/zclip/main.cpp @@ -0,0 +1,625 @@ + +#include +#include +#include +#include +#include + +#define SQ_BASE_ADDRESS 0 +#define SPAN_SORT_CFG 0 +#define PVR_SET(x, y) (void)(x); (void)(y) + +struct Vertex { + uint32_t flags; + float xyz[3]; + float uv[2]; + float w; + uint8_t bgra[4]; +}; + +struct { + float hwidth; + float x_plus_hwidth; + float hheight; + float y_plus_hheight; +} VIEWPORT = {320, 320, 240, 240}; + + +struct VideoMode { + float height; +}; + +static VideoMode* GetVideoMode() { + static VideoMode mode = {320.0f}; + return &mode; +} + +enum GPUCommand { + GPU_CMD_POLYHDR = 0x80840000, + GPU_CMD_VERTEX = 0xe0000000, + GPU_CMD_VERTEX_EOL = 0xf0000000, + GPU_CMD_USERCLIP = 0x20000000, + GPU_CMD_MODIFIER = 0x80000000, + GPU_CMD_SPRITE = 0xA0000000 +}; + +static std::vector sent; + +static inline void interpolateColour(const uint32_t* a, const uint32_t* b, const float t, uint32_t* out) { + const static uint32_t MASK1 = 0x00FF00FF; + const static uint32_t MASK2 = 0xFF00FF00; + + const uint32_t f2 = 256 * t; + const uint32_t f1 = 256 - f2; + + *out = (((((*a & MASK1) * f1) + ((*b & MASK1) * f2)) >> 8) & MASK1) | + (((((*a & MASK2) * f1) + ((*b & MASK2) * f2)) >> 8) & MASK2); +} + +static inline void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) { + /* Clipping time! */ + const float d0 = v1->w + v1->xyz[2]; + const float d1 = v2->w + v2->xyz[2]; + const float sign = ((2.0f * (d1 < d0)) - 1.0f); + const float epsilon = -0.00001f * sign; + const float n = (d0 - d1); + const float r = (1.f / sqrtf(n * n)) * sign; + float t = fmaf(r, d0, epsilon); + + vout->xyz[0] = fmaf(v2->xyz[0] - v1->xyz[0], t, v1->xyz[0]); + vout->xyz[1] = fmaf(v2->xyz[1] - v1->xyz[1], t, v1->xyz[1]); + vout->xyz[2] = fmaf(v2->xyz[2] - v1->xyz[2], t, v1->xyz[2]); + vout->w = fmaf(v2->w - v1->w, t, v1->w); + + vout->uv[0] = fmaf(v2->uv[0] - v1->uv[0], t, v1->uv[0]); + vout->uv[1] = fmaf(v2->uv[1] - v1->uv[1], t, v1->uv[1]); + + interpolateColour((uint32_t*) v1->bgra, (uint32_t*) v2->bgra, t, (uint32_t*) vout->bgra); +} + +bool glIsVertex(const uint32_t flags) { + return flags == GPU_CMD_VERTEX_EOL || flags == GPU_CMD_VERTEX; +} + +bool glIsLastVertex(const uint32_t flags) { + return flags == GPU_CMD_VERTEX_EOL; +} + +void _glSubmitHeaderOrVertex(volatile uint32_t*, Vertex* vtx) { + sent.push_back(*vtx); +} + +float _glFastInvert(float x) { + return (1.f / __builtin_sqrtf(x * x)); +} + +void _glPerspectiveDivideVertex(Vertex* vertex, const float h) { + const float f = _glFastInvert(vertex->w); + + /* Convert to NDC and apply viewport */ + vertex->xyz[0] = __builtin_fmaf( + VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth + ); + + vertex->xyz[1] = h - __builtin_fmaf( + VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight + ); + + /* Orthographic projections need to use invZ otherwise we lose + the depth information. As w == 1, and clip-space range is -w to +w + we add 1.0 to the Z to bring it into range. We add a little extra to + avoid a divide by zero. + */ + + vertex->xyz[2] = (vertex->w == 1.0f) ? _glFastInvert(1.0001f + vertex->xyz[2]) : f; +} + + +void memcpy_vertex(Vertex* dst, Vertex* src) { + *dst = *src; +} + +/* Zclipping is so difficult to get right, that self sample tests all the cases of clipping and makes sure that things work as expected */ + +#ifdef __DREAMCAST__ +static volatile int *pvrdmacfg = (int*)0xA05F6888; +static volatile int *qacr = (int*)0xFF000038; +#else +static int pvrdmacfg[2]; +static int qacr[2]; +#endif + +void SceneListSubmit(void* src, int n) { + /* You need at least a header, and 3 vertices to render anything */ + if(n < 4) { + return; + } + + const float h = GetVideoMode()->height; + + PVR_SET(SPAN_SORT_CFG, 0x0); + + //Set PVR DMA registers + pvrdmacfg[0] = 1; + pvrdmacfg[1] = 0; + + //Set QACR registers + qacr[1] = qacr[0] = 0x11; + + volatile uint32_t *d = SQ_BASE_ADDRESS; + + int8_t queue_head = 0; + int8_t queue_tail = 0; + + Vertex __attribute__((aligned(32))) queue[5]; + const int queue_capacity = sizeof(queue) / sizeof(Vertex); + + Vertex* vertex = (Vertex*) src; + uint32_t visible_mask = 0; + + /* Assume first entry is a header */ + _glSubmitHeaderOrVertex(d, vertex++); + + /* Push first 2 vertices of the strip */ + memcpy_vertex(&queue[0], vertex++); + memcpy_vertex(&queue[1], vertex++); + visible_mask = ((queue[0].xyz[2] >= -queue[0].w) << 1) | ((queue[1].xyz[2] >= -queue[1].w) << 2); + queue_tail = 2; + n -= 3; + + while(n--) { + Vertex* self = &queue[queue_tail]; + memcpy_vertex(self, vertex++); + visible_mask = (visible_mask >> 1) | ((self->xyz[2] >= -self->w) << 2); // Push new vertex + queue_tail = (queue_tail + 1) % queue_capacity; + + switch(visible_mask) { + case 0: + queue_head = (queue_head + 1) % queue_capacity; + continue; + break; + case 7: + /* All visible, push the first vertex and move on */ + _glPerspectiveDivideVertex(&queue[queue_head], h); + _glSubmitHeaderOrVertex(d, &queue[queue_head]); + queue_head = (queue_head + 1) % queue_capacity; + + if(glIsLastVertex(self->flags)) { + /* If this was the last vertex in the strip, we clear the + * triangle out */ + while(queue_head != queue_tail) { + _glPerspectiveDivideVertex(&queue[queue_head], h); + _glSubmitHeaderOrVertex(d, &queue[queue_head]); + queue_head = (queue_head + 1) % queue_capacity; + } + + visible_mask = 0; + } + break; + case 1: + /* First vertex was visible */ + { + Vertex __attribute__((aligned(32))) a, b; // Scratch vertices + Vertex* v0 = &queue[queue_head]; + Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + + _glClipEdge(v0, v1, &a); + _glClipEdge(v2, v0, &b); + a.flags = GPU_CMD_VERTEX; + + /* If v2 was the last in the strip, then b should be. If it wasn't + we'll create a degenerate triangle by adding b twice in a row so that the + strip processing will continue correctly after crossing the plane so it can + cross back*/ + b.flags = v2->flags; + + _glPerspectiveDivideVertex(v0, h); + _glSubmitHeaderOrVertex(d, v0); + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(d, &a); + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(d, &b); + _glSubmitHeaderOrVertex(d, &b); + + /* But skip the vertices that are already there */ + queue_head = (queue_head + 3) % queue_capacity; + visible_mask = 0; + } + break; + case 2: + /* Second vertex was visible. In self case we need to create a triangle and produce + two new vertices: 1-2, and 2-3. */ + { + Vertex __attribute__((aligned(32))) a, b; // Scratch vertices + Vertex* v0 = &queue[queue_head]; + Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + + _glClipEdge(v0, v1, &a); + _glClipEdge(v1, v2, &b); + a.flags = GPU_CMD_VERTEX; + b.flags = GPU_CMD_VERTEX_EOL; + + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(d, &a); + + _glPerspectiveDivideVertex(v1, h); + _glSubmitHeaderOrVertex(d, v1); + + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(d, &b); + + /* But skip the vertices that are already there */ + queue_head = (queue_head + 3) % queue_capacity; + visible_mask = 0; + } + break; + case 3: /* First and second vertex were visible */ + { + Vertex __attribute__((aligned(32))) a, b; // Scratch vertices + Vertex* v0 = &queue[queue_head]; + Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + + _glClipEdge(v1, v2, &a); + _glClipEdge(v2, v0, &b); + + a.flags = v2->flags; + b.flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(v0, h); + _glSubmitHeaderOrVertex(d, v0); + + _glPerspectiveDivideVertex(v1, h); + _glSubmitHeaderOrVertex(d, v1); + + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(d, &b); + + _glSubmitHeaderOrVertex(d, v1); + + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(d, &a); + + /* But skip the vertices that are already there */ + queue_head = (queue_head + 3) % queue_capacity; + visible_mask = 0; + } + break; + case 4: + /* Third vertex was visible. */ + { + Vertex __attribute__((aligned(32))) a, b; // Scratch vertices + Vertex* v0 = &queue[queue_head]; + Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex v2 = queue[(queue_head + 2) % queue_capacity]; + + _glClipEdge(&v2, v0, &a); + _glClipEdge(v1, &v2, &b); + a.flags = GPU_CMD_VERTEX; + b.flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(d, &a); + + _glSubmitHeaderOrVertex(d, &a); + + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(d, &b); + + _glPerspectiveDivideVertex(&v2, h); + _glSubmitHeaderOrVertex(d, &v2); + + queue_head = (queue_head + 3) % queue_capacity; + visible_mask = 0; + } + break; + case 5: /* First and third vertex were visible */ + { + Vertex __attribute__((aligned(32))) a, b, c; // Scratch vertices + Vertex* v0 = &queue[queue_head]; + Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + + _glClipEdge(v0, v1, &a); + _glClipEdge(v1, v2, &b); + a.flags = GPU_CMD_VERTEX; + b.flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(v0, h); + _glSubmitHeaderOrVertex(d, v0); + + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(d, &a); + + uint32_t v2_flags = v2->flags; + v2->flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(v2, h); + _glSubmitHeaderOrVertex(d, v2); + + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(d, &b); + + v2->flags = v2_flags; + _glSubmitHeaderOrVertex(d, v2); + + queue_head = (queue_head + 3) % queue_capacity; + visible_mask = 0; + } + break; + case 6: /* Second and third vertex were visible */ + { + Vertex __attribute__((aligned(32))) a, b; // Scratch vertices + Vertex* v0 = &queue[queue_head]; + Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + + _glClipEdge(v0, v1, &a); + _glClipEdge(v2, v0, &b); + + a.flags = GPU_CMD_VERTEX; + b.flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(d, &a); + + _glPerspectiveDivideVertex(v1, h); + _glSubmitHeaderOrVertex(d, v1); + + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(d, &b); + + _glSubmitHeaderOrVertex(d, v1); + + _glPerspectiveDivideVertex(v2, h); + _glSubmitHeaderOrVertex(d, v2); + + queue_head = (queue_head + 3) % queue_capacity; + visible_mask = 0; + } + break; + default: + break; + } + + /* Submit the beginning of the next strip (2 verts, maybe a header) */ + int8_t v = 0; + while(v < 2 && n > 1) { + if(!glIsVertex(vertex->flags)) { + _glSubmitHeaderOrVertex(d, vertex); + } else { + memcpy_vertex(&queue[queue_tail], vertex++); + visible_mask = (visible_mask >> 1) | ((queue[queue_tail].xyz[2] >= -queue[queue_tail].w) << 2); // Push new vertex + queue_tail = (queue_tail + 1) % queue_capacity; + ++v; + } + --n; + } + + } +} + + +struct VertexTmpl { + VertexTmpl(float x, float y, float z, float w): + x(x), y(y), z(z), w(w) {} + + float x, y, z, w; +}; + +std::vector make_vertices(const std::vector& verts) { + std::vector result; + Vertex r; + + r.flags = GPU_CMD_POLYHDR; + result.push_back(r); + + for(auto& v: verts) { + r.flags = GPU_CMD_VERTEX; + r.xyz[0] = v.x; + r.xyz[1] = v.y; + r.xyz[2] = v.z; + r.uv[0] = 0.0f; + r.uv[1] = 0.0f; + r.w = v.w; + + result.push_back(r); + } + + result.back().flags = GPU_CMD_VERTEX_EOL; + return result; +} + +template +void check_equal(const T& lhs, const U& rhs) { + if(lhs != rhs) { + throw std::runtime_error("Assertion failed"); + } +} + +template<> +void check_equal(const Vertex& lhs, const Vertex& rhs) { + if(lhs.xyz[0] != rhs.xyz[0] || + lhs.xyz[1] != rhs.xyz[1] || + lhs.xyz[2] != rhs.xyz[2] || + lhs.w != rhs.w) { + throw std::runtime_error("Assertion failed"); + } +} + + +bool test_clip_case_001() { + /* The first vertex is visible only */ + sent.clear(); + + auto data = make_vertices({ + {0.000000, -2.414213, 3.080808, 5.000000}, + {-4.526650, -2.414213, -7.121212, -5.000000}, + {4.526650, -2.414213, -7.121212, -5.000000} + }); + + SceneListSubmit(&data[0], data.size()); + + check_equal(sent.size(), 5); + check_equal(sent[0].flags, GPU_CMD_POLYHDR); + check_equal(sent[1].flags, GPU_CMD_VERTEX); + check_equal(sent[2].flags, GPU_CMD_VERTEX); + + // Because we're sending a single triangle, we end up sending a + // degenerate final vert. But if we were sending more than one triangle + // this would be GPU_CMD_VERTEX twice + check_equal(sent[3].flags, GPU_CMD_VERTEX_EOL); + check_equal(sent[4].flags, GPU_CMD_VERTEX_EOL); + check_equal(sent[3], sent[4]); + return true; +} + +bool test_clip_case_010() { + /* The third vertex is visible only */ + sent.clear(); + + auto data = make_vertices({ + {-4.526650, -2.414213, -7.121212, -5.000000}, + {0.000000, -2.414213, 3.080808, 5.000000}, + {4.526650, -2.414213, -7.121212, -5.000000} + }); + + SceneListSubmit(&data[0], data.size()); + + check_equal(sent.size(), 4); + check_equal(sent[0].flags, GPU_CMD_POLYHDR); + check_equal(sent[1].flags, GPU_CMD_VERTEX); + check_equal(sent[2].flags, GPU_CMD_VERTEX); + check_equal(sent[3].flags, GPU_CMD_VERTEX_EOL); + return true; +} + +bool test_clip_case_100() { + /* The third vertex is visible only */ + sent.clear(); + + auto data = make_vertices({ + {-4.526650, -2.414213, -7.121212, -5.000000}, + {4.526650, -2.414213, -7.121212, -5.000000}, + {0.000000, -2.414213, 3.080808, 5.000000} + }); + + SceneListSubmit(&data[0], data.size()); + + check_equal(sent.size(), 5); + check_equal(sent[0].flags, GPU_CMD_POLYHDR); + check_equal(sent[1].flags, GPU_CMD_VERTEX); + check_equal(sent[2].flags, GPU_CMD_VERTEX); + + // Because we're sending a single triangle, we end up sending a + // degenerate final vert. But if we were sending more than one triangle + // this would be GPU_CMD_VERTEX twice + check_equal(sent[3].flags, GPU_CMD_VERTEX); + check_equal(sent[4].flags, GPU_CMD_VERTEX_EOL); + check_equal(sent[1], sent[2]); + return true; +} + +bool test_clip_case_110() { + /* 2nd and 3rd visible */ + sent.clear(); + + auto data = make_vertices({ + {0.0, -2.414213, -7.121212, -5.000000}, + {-4.526650, -2.414213, 3.080808, 5.000000}, + {4.526650, -2.414213, 3.080808, 5.000000} + }); + + SceneListSubmit(&data[0], data.size()); + + check_equal(sent.size(), 6); + check_equal(sent[0].flags, GPU_CMD_POLYHDR); + check_equal(sent[1].flags, GPU_CMD_VERTEX); + check_equal(sent[2].flags, GPU_CMD_VERTEX); + check_equal(sent[3].flags, GPU_CMD_VERTEX); + check_equal(sent[4].flags, GPU_CMD_VERTEX); + check_equal(sent[5].flags, GPU_CMD_VERTEX_EOL); + check_equal(sent[2], sent[4]); + return true; +} + +bool test_clip_case_011() { + /* 1st and 2nd visible */ + sent.clear(); + + auto data = make_vertices({ + {-4.526650, -2.414213, 3.080808, 5.000000}, + {4.526650, -2.414213, 3.080808, 5.000000}, + {0.0, -2.414213, -7.121212, -5.000000} + }); + + SceneListSubmit(&data[0], data.size()); + + check_equal(sent.size(), 6); + check_equal(sent[0].flags, GPU_CMD_POLYHDR); + check_equal(sent[1].flags, GPU_CMD_VERTEX); + check_equal(sent[2].flags, GPU_CMD_VERTEX); + check_equal(sent[3].flags, GPU_CMD_VERTEX); + check_equal(sent[4].flags, GPU_CMD_VERTEX); + check_equal(sent[5].flags, GPU_CMD_VERTEX_EOL); + check_equal(sent[2], sent[4]); + return true; +} + +bool test_clip_case_101() { + /* 1st and 3rd visible */ + sent.clear(); + + auto data = make_vertices({ + {-4.526650, -2.414213, 3.080808, 5.000000}, + {0.0, -2.414213, -7.121212, -5.000000}, + {4.526650, -2.414213, 3.080808, 5.000000}, + }); + + SceneListSubmit(&data[0], data.size()); + + check_equal(sent.size(), 6); + check_equal(sent[0].flags, GPU_CMD_POLYHDR); + check_equal(sent[1].flags, GPU_CMD_VERTEX); + check_equal(sent[2].flags, GPU_CMD_VERTEX); + check_equal(sent[3].flags, GPU_CMD_VERTEX); + check_equal(sent[4].flags, GPU_CMD_VERTEX); + check_equal(sent[5].flags, GPU_CMD_VERTEX_EOL); + check_equal(sent[3], sent[5]); + return true; +} + +bool test_start_behind() { + /* Triangle behind the plane, but the strip continues in front */ + sent.clear(); + + auto data = make_vertices({ + {-3.021717, -2.414213, -10.155344, -9.935254}, + {5.915236, -2.414213, -9.354721, -9.136231}, + {-5.915236, -2.414213, -0.264096, -0.063767}, + {3.021717, -2.414213, 0.536527, 0.735255}, + {-7.361995, -2.414213, 4.681529, 4.871976}, + {1.574958, -2.414213, 5.482152, 5.670999}, + }); + + SceneListSubmit(&data[0], data.size()); + + return true; +} + +int main(int argc, char* argv[]) { + // test_clip_case_000(); + test_clip_case_001(); + test_clip_case_010(); + test_clip_case_100(); + test_clip_case_110(); + test_clip_case_011(); + test_clip_case_101(); + // test_clip_case_111(); + + test_start_behind(); + + return 0; +} From e54494e99564b62dc62ecdbe14ae7141838b9d00 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Thu, 20 Apr 2023 20:45:59 +0100 Subject: [PATCH 02/10] More clipping work --- CMakeLists.txt | 1 + GL/platforms/sh4.c | 221 ++++++++++++++++-------------------- containers/aligned_vector.h | 1 + tests/zclip/main.cpp | 198 ++++++++++++++------------------ 4 files changed, 183 insertions(+), 238 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fb48c72..e22de7c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,6 +17,7 @@ string(TOUPPER ${BACKEND} BACKEND_UPPER) add_definitions(-DBACKEND_${BACKEND_UPPER}) set(CMAKE_C_STANDARD 99) +set(CMAKE_CXX_STANDARD 11) include_directories(include) diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index dee2b40..0b0f73b 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -153,32 +153,44 @@ void SceneListSubmit(void* src, int n) { int8_t queue_head = 0; int8_t queue_tail = 0; - Vertex __attribute__((aligned(32))) queue[3]; + /* The most vertices ever in the queue is 5 (as some clipping operations + * produce and additional couple of vertice, but we add one more so the ring buffer doesn't + * trip over itself (e.g. if tail == head we can guarantee it's empty, not full) */ + Vertex __attribute__((aligned(32))) queue[6]; const int queue_capacity = sizeof(queue) / sizeof(Vertex); Vertex* vertex = (Vertex*) src; uint32_t visible_mask = 0; - for(int i = 0; i < n; ++i) { - Vertex* v = vertex + i; - fprintf(stderr, "{%f, %f, %f, %f},\n", v->xyz[0], v->xyz[1], v->xyz[2], v->w); - } +#define QUEUE_OFFSET(n) (queue + ((queue_head + (n)) % queue_capacity)) +#define PUSH_VERTEX(v) \ + do { \ + memcpy_vertex(queue + queue_tail, (v)); \ + visible_mask = (visible_mask << 1) | ((v)->xyz[2] >= -(v)->w); \ + queue_tail = (queue_tail + 1) % queue_capacity; \ + } while(0) /* Assume first entry is a header */ _glSubmitHeaderOrVertex(d, vertex++); /* Push first 2 vertices of the strip */ - memcpy_vertex(&queue[0], vertex++); - memcpy_vertex(&queue[1], vertex++); - visible_mask = ((queue[0].xyz[2] >= -queue[0].w) << 1) | ((queue[1].xyz[2] >= -queue[1].w) << 2); - queue_tail = 2; + PUSH_VERTEX(vertex); + ++vertex; + PUSH_VERTEX(vertex); + ++vertex; + n -= 3; while(n--) { - Vertex* self = &queue[queue_tail]; - memcpy_vertex(self, vertex++); - visible_mask = (visible_mask >> 1) | ((self->xyz[2] >= -self->w) << 2); // Push new vertex - queue_tail = (queue_tail + 1) % queue_capacity; + Vertex* current = vertex; + if(!glIsVertex(vertex->flags)) { + _glSubmitHeaderOrVertex(d, vertex); + ++vertex; + continue; + } else { + PUSH_VERTEX(vertex); + ++vertex; + } switch(visible_mask) { case 0: @@ -190,8 +202,9 @@ void SceneListSubmit(void* src, int n) { _glPerspectiveDivideVertex(&queue[queue_head], h); _glSubmitHeaderOrVertex(d, &queue[queue_head]); queue_head = (queue_head + 1) % queue_capacity; + visible_mask >>= 1; - if(glIsLastVertex(self->flags)) { + if(glIsLastVertex(current->flags)) { /* If this was the last vertex in the strip, we clear the * triangle out */ while(queue_head != queue_tail) { @@ -203,7 +216,7 @@ void SceneListSubmit(void* src, int n) { visible_mask = 0; } break; - case 1: + case 4: /* First vertex was visible */ { Vertex __attribute__((aligned(32))) a, b; // Scratch vertices @@ -221,17 +234,11 @@ void SceneListSubmit(void* src, int n) { cross back*/ b.flags = v2->flags; - _glPerspectiveDivideVertex(v0, h); - _glSubmitHeaderOrVertex(d, v0); - _glPerspectiveDivideVertex(&a, h); - _glSubmitHeaderOrVertex(d, &a); - _glPerspectiveDivideVertex(&b, h); - _glSubmitHeaderOrVertex(d, &b); - _glSubmitHeaderOrVertex(d, &b); - - /* But skip the vertices that are already there */ - queue_head = (queue_head + 3) % queue_capacity; - visible_mask = 0; + // v0 is already at the head of the queue + memcpy_vertex(QUEUE_OFFSET(1), &a); // replace + memcpy_vertex(QUEUE_OFFSET(2), &b); // replace + PUSH_VERTEX(&b); /* Additional vertex */ + visible_mask = 15; /* All 4 vertices visible */ } break; case 2: @@ -248,21 +255,12 @@ void SceneListSubmit(void* src, int n) { a.flags = GPU_CMD_VERTEX; b.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&a, h); - _glSubmitHeaderOrVertex(d, &a); - - _glPerspectiveDivideVertex(v1, h); - _glSubmitHeaderOrVertex(d, v1); - - _glPerspectiveDivideVertex(&b, h); - _glSubmitHeaderOrVertex(d, &b); - - /* But skip the vertices that are already there */ - queue_head = (queue_head + 3) % queue_capacity; - visible_mask = 0; + memcpy_vertex(QUEUE_OFFSET(0), &a); // replace + memcpy_vertex(QUEUE_OFFSET(2), &b); // replace + visible_mask = 7; /* All 3 vertices visible */ } break; - case 3: /* First and second vertex were visible */ + case 6: /* First and second vertex were visible */ { Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; @@ -275,26 +273,13 @@ void SceneListSubmit(void* src, int n) { a.flags = v2->flags; b.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(v0, h); - _glSubmitHeaderOrVertex(d, v0); - - _glPerspectiveDivideVertex(v1, h); - _glSubmitHeaderOrVertex(d, v1); - - _glPerspectiveDivideVertex(&b, h); - _glSubmitHeaderOrVertex(d, &b); - - _glSubmitHeaderOrVertex(d, v1); - - _glPerspectiveDivideVertex(&a, h); - _glSubmitHeaderOrVertex(d, &a); - - /* But skip the vertices that are already there */ - queue_head = (queue_head + 3) % queue_capacity; - visible_mask = 0; + memcpy_vertex(QUEUE_OFFSET(2), &b); // replace + PUSH_VERTEX(v1); /* Additional vertex */ + PUSH_VERTEX(&a); /* Additional vertex */ + visible_mask = 31; /* All 5 vertices visible */ } break; - case 4: + case 1: /* Third vertex was visible. */ { Vertex __attribute__((aligned(32))) a, b; // Scratch vertices @@ -307,104 +292,90 @@ void SceneListSubmit(void* src, int n) { a.flags = GPU_CMD_VERTEX; b.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&a, h); - _glSubmitHeaderOrVertex(d, &a); - - _glSubmitHeaderOrVertex(d, &a); - - _glPerspectiveDivideVertex(&b, h); - _glSubmitHeaderOrVertex(d, &b); - - _glPerspectiveDivideVertex(&v2, h); - _glSubmitHeaderOrVertex(d, &v2); - - queue_head = (queue_head + 3) % queue_capacity; - visible_mask = 0; + memcpy_vertex(QUEUE_OFFSET(0), &a); // replace + memcpy_vertex(QUEUE_OFFSET(1), &a); // replace + memcpy_vertex(QUEUE_OFFSET(2), &b); // replace + PUSH_VERTEX(&v2); /* Additional vertex */ + visible_mask = 15; /* All 4 vertices visible */ } break; case 5: /* First and third vertex were visible */ { - Vertex __attribute__((aligned(32))) a, b, c; // Scratch vertices + Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; - Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; - Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + Vertex v1 = queue[(queue_head + 1) % queue_capacity]; + Vertex v2 = queue[(queue_head + 2) % queue_capacity]; - _glClipEdge(v0, v1, &a); - _glClipEdge(v1, v2, &b); + _glClipEdge(v0, &v1, &a); + _glClipEdge(&v1, &v2, &b); a.flags = GPU_CMD_VERTEX; b.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(v0, h); - _glSubmitHeaderOrVertex(d, v0); + memcpy_vertex(QUEUE_OFFSET(1), &a); // replace - _glPerspectiveDivideVertex(&a, h); - _glSubmitHeaderOrVertex(d, &a); + uint32_t v2_flags = v2.flags; + v2.flags = GPU_CMD_VERTEX; + memcpy_vertex(QUEUE_OFFSET(2), &v2); // replace + v2.flags = v2_flags; - uint32_t v2_flags = v2->flags; - v2->flags = GPU_CMD_VERTEX; - - _glPerspectiveDivideVertex(v2, h); - _glSubmitHeaderOrVertex(d, v2); - - _glPerspectiveDivideVertex(&b, h); - _glSubmitHeaderOrVertex(d, &b); - - v2->flags = v2_flags; - _glSubmitHeaderOrVertex(d, v2); - - queue_head = (queue_head + 3) % queue_capacity; - visible_mask = 0; + PUSH_VERTEX(&b); /* Additional vertex */ + PUSH_VERTEX(&v2); /* Additional vertex */ + visible_mask = 31; /* All 5 vertices visible */ } break; - case 6: /* Second and third vertex were visible */ + case 3: /* Second and third vertex were visible */ { Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; - Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; - Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + Vertex v1 = queue[(queue_head + 1) % queue_capacity]; + Vertex v2 = queue[(queue_head + 2) % queue_capacity]; - _glClipEdge(v0, v1, &a); - _glClipEdge(v2, v0, &b); + _glClipEdge(v0, &v1, &a); + _glClipEdge(&v2, v0, &b); a.flags = GPU_CMD_VERTEX; b.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&a, h); - _glSubmitHeaderOrVertex(d, &a); + memcpy_vertex(QUEUE_OFFSET(0), &a); // replace + memcpy_vertex(QUEUE_OFFSET(2), &b); // replace + PUSH_VERTEX(&v1); /* Additional vertex */ + PUSH_VERTEX(&v2); /* Additional vertex */ - _glPerspectiveDivideVertex(v1, h); - _glSubmitHeaderOrVertex(d, v1); + fprintf(stderr, "%x\n", queue[((queue_tail - 1) % queue_capacity)].flags); - _glPerspectiveDivideVertex(&b, h); - _glSubmitHeaderOrVertex(d, &b); - - _glSubmitHeaderOrVertex(d, v1); - - _glPerspectiveDivideVertex(v2, h); - _glSubmitHeaderOrVertex(d, v2); - - queue_head = (queue_head + 3) % queue_capacity; - visible_mask = 0; + visible_mask = 31; /* All 5 vertices visible */ } break; default: break; } - /* Submit the beginning of the next strip (2 verts, maybe a header) */ - int8_t v = 0; - while(v < 2 && n > 1) { - if(!glIsVertex(vertex->flags)) { - _glSubmitHeaderOrVertex(d, vertex); - } else { - memcpy_vertex(&queue[queue_tail], vertex++); - visible_mask = (visible_mask >> 1) | ((queue[queue_tail].xyz[2] >= -queue[queue_tail].w) << 2); // Push new vertex - queue_tail = (queue_tail + 1) % queue_capacity; - ++v; - } - --n; - } + if(glIsLastVertex(current->flags)) { + /* If this was the last vertex in the strip, we need to flush the queue and then + restart it again */ + while(visible_mask) { + // There are 3 vertices, so we push the first one + _glPerspectiveDivideVertex(&queue[queue_head], h); + _glSubmitHeaderOrVertex(d, &queue[queue_head]); + queue_head = (queue_head + 1) % queue_capacity; + + /* This bitmask is reversed to the direction it should be, but we're effectively counting + the bits here. Either everything is visible, or it was clipped and so everything is visible */ + visible_mask >>= 1; + } + } else { + /* Here we need to submit vertices until the visible mask is < 4 + * which would mean there are only 2 vertices queued */ + while(visible_mask > 3) { + // There are 3 vertices, so we push the first one + _glPerspectiveDivideVertex(&queue[queue_head], h); + _glSubmitHeaderOrVertex(d, &queue[queue_head]); + queue_head = (queue_head + 1) % queue_capacity; + int mask = (0x80000000 >> __builtin_clz(visible_mask)); + visible_mask &= ~mask; + } + } } } diff --git a/containers/aligned_vector.h b/containers/aligned_vector.h index 3400fe1..be62f6c 100644 --- a/containers/aligned_vector.h +++ b/containers/aligned_vector.h @@ -12,6 +12,7 @@ extern "C" { #if defined(__APPLE__) || defined(__WIN32__) /* Linux + Kos define this, OSX does not, so just use malloc there */ static inline void* memalign(size_t alignment, size_t size) { + (void) alignment; return malloc(size); } #else diff --git a/tests/zclip/main.cpp b/tests/zclip/main.cpp index 9cd2037..d81a208 100644 --- a/tests/zclip/main.cpp +++ b/tests/zclip/main.cpp @@ -151,27 +151,44 @@ void SceneListSubmit(void* src, int n) { int8_t queue_head = 0; int8_t queue_tail = 0; - Vertex __attribute__((aligned(32))) queue[5]; + /* The most vertices ever in the queue is 5 (as some clipping operations + * produce and additional couple of vertice, but we add one more so the ring buffer doesn't + * trip over itself (e.g. if tail == head we can guarantee it's empty, not full) */ + Vertex __attribute__((aligned(32))) queue[6]; const int queue_capacity = sizeof(queue) / sizeof(Vertex); Vertex* vertex = (Vertex*) src; uint32_t visible_mask = 0; +#define QUEUE_OFFSET(n) (queue + ((queue_head + (n)) % queue_capacity)) +#define PUSH_VERTEX(v) \ + do { \ + memcpy_vertex(queue + queue_tail, (v)); \ + visible_mask = (visible_mask << 1) | ((v)->xyz[2] >= -(v)->w); \ + queue_tail = (queue_tail + 1) % queue_capacity; \ + } while(0) + /* Assume first entry is a header */ _glSubmitHeaderOrVertex(d, vertex++); /* Push first 2 vertices of the strip */ - memcpy_vertex(&queue[0], vertex++); - memcpy_vertex(&queue[1], vertex++); - visible_mask = ((queue[0].xyz[2] >= -queue[0].w) << 1) | ((queue[1].xyz[2] >= -queue[1].w) << 2); - queue_tail = 2; + PUSH_VERTEX(vertex); + ++vertex; + PUSH_VERTEX(vertex); + ++vertex; + n -= 3; while(n--) { - Vertex* self = &queue[queue_tail]; - memcpy_vertex(self, vertex++); - visible_mask = (visible_mask >> 1) | ((self->xyz[2] >= -self->w) << 2); // Push new vertex - queue_tail = (queue_tail + 1) % queue_capacity; + Vertex* current = vertex; + if(!glIsVertex(vertex->flags)) { + _glSubmitHeaderOrVertex(d, vertex); + ++vertex; + continue; + } else { + PUSH_VERTEX(vertex); + ++vertex; + } switch(visible_mask) { case 0: @@ -183,8 +200,9 @@ void SceneListSubmit(void* src, int n) { _glPerspectiveDivideVertex(&queue[queue_head], h); _glSubmitHeaderOrVertex(d, &queue[queue_head]); queue_head = (queue_head + 1) % queue_capacity; + visible_mask >>= 1; - if(glIsLastVertex(self->flags)) { + if(glIsLastVertex(current->flags)) { /* If this was the last vertex in the strip, we clear the * triangle out */ while(queue_head != queue_tail) { @@ -196,7 +214,7 @@ void SceneListSubmit(void* src, int n) { visible_mask = 0; } break; - case 1: + case 4: /* First vertex was visible */ { Vertex __attribute__((aligned(32))) a, b; // Scratch vertices @@ -214,17 +232,11 @@ void SceneListSubmit(void* src, int n) { cross back*/ b.flags = v2->flags; - _glPerspectiveDivideVertex(v0, h); - _glSubmitHeaderOrVertex(d, v0); - _glPerspectiveDivideVertex(&a, h); - _glSubmitHeaderOrVertex(d, &a); - _glPerspectiveDivideVertex(&b, h); - _glSubmitHeaderOrVertex(d, &b); - _glSubmitHeaderOrVertex(d, &b); - - /* But skip the vertices that are already there */ - queue_head = (queue_head + 3) % queue_capacity; - visible_mask = 0; + // v0 is already at the head of the queue + memcpy_vertex(QUEUE_OFFSET(1), &a); // replace + memcpy_vertex(QUEUE_OFFSET(2), &b); // replace + PUSH_VERTEX(&b); /* Additional vertex */ + visible_mask = 15; /* All 4 vertices visible */ } break; case 2: @@ -241,21 +253,12 @@ void SceneListSubmit(void* src, int n) { a.flags = GPU_CMD_VERTEX; b.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&a, h); - _glSubmitHeaderOrVertex(d, &a); - - _glPerspectiveDivideVertex(v1, h); - _glSubmitHeaderOrVertex(d, v1); - - _glPerspectiveDivideVertex(&b, h); - _glSubmitHeaderOrVertex(d, &b); - - /* But skip the vertices that are already there */ - queue_head = (queue_head + 3) % queue_capacity; - visible_mask = 0; + memcpy_vertex(QUEUE_OFFSET(0), &a); // replace + memcpy_vertex(QUEUE_OFFSET(2), &b); // replace + visible_mask = 7; /* All 3 vertices visible */ } break; - case 3: /* First and second vertex were visible */ + case 6: /* First and second vertex were visible */ { Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; @@ -268,26 +271,13 @@ void SceneListSubmit(void* src, int n) { a.flags = v2->flags; b.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(v0, h); - _glSubmitHeaderOrVertex(d, v0); - - _glPerspectiveDivideVertex(v1, h); - _glSubmitHeaderOrVertex(d, v1); - - _glPerspectiveDivideVertex(&b, h); - _glSubmitHeaderOrVertex(d, &b); - - _glSubmitHeaderOrVertex(d, v1); - - _glPerspectiveDivideVertex(&a, h); - _glSubmitHeaderOrVertex(d, &a); - - /* But skip the vertices that are already there */ - queue_head = (queue_head + 3) % queue_capacity; - visible_mask = 0; + memcpy_vertex(QUEUE_OFFSET(2), &b); // replace + PUSH_VERTEX(v1); /* Additional vertex */ + PUSH_VERTEX(&a); /* Additional vertex */ + visible_mask = 31; /* All 5 vertices visible */ } break; - case 4: + case 1: /* Third vertex was visible. */ { Vertex __attribute__((aligned(32))) a, b; // Scratch vertices @@ -300,24 +290,16 @@ void SceneListSubmit(void* src, int n) { a.flags = GPU_CMD_VERTEX; b.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&a, h); - _glSubmitHeaderOrVertex(d, &a); - - _glSubmitHeaderOrVertex(d, &a); - - _glPerspectiveDivideVertex(&b, h); - _glSubmitHeaderOrVertex(d, &b); - - _glPerspectiveDivideVertex(&v2, h); - _glSubmitHeaderOrVertex(d, &v2); - - queue_head = (queue_head + 3) % queue_capacity; - visible_mask = 0; + memcpy_vertex(QUEUE_OFFSET(0), &a); // replace + memcpy_vertex(QUEUE_OFFSET(1), &a); // replace + memcpy_vertex(QUEUE_OFFSET(2), &b); // replace + PUSH_VERTEX(&v2); /* Additional vertex */ + visible_mask = 15; /* All 4 vertices visible */ } break; case 5: /* First and third vertex were visible */ { - Vertex __attribute__((aligned(32))) a, b, c; // Scratch vertices + Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; @@ -327,29 +309,19 @@ void SceneListSubmit(void* src, int n) { a.flags = GPU_CMD_VERTEX; b.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(v0, h); - _glSubmitHeaderOrVertex(d, v0); - - _glPerspectiveDivideVertex(&a, h); - _glSubmitHeaderOrVertex(d, &a); + memcpy_vertex(QUEUE_OFFSET(1), &a); // replace uint32_t v2_flags = v2->flags; v2->flags = GPU_CMD_VERTEX; - - _glPerspectiveDivideVertex(v2, h); - _glSubmitHeaderOrVertex(d, v2); - - _glPerspectiveDivideVertex(&b, h); - _glSubmitHeaderOrVertex(d, &b); - + memcpy_vertex(QUEUE_OFFSET(2), v2); // replace v2->flags = v2_flags; - _glSubmitHeaderOrVertex(d, v2); - queue_head = (queue_head + 3) % queue_capacity; - visible_mask = 0; + PUSH_VERTEX(&b); /* Additional vertex */ + PUSH_VERTEX(v2); /* Additional vertex */ + visible_mask = 31; /* All 5 vertices visible */ } break; - case 6: /* Second and third vertex were visible */ + case 3: /* Second and third vertex were visible */ { Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; @@ -362,46 +334,46 @@ void SceneListSubmit(void* src, int n) { a.flags = GPU_CMD_VERTEX; b.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&a, h); - _glSubmitHeaderOrVertex(d, &a); - - _glPerspectiveDivideVertex(v1, h); - _glSubmitHeaderOrVertex(d, v1); - - _glPerspectiveDivideVertex(&b, h); - _glSubmitHeaderOrVertex(d, &b); - - _glSubmitHeaderOrVertex(d, v1); - - _glPerspectiveDivideVertex(v2, h); - _glSubmitHeaderOrVertex(d, v2); - - queue_head = (queue_head + 3) % queue_capacity; - visible_mask = 0; + memcpy_vertex(QUEUE_OFFSET(0), &a); // replace + memcpy_vertex(QUEUE_OFFSET(2), &b); // replace + PUSH_VERTEX(v1); /* Additional vertex */ + PUSH_VERTEX(v2); /* Additional vertex */ + visible_mask = 31; /* All 5 vertices visible */ } break; default: break; } - /* Submit the beginning of the next strip (2 verts, maybe a header) */ - int8_t v = 0; - while(v < 2 && n > 1) { - if(!glIsVertex(vertex->flags)) { - _glSubmitHeaderOrVertex(d, vertex); - } else { - memcpy_vertex(&queue[queue_tail], vertex++); - visible_mask = (visible_mask >> 1) | ((queue[queue_tail].xyz[2] >= -queue[queue_tail].w) << 2); // Push new vertex - queue_tail = (queue_tail + 1) % queue_capacity; - ++v; - } - --n; - } + if(glIsLastVertex(current->flags)) { + /* If this was the last vertex in the strip, we need to flush the queue and then + restart it again */ + while(visible_mask) { + // There are 3 vertices, so we push the first one + _glPerspectiveDivideVertex(&queue[queue_head], h); + _glSubmitHeaderOrVertex(d, &queue[queue_head]); + queue_head = (queue_head + 1) % queue_capacity; + + /* This bitmask is reversed to the direction it should be, but we're effectively counting + the bits here. Either everything is visible, or it was clipped and so everything is visible */ + visible_mask >>= 1; + } + } else { + /* Here we need to submit vertices until the visible mask is < 4 + * which would mean there are only 2 vertices queued */ + while(visible_mask > 3) { + // There are 3 vertices, so we push the first one + _glPerspectiveDivideVertex(&queue[queue_head], h); + _glSubmitHeaderOrVertex(d, &queue[queue_head]); + queue_head = (queue_head + 1) % queue_capacity; + int mask = (0x80000000 >> __builtin_clz(visible_mask)); + visible_mask &= ~mask; + } + } } } - struct VertexTmpl { VertexTmpl(float x, float y, float z, float w): x(x), y(y), z(z), w(w) {} From 72c375f87c9f627e910c6e5162213b668058800d Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Fri, 21 Apr 2023 11:39:37 +0100 Subject: [PATCH 03/10] Fix some things --- GL/platforms/sh4.c | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index 0b0f73b..fc85cdd 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -202,19 +202,6 @@ void SceneListSubmit(void* src, int n) { _glPerspectiveDivideVertex(&queue[queue_head], h); _glSubmitHeaderOrVertex(d, &queue[queue_head]); queue_head = (queue_head + 1) % queue_capacity; - visible_mask >>= 1; - - if(glIsLastVertex(current->flags)) { - /* If this was the last vertex in the strip, we clear the - * triangle out */ - while(queue_head != queue_tail) { - _glPerspectiveDivideVertex(&queue[queue_head], h); - _glSubmitHeaderOrVertex(d, &queue[queue_head]); - queue_head = (queue_head + 1) % queue_capacity; - } - - visible_mask = 0; - } break; case 4: /* First vertex was visible */ @@ -340,9 +327,6 @@ void SceneListSubmit(void* src, int n) { memcpy_vertex(QUEUE_OFFSET(2), &b); // replace PUSH_VERTEX(&v1); /* Additional vertex */ PUSH_VERTEX(&v2); /* Additional vertex */ - - fprintf(stderr, "%x\n", queue[((queue_tail - 1) % queue_capacity)].flags); - visible_mask = 31; /* All 5 vertices visible */ } break; @@ -360,9 +344,8 @@ void SceneListSubmit(void* src, int n) { _glSubmitHeaderOrVertex(d, &queue[queue_head]); queue_head = (queue_head + 1) % queue_capacity; - /* This bitmask is reversed to the direction it should be, but we're effectively counting - the bits here. Either everything is visible, or it was clipped and so everything is visible */ - visible_mask >>= 1; + int mask = (0x80000000 >> __builtin_clz(visible_mask)); + visible_mask &= ~mask; } } else { /* Here we need to submit vertices until the visible mask is < 4 From baa275b41bea0270456a9b66cc5685a8aa3a6efa Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Fri, 21 Apr 2023 20:38:21 +0100 Subject: [PATCH 04/10] Fix a bunch of issues with clipping (almost working) --- GL/platforms/sh4.c | 196 +++++++++++++++++++------------------ tests/zclip/main.cpp | 224 +++++++++++++++++++++++++------------------ 2 files changed, 236 insertions(+), 184 deletions(-) diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index fc85cdd..46e4e45 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -13,9 +13,6 @@ (0xe0000000 | (((uint32_t)0x10000000) & 0x03ffffe0)) -static volatile uint32_t* PVR_LMMODE0 = (uint32_t*) 0xA05F6884; - - GL_FORCE_INLINE bool glIsVertex(const float flags) { return flags == GPU_CMD_VERTEX_EOL || flags == GPU_CMD_VERTEX; } @@ -78,7 +75,7 @@ GL_FORCE_INLINE void _glSubmitHeaderOrVertex(volatile uint32_t* d, const Vertex* #endif #if CLIP_DEBUG - printf("Submitting: %x (%x)\n", v, v->flags); + fprintf(stderr, "Submitting: %x (%x)\n", v, v->flags); #endif uint32_t *s = (uint32_t*) v; @@ -128,8 +125,9 @@ static inline void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) } #define SPAN_SORT_CFG 0x005F8030 -static volatile int *pvrdmacfg = (int*)0xA05F6888; -static volatile int *qacr = (int*)0xFF000038; +static volatile uint32_t* PVR_LMMODE0 = (uint32_t*) 0xA05F6884; +static volatile uint32_t *PVR_LMMODE1 = (uint32_t*) 0xA05F6888; +static volatile uint32_t *QACR = (uint32_t*) 0xFF000038; void SceneListSubmit(void* src, int n) { /* You need at least a header, and 3 vertices to render anything */ @@ -142,13 +140,13 @@ void SceneListSubmit(void* src, int n) { PVR_SET(SPAN_SORT_CFG, 0x0); //Set PVR DMA registers - pvrdmacfg[0] = 1; - pvrdmacfg[1] = 0; + *PVR_LMMODE0 = 0; + *PVR_LMMODE1 = 0; //Set QACR registers - qacr[1] = qacr[0] = 0x11; + QACR[1] = QACR[0] = 0x11; - volatile uint32_t *d = SQ_BASE_ADDRESS; + volatile uint32_t *d = SQ_BASE_ADDRESS; int8_t queue_head = 0; int8_t queue_tail = 0; @@ -156,31 +154,24 @@ void SceneListSubmit(void* src, int n) { /* The most vertices ever in the queue is 5 (as some clipping operations * produce and additional couple of vertice, but we add one more so the ring buffer doesn't * trip over itself (e.g. if tail == head we can guarantee it's empty, not full) */ - Vertex __attribute__((aligned(32))) queue[6]; + Vertex __attribute__((aligned(32))) queue[4]; const int queue_capacity = sizeof(queue) / sizeof(Vertex); Vertex* vertex = (Vertex*) src; + uint32_t visible_mask = 0; + uint32_t counter = 0; #define QUEUE_OFFSET(n) (queue + ((queue_head + (n)) % queue_capacity)) #define PUSH_VERTEX(v) \ do { \ memcpy_vertex(queue + queue_tail, (v)); \ - visible_mask = (visible_mask << 1) | ((v)->xyz[2] >= -(v)->w); \ + visible_mask = (visible_mask >> 1) | ((v)->xyz[2] >= -(v)->w) << 2; \ + assert(visible_mask < 15); \ queue_tail = (queue_tail + 1) % queue_capacity; \ + counter++; \ } while(0) - /* Assume first entry is a header */ - _glSubmitHeaderOrVertex(d, vertex++); - - /* Push first 2 vertices of the strip */ - PUSH_VERTEX(vertex); - ++vertex; - PUSH_VERTEX(vertex); - ++vertex; - - n -= 3; - while(n--) { Vertex* current = vertex; if(!glIsVertex(vertex->flags)) { @@ -192,18 +183,38 @@ void SceneListSubmit(void* src, int n) { ++vertex; } + if(counter < 3) { + continue; + } + switch(visible_mask) { case 0: queue_head = (queue_head + 1) % queue_capacity; + --counter; continue; break; case 7: /* All visible, push the first vertex and move on */ _glPerspectiveDivideVertex(&queue[queue_head], h); _glSubmitHeaderOrVertex(d, &queue[queue_head]); - queue_head = (queue_head + 1) % queue_capacity; + + if(glIsLastVertex(current->flags)) { + /* If this was the last vertex in the strip, we need to flush the queue and then + restart it again */ + queue_head = (queue_head + 1) % queue_capacity; + counter--; + while(counter--) { + // There are 3 vertices, so we push the first one + _glPerspectiveDivideVertex(&queue[queue_head], h); + _glSubmitHeaderOrVertex(d, &queue[queue_head]); + queue_head = (queue_head + 1) % queue_capacity; + } + + visible_mask = 0; + continue; + } break; - case 4: + case 1: /* First vertex was visible */ { Vertex __attribute__((aligned(32))) a, b; // Scratch vertices @@ -221,11 +232,14 @@ void SceneListSubmit(void* src, int n) { cross back*/ b.flags = v2->flags; - // v0 is already at the head of the queue - memcpy_vertex(QUEUE_OFFSET(1), &a); // replace - memcpy_vertex(QUEUE_OFFSET(2), &b); // replace - PUSH_VERTEX(&b); /* Additional vertex */ - visible_mask = 15; /* All 4 vertices visible */ + _glPerspectiveDivideVertex(v0, h); + _glPerspectiveDivideVertex(&a, h); + _glPerspectiveDivideVertex(&b, h); + + _glSubmitHeaderOrVertex(d, v0); + _glSubmitHeaderOrVertex(d, &a); + _glSubmitHeaderOrVertex(d, &b); + _glSubmitHeaderOrVertex(d, &b); } break; case 2: @@ -234,88 +248,104 @@ void SceneListSubmit(void* src, int n) { { Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; - Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; - Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + const Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + const Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; _glClipEdge(v0, v1, &a); _glClipEdge(v1, v2, &b); a.flags = GPU_CMD_VERTEX; b.flags = GPU_CMD_VERTEX_EOL; - memcpy_vertex(QUEUE_OFFSET(0), &a); // replace - memcpy_vertex(QUEUE_OFFSET(2), &b); // replace - visible_mask = 7; /* All 3 vertices visible */ + _glPerspectiveDivideVertex(v0, h); + _glPerspectiveDivideVertex(&a, h); + _glPerspectiveDivideVertex(&b, h); + + _glSubmitHeaderOrVertex(d, &a); + _glSubmitHeaderOrVertex(d, v0); + _glSubmitHeaderOrVertex(d, &b); } break; - case 6: /* First and second vertex were visible */ + case 3: /* First and second vertex were visible */ { Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; - Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex __attribute__((aligned(32))) v1 = queue[(queue_head + 1) % queue_capacity]; Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; - _glClipEdge(v1, v2, &a); + _glClipEdge(&v1, v2, &a); _glClipEdge(v2, v0, &b); a.flags = v2->flags; b.flags = GPU_CMD_VERTEX; - memcpy_vertex(QUEUE_OFFSET(2), &b); // replace - PUSH_VERTEX(v1); /* Additional vertex */ - PUSH_VERTEX(&a); /* Additional vertex */ - visible_mask = 31; /* All 5 vertices visible */ + _glPerspectiveDivideVertex(v0, h); + _glPerspectiveDivideVertex(&v1, h); + _glPerspectiveDivideVertex(&a, h); + _glPerspectiveDivideVertex(&b, h); + + _glSubmitHeaderOrVertex(d, v0); + _glSubmitHeaderOrVertex(d, &v1); + _glSubmitHeaderOrVertex(d, &b); + _glSubmitHeaderOrVertex(d, &v1); + _glSubmitHeaderOrVertex(d, &a); } break; - case 1: + case 4: /* Third vertex was visible. */ { Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; - Vertex v2 = queue[(queue_head + 2) % queue_capacity]; + Vertex __attribute__((aligned(32))) v2 = queue[(queue_head + 2) % queue_capacity]; _glClipEdge(&v2, v0, &a); _glClipEdge(v1, &v2, &b); a.flags = GPU_CMD_VERTEX; b.flags = GPU_CMD_VERTEX; - memcpy_vertex(QUEUE_OFFSET(0), &a); // replace - memcpy_vertex(QUEUE_OFFSET(1), &a); // replace - memcpy_vertex(QUEUE_OFFSET(2), &b); // replace - PUSH_VERTEX(&v2); /* Additional vertex */ - visible_mask = 15; /* All 4 vertices visible */ + _glPerspectiveDivideVertex(&v2, h); + _glPerspectiveDivideVertex(&a, h); + _glPerspectiveDivideVertex(&b, h); + + _glSubmitHeaderOrVertex(d, &a); + _glSubmitHeaderOrVertex(d, &a); + _glSubmitHeaderOrVertex(d, &b); + _glSubmitHeaderOrVertex(d, &v2); } break; case 5: /* First and third vertex were visible */ { Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; - Vertex v1 = queue[(queue_head + 1) % queue_capacity]; - Vertex v2 = queue[(queue_head + 2) % queue_capacity]; + Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex __attribute__((aligned(32))) v2 = queue[(queue_head + 2) % queue_capacity]; - _glClipEdge(v0, &v1, &a); - _glClipEdge(&v1, &v2, &b); + _glClipEdge(v0, v1, &a); + _glClipEdge(v1, &v2, &b); a.flags = GPU_CMD_VERTEX; b.flags = GPU_CMD_VERTEX; - memcpy_vertex(QUEUE_OFFSET(1), &a); // replace + _glPerspectiveDivideVertex(v0, h); + _glPerspectiveDivideVertex(&v2, h); + _glPerspectiveDivideVertex(&a, h); + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(d, v0); + _glSubmitHeaderOrVertex(d, &a); uint32_t v2_flags = v2.flags; v2.flags = GPU_CMD_VERTEX; - memcpy_vertex(QUEUE_OFFSET(2), &v2); // replace + _glSubmitHeaderOrVertex(d, &v2); v2.flags = v2_flags; - - PUSH_VERTEX(&b); /* Additional vertex */ - PUSH_VERTEX(&v2); /* Additional vertex */ - visible_mask = 31; /* All 5 vertices visible */ + _glSubmitHeaderOrVertex(d, &b); + _glSubmitHeaderOrVertex(d, &v2); } break; - case 3: /* Second and third vertex were visible */ + case 6: /* Second and third vertex were visible */ { Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; - Vertex v1 = queue[(queue_head + 1) % queue_capacity]; - Vertex v2 = queue[(queue_head + 2) % queue_capacity]; + Vertex __attribute__((aligned(32))) v1 = queue[(queue_head + 1) % queue_capacity]; + Vertex __attribute__((aligned(32))) v2 = queue[(queue_head + 2) % queue_capacity]; _glClipEdge(v0, &v1, &a); _glClipEdge(&v2, v0, &b); @@ -323,42 +353,24 @@ void SceneListSubmit(void* src, int n) { a.flags = GPU_CMD_VERTEX; b.flags = GPU_CMD_VERTEX; - memcpy_vertex(QUEUE_OFFSET(0), &a); // replace - memcpy_vertex(QUEUE_OFFSET(2), &b); // replace - PUSH_VERTEX(&v1); /* Additional vertex */ - PUSH_VERTEX(&v2); /* Additional vertex */ - visible_mask = 31; /* All 5 vertices visible */ + _glPerspectiveDivideVertex(&v1, h); + _glPerspectiveDivideVertex(&v2, h); + _glPerspectiveDivideVertex(&a, h); + _glPerspectiveDivideVertex(&b, h); + + _glSubmitHeaderOrVertex(d, &a); + _glSubmitHeaderOrVertex(d, &v1); + _glSubmitHeaderOrVertex(d, &b); + _glSubmitHeaderOrVertex(d, &v1); + _glSubmitHeaderOrVertex(d, &v2); } break; default: break; } - if(glIsLastVertex(current->flags)) { - /* If this was the last vertex in the strip, we need to flush the queue and then - restart it again */ - - while(visible_mask) { - // There are 3 vertices, so we push the first one - _glPerspectiveDivideVertex(&queue[queue_head], h); - _glSubmitHeaderOrVertex(d, &queue[queue_head]); - queue_head = (queue_head + 1) % queue_capacity; - - int mask = (0x80000000 >> __builtin_clz(visible_mask)); - visible_mask &= ~mask; - } - } else { - /* Here we need to submit vertices until the visible mask is < 4 - * which would mean there are only 2 vertices queued */ - while(visible_mask > 3) { - // There are 3 vertices, so we push the first one - _glPerspectiveDivideVertex(&queue[queue_head], h); - _glSubmitHeaderOrVertex(d, &queue[queue_head]); - queue_head = (queue_head + 1) % queue_capacity; - int mask = (0x80000000 >> __builtin_clz(visible_mask)); - visible_mask &= ~mask; - } - } + queue_head = (queue_head + 1) % queue_capacity; + counter--; } } diff --git a/tests/zclip/main.cpp b/tests/zclip/main.cpp index d81a208..4de4564 100644 --- a/tests/zclip/main.cpp +++ b/tests/zclip/main.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #define SQ_BASE_ADDRESS 0 #define SPAN_SORT_CFG 0 @@ -141,7 +142,7 @@ void SceneListSubmit(void* src, int n) { //Set PVR DMA registers pvrdmacfg[0] = 1; - pvrdmacfg[1] = 0; + pvrdmacfg[1] = 1; //Set QACR registers qacr[1] = qacr[0] = 0x11; @@ -154,31 +155,24 @@ void SceneListSubmit(void* src, int n) { /* The most vertices ever in the queue is 5 (as some clipping operations * produce and additional couple of vertice, but we add one more so the ring buffer doesn't * trip over itself (e.g. if tail == head we can guarantee it's empty, not full) */ - Vertex __attribute__((aligned(32))) queue[6]; + Vertex __attribute__((aligned(32))) queue[4]; const int queue_capacity = sizeof(queue) / sizeof(Vertex); Vertex* vertex = (Vertex*) src; + uint32_t visible_mask = 0; + uint32_t counter = 0; #define QUEUE_OFFSET(n) (queue + ((queue_head + (n)) % queue_capacity)) #define PUSH_VERTEX(v) \ do { \ memcpy_vertex(queue + queue_tail, (v)); \ - visible_mask = (visible_mask << 1) | ((v)->xyz[2] >= -(v)->w); \ + visible_mask = (visible_mask >> 1) | ((v)->xyz[2] >= -(v)->w) << 2; \ + assert(visible_mask < 15); \ queue_tail = (queue_tail + 1) % queue_capacity; \ + counter++; \ } while(0) - /* Assume first entry is a header */ - _glSubmitHeaderOrVertex(d, vertex++); - - /* Push first 2 vertices of the strip */ - PUSH_VERTEX(vertex); - ++vertex; - PUSH_VERTEX(vertex); - ++vertex; - - n -= 3; - while(n--) { Vertex* current = vertex; if(!glIsVertex(vertex->flags)) { @@ -190,31 +184,38 @@ void SceneListSubmit(void* src, int n) { ++vertex; } + if(counter < 3) { + continue; + } + switch(visible_mask) { case 0: queue_head = (queue_head + 1) % queue_capacity; + --counter; continue; break; case 7: /* All visible, push the first vertex and move on */ _glPerspectiveDivideVertex(&queue[queue_head], h); _glSubmitHeaderOrVertex(d, &queue[queue_head]); - queue_head = (queue_head + 1) % queue_capacity; - visible_mask >>= 1; if(glIsLastVertex(current->flags)) { - /* If this was the last vertex in the strip, we clear the - * triangle out */ - while(queue_head != queue_tail) { + /* If this was the last vertex in the strip, we need to flush the queue and then + restart it again */ + queue_head = (queue_head + 1) % queue_capacity; + counter--; + while(counter--) { + // There are 3 vertices, so we push the first one _glPerspectiveDivideVertex(&queue[queue_head], h); _glSubmitHeaderOrVertex(d, &queue[queue_head]); queue_head = (queue_head + 1) % queue_capacity; } visible_mask = 0; + continue; } break; - case 4: + case 1: /* First vertex was visible */ { Vertex __attribute__((aligned(32))) a, b; // Scratch vertices @@ -232,11 +233,14 @@ void SceneListSubmit(void* src, int n) { cross back*/ b.flags = v2->flags; - // v0 is already at the head of the queue - memcpy_vertex(QUEUE_OFFSET(1), &a); // replace - memcpy_vertex(QUEUE_OFFSET(2), &b); // replace - PUSH_VERTEX(&b); /* Additional vertex */ - visible_mask = 15; /* All 4 vertices visible */ + _glPerspectiveDivideVertex(v0, h); + _glPerspectiveDivideVertex(&a, h); + _glPerspectiveDivideVertex(&b, h); + + _glSubmitHeaderOrVertex(d, v0); + _glSubmitHeaderOrVertex(d, &a); + _glSubmitHeaderOrVertex(d, &b); + _glSubmitHeaderOrVertex(d, &b); } break; case 2: @@ -245,56 +249,69 @@ void SceneListSubmit(void* src, int n) { { Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; - Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; - Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + const Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + const Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; _glClipEdge(v0, v1, &a); _glClipEdge(v1, v2, &b); a.flags = GPU_CMD_VERTEX; b.flags = GPU_CMD_VERTEX_EOL; - memcpy_vertex(QUEUE_OFFSET(0), &a); // replace - memcpy_vertex(QUEUE_OFFSET(2), &b); // replace - visible_mask = 7; /* All 3 vertices visible */ + _glPerspectiveDivideVertex(v0, h); + _glPerspectiveDivideVertex(&a, h); + _glPerspectiveDivideVertex(&b, h); + + _glSubmitHeaderOrVertex(d, &a); + _glSubmitHeaderOrVertex(d, v0); + _glSubmitHeaderOrVertex(d, &b); } break; - case 6: /* First and second vertex were visible */ + case 3: /* First and second vertex were visible */ { Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; - Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex __attribute__((aligned(32))) v1 = queue[(queue_head + 1) % queue_capacity]; Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; - _glClipEdge(v1, v2, &a); + _glClipEdge(&v1, v2, &a); _glClipEdge(v2, v0, &b); a.flags = v2->flags; b.flags = GPU_CMD_VERTEX; - memcpy_vertex(QUEUE_OFFSET(2), &b); // replace - PUSH_VERTEX(v1); /* Additional vertex */ - PUSH_VERTEX(&a); /* Additional vertex */ - visible_mask = 31; /* All 5 vertices visible */ + _glPerspectiveDivideVertex(v0, h); + _glPerspectiveDivideVertex(&v1, h); + _glPerspectiveDivideVertex(&a, h); + _glPerspectiveDivideVertex(&b, h); + + _glSubmitHeaderOrVertex(d, v0); + _glSubmitHeaderOrVertex(d, &v1); + _glSubmitHeaderOrVertex(d, &b); + _glSubmitHeaderOrVertex(d, &v1); + _glSubmitHeaderOrVertex(d, &a); } break; - case 1: + case 4: /* Third vertex was visible. */ { Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; - Vertex v2 = queue[(queue_head + 2) % queue_capacity]; + Vertex __attribute__((aligned(32))) v2 = queue[(queue_head + 2) % queue_capacity]; _glClipEdge(&v2, v0, &a); _glClipEdge(v1, &v2, &b); a.flags = GPU_CMD_VERTEX; b.flags = GPU_CMD_VERTEX; - memcpy_vertex(QUEUE_OFFSET(0), &a); // replace - memcpy_vertex(QUEUE_OFFSET(1), &a); // replace - memcpy_vertex(QUEUE_OFFSET(2), &b); // replace - PUSH_VERTEX(&v2); /* Additional vertex */ - visible_mask = 15; /* All 4 vertices visible */ + _glPerspectiveDivideVertex(&v2, h); + _glPerspectiveDivideVertex(&a, h); + _glPerspectiveDivideVertex(&b, h); + + _glSubmitHeaderOrVertex(d, &a); + _glSubmitHeaderOrVertex(d, &a); + _glSubmitHeaderOrVertex(d, &b); + _glSubmitHeaderOrVertex(d, &v2); } break; case 5: /* First and third vertex were visible */ @@ -302,75 +319,59 @@ void SceneListSubmit(void* src, int n) { Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; - Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + Vertex __attribute__((aligned(32))) v2 = queue[(queue_head + 2) % queue_capacity]; _glClipEdge(v0, v1, &a); - _glClipEdge(v1, v2, &b); + _glClipEdge(v1, &v2, &b); a.flags = GPU_CMD_VERTEX; b.flags = GPU_CMD_VERTEX; - memcpy_vertex(QUEUE_OFFSET(1), &a); // replace + _glPerspectiveDivideVertex(v0, h); + _glPerspectiveDivideVertex(&v2, h); + _glPerspectiveDivideVertex(&a, h); + _glPerspectiveDivideVertex(&b, h); - uint32_t v2_flags = v2->flags; - v2->flags = GPU_CMD_VERTEX; - memcpy_vertex(QUEUE_OFFSET(2), v2); // replace - v2->flags = v2_flags; - - PUSH_VERTEX(&b); /* Additional vertex */ - PUSH_VERTEX(v2); /* Additional vertex */ - visible_mask = 31; /* All 5 vertices visible */ + _glSubmitHeaderOrVertex(d, v0); + _glSubmitHeaderOrVertex(d, &a); + uint32_t v2_flags = v2.flags; + v2.flags = GPU_CMD_VERTEX; + _glSubmitHeaderOrVertex(d, &v2); + v2.flags = v2_flags; + _glSubmitHeaderOrVertex(d, &b); + _glSubmitHeaderOrVertex(d, &v2); } break; - case 3: /* Second and third vertex were visible */ + case 6: /* Second and third vertex were visible */ { Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; - Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; - Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + Vertex __attribute__((aligned(32))) v1 = queue[(queue_head + 1) % queue_capacity]; + Vertex __attribute__((aligned(32))) v2 = queue[(queue_head + 2) % queue_capacity]; - _glClipEdge(v0, v1, &a); - _glClipEdge(v2, v0, &b); + _glClipEdge(v0, &v1, &a); + _glClipEdge(&v2, v0, &b); a.flags = GPU_CMD_VERTEX; b.flags = GPU_CMD_VERTEX; - memcpy_vertex(QUEUE_OFFSET(0), &a); // replace - memcpy_vertex(QUEUE_OFFSET(2), &b); // replace - PUSH_VERTEX(v1); /* Additional vertex */ - PUSH_VERTEX(v2); /* Additional vertex */ - visible_mask = 31; /* All 5 vertices visible */ + _glPerspectiveDivideVertex(&v1, h); + _glPerspectiveDivideVertex(&v2, h); + _glPerspectiveDivideVertex(&a, h); + _glPerspectiveDivideVertex(&b, h); + + _glSubmitHeaderOrVertex(d, &a); + _glSubmitHeaderOrVertex(d, &v1); + _glSubmitHeaderOrVertex(d, &b); + _glSubmitHeaderOrVertex(d, &v1); + _glSubmitHeaderOrVertex(d, &v2); } break; default: break; } - if(glIsLastVertex(current->flags)) { - /* If this was the last vertex in the strip, we need to flush the queue and then - restart it again */ - - while(visible_mask) { - // There are 3 vertices, so we push the first one - _glPerspectiveDivideVertex(&queue[queue_head], h); - _glSubmitHeaderOrVertex(d, &queue[queue_head]); - queue_head = (queue_head + 1) % queue_capacity; - - /* This bitmask is reversed to the direction it should be, but we're effectively counting - the bits here. Either everything is visible, or it was clipped and so everything is visible */ - visible_mask >>= 1; - } - } else { - /* Here we need to submit vertices until the visible mask is < 4 - * which would mean there are only 2 vertices queued */ - while(visible_mask > 3) { - // There are 3 vertices, so we push the first one - _glPerspectiveDivideVertex(&queue[queue_head], h); - _glSubmitHeaderOrVertex(d, &queue[queue_head]); - queue_head = (queue_head + 1) % queue_capacity; - int mask = (0x80000000 >> __builtin_clz(visible_mask)); - visible_mask &= ~mask; - } - } + queue_head = (queue_head + 1) % queue_capacity; + counter--; } } @@ -563,6 +564,27 @@ bool test_clip_case_101() { return true; } +bool test_clip_case_111() { + /* 1st and 3rd visible */ + sent.clear(); + + auto data = make_vertices({ + {-4.526650, -2.414213, 3.080808, 5.000000}, + {0.0, -2.414213, -7.121212, 8.000000}, + {4.526650, -2.414213, 3.080808, 5.000000}, + }); + + SceneListSubmit(&data[0], data.size()); + + check_equal(sent.size(), 4); + check_equal(sent[0].flags, GPU_CMD_POLYHDR); + check_equal(sent[1].flags, GPU_CMD_VERTEX); + check_equal(sent[2].flags, GPU_CMD_VERTEX); + check_equal(sent[3].flags, GPU_CMD_VERTEX_EOL); + return true; +} + + bool test_start_behind() { /* Triangle behind the plane, but the strip continues in front */ sent.clear(); @@ -581,6 +603,23 @@ bool test_start_behind() { return true; } +bool test_longer_strip() { + sent.clear(); + + auto data = make_vertices({ + {-4.384623, -2.414213, -5.699644, -5.488456}, + {4.667572, -2.414213, -5.621354, -5.410322}, + {-4.667572, -2.414213, 4.319152, 4.510323}, + {4.384623, -2.414213, 4.397442, 4.588456}, + {-4.809045, -2.414213, 9.328549, 9.509711}, + {4.243149, -2.414213, 9.406840, 9.587846}, + }); + + SceneListSubmit(&data[0], data.size()); + + return true; +} + int main(int argc, char* argv[]) { // test_clip_case_000(); test_clip_case_001(); @@ -589,9 +628,10 @@ int main(int argc, char* argv[]) { test_clip_case_110(); test_clip_case_011(); test_clip_case_101(); - // test_clip_case_111(); + test_clip_case_111(); test_start_behind(); + test_longer_strip(); return 0; } From 095ebf27901e3a321bcc139f037b0dc2fb9691f1 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Sat, 22 Apr 2023 11:37:42 +0100 Subject: [PATCH 05/10] Fix final bug --- GL/platforms/sh4.c | 3 +-- tests/zclip/main.cpp | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index 46e4e45..dd85432 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -202,8 +202,7 @@ void SceneListSubmit(void* src, int n) { /* If this was the last vertex in the strip, we need to flush the queue and then restart it again */ queue_head = (queue_head + 1) % queue_capacity; - counter--; - while(counter--) { + while(--counter) { // There are 3 vertices, so we push the first one _glPerspectiveDivideVertex(&queue[queue_head], h); _glSubmitHeaderOrVertex(d, &queue[queue_head]); diff --git a/tests/zclip/main.cpp b/tests/zclip/main.cpp index 4de4564..70292a7 100644 --- a/tests/zclip/main.cpp +++ b/tests/zclip/main.cpp @@ -203,8 +203,7 @@ void SceneListSubmit(void* src, int n) { /* If this was the last vertex in the strip, we need to flush the queue and then restart it again */ queue_head = (queue_head + 1) % queue_capacity; - counter--; - while(counter--) { + while(--counter) { // There are 3 vertices, so we push the first one _glPerspectiveDivideVertex(&queue[queue_head], h); _glSubmitHeaderOrVertex(d, &queue[queue_head]); From 9cc52a01feb021a5859ef86e8f643de481877ad0 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Sat, 22 Apr 2023 20:47:45 +0100 Subject: [PATCH 06/10] Better clipping --- GL/platforms/sh4.c | 63 +++++++++++++++++--------------- samples/nehe10/romdisk/world.txt | 2 +- 2 files changed, 34 insertions(+), 31 deletions(-) diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index dd85432..aa3c42f 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -160,7 +160,6 @@ void SceneListSubmit(void* src, int n) { Vertex* vertex = (Vertex*) src; uint32_t visible_mask = 0; - uint32_t counter = 0; #define QUEUE_OFFSET(n) (queue + ((queue_head + (n)) % queue_capacity)) #define PUSH_VERTEX(v) \ @@ -169,54 +168,60 @@ void SceneListSubmit(void* src, int n) { visible_mask = (visible_mask >> 1) | ((v)->xyz[2] >= -(v)->w) << 2; \ assert(visible_mask < 15); \ queue_tail = (queue_tail + 1) % queue_capacity; \ - counter++; \ } while(0) +#if CLIP_DEBUG + for(int i = 0; i < n; ++i) { + fprintf(stderr, "{%f, %f, %f, %f}, // %x (%x)\n", vertex[i].xyz[0], vertex[i].xyz[1], vertex[i].xyz[2], vertex[i].w, vertex[i].flags, &vertex[i]); + } + + fprintf(stderr, "----\n"); +#endif while(n--) { Vertex* current = vertex; if(!glIsVertex(vertex->flags)) { - _glSubmitHeaderOrVertex(d, vertex); - ++vertex; + _glSubmitHeaderOrVertex(d, vertex++); continue; } else { PUSH_VERTEX(vertex); ++vertex; + + int counter = (queue_tail - queue_head + queue_capacity) % queue_capacity; + if(counter < 3) { + continue; + } } - if(counter < 3) { - continue; - } - +#if CLIP_DEBUG + fprintf(stderr, "%d\n", visible_mask); +#endif + Vertex __attribute__((aligned(32))) a, b; // Scratch vertices + bool last_vertex = glIsLastVertex(current->flags); switch(visible_mask) { case 0: - queue_head = (queue_head + 1) % queue_capacity; - --counter; - continue; break; case 7: /* All visible, push the first vertex and move on */ _glPerspectiveDivideVertex(&queue[queue_head], h); _glSubmitHeaderOrVertex(d, &queue[queue_head]); - if(glIsLastVertex(current->flags)) { + if(last_vertex) { /* If this was the last vertex in the strip, we need to flush the queue and then restart it again */ - queue_head = (queue_head + 1) % queue_capacity; - while(--counter) { - // There are 3 vertices, so we push the first one - _glPerspectiveDivideVertex(&queue[queue_head], h); - _glSubmitHeaderOrVertex(d, &queue[queue_head]); - queue_head = (queue_head + 1) % queue_capacity; - } - visible_mask = 0; - continue; + int v1 = (queue_head + 1) % queue_capacity; + int v2 = (queue_head + 2) % queue_capacity; + + _glPerspectiveDivideVertex(&queue[v1], h); + _glSubmitHeaderOrVertex(d, &queue[v1]); + + _glPerspectiveDivideVertex(&queue[v2], h); + _glSubmitHeaderOrVertex(d, &queue[v2]); } break; case 1: /* First vertex was visible */ { - Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; @@ -245,7 +250,6 @@ void SceneListSubmit(void* src, int n) { /* Second vertex was visible. In self case we need to create a triangle and produce two new vertices: 1-2, and 2-3. */ { - Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; const Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; const Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; @@ -253,7 +257,7 @@ void SceneListSubmit(void* src, int n) { _glClipEdge(v0, v1, &a); _glClipEdge(v1, v2, &b); a.flags = GPU_CMD_VERTEX; - b.flags = GPU_CMD_VERTEX_EOL; + b.flags = v2->flags; _glPerspectiveDivideVertex(v0, h); _glPerspectiveDivideVertex(&a, h); @@ -266,7 +270,6 @@ void SceneListSubmit(void* src, int n) { break; case 3: /* First and second vertex were visible */ { - Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; Vertex __attribute__((aligned(32))) v1 = queue[(queue_head + 1) % queue_capacity]; Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; @@ -292,7 +295,6 @@ void SceneListSubmit(void* src, int n) { case 4: /* Third vertex was visible. */ { - Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; Vertex __attribute__((aligned(32))) v2 = queue[(queue_head + 2) % queue_capacity]; @@ -314,7 +316,6 @@ void SceneListSubmit(void* src, int n) { break; case 5: /* First and third vertex were visible */ { - Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; Vertex __attribute__((aligned(32))) v2 = queue[(queue_head + 2) % queue_capacity]; @@ -341,7 +342,6 @@ void SceneListSubmit(void* src, int n) { break; case 6: /* Second and third vertex were visible */ { - Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; Vertex __attribute__((aligned(32))) v1 = queue[(queue_head + 1) % queue_capacity]; Vertex __attribute__((aligned(32))) v2 = queue[(queue_head + 2) % queue_capacity]; @@ -368,8 +368,11 @@ void SceneListSubmit(void* src, int n) { break; } - queue_head = (queue_head + 1) % queue_capacity; - counter--; + if(last_vertex) { + visible_mask = queue_head = queue_tail = 0; + } else { + queue_head = (queue_head + 1) % queue_capacity; + } } } diff --git a/samples/nehe10/romdisk/world.txt b/samples/nehe10/romdisk/world.txt index a3368bb..8f645ac 100644 --- a/samples/nehe10/romdisk/world.txt +++ b/samples/nehe10/romdisk/world.txt @@ -157,4 +157,4 @@ NUMPOLLIES 36 2.0 0.0 -0.5 0.0 0.0 3.0 1.0 -0.5 1.0 1.0 2.0 1.0 -0.5 0.0 1.0 -2.0 0.0 -0.5 0.0 0.0 \ No newline at end of file +2.0 0.0 -0.5 0.0 0.0 From 2ec7055547e01ad2e05e97a66407a1ec0b0a1e5e Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Sun, 23 Apr 2023 07:44:09 +0100 Subject: [PATCH 07/10] Optimisations --- GL/platforms/sh4.c | 48 ++++++++++++------------- tests/zclip/main.cpp | 83 ++++++++++++++++++++++---------------------- 2 files changed, 65 insertions(+), 66 deletions(-) diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index aa3c42f..2ed55f0 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -146,7 +146,7 @@ void SceneListSubmit(void* src, int n) { //Set QACR registers QACR[1] = QACR[0] = 0x11; - volatile uint32_t *d = SQ_BASE_ADDRESS; + volatile uint32_t *d = SQ_BASE_ADDRESS; int8_t queue_head = 0; int8_t queue_tail = 0; @@ -159,17 +159,6 @@ void SceneListSubmit(void* src, int n) { Vertex* vertex = (Vertex*) src; - uint32_t visible_mask = 0; - -#define QUEUE_OFFSET(n) (queue + ((queue_head + (n)) % queue_capacity)) -#define PUSH_VERTEX(v) \ - do { \ - memcpy_vertex(queue + queue_tail, (v)); \ - visible_mask = (visible_mask >> 1) | ((v)->xyz[2] >= -(v)->w) << 2; \ - assert(visible_mask < 15); \ - queue_tail = (queue_tail + 1) % queue_capacity; \ - } while(0) - #if CLIP_DEBUG for(int i = 0; i < n; ++i) { fprintf(stderr, "{%f, %f, %f, %f}, // %x (%x)\n", vertex[i].xyz[0], vertex[i].xyz[1], vertex[i].xyz[2], vertex[i].w, vertex[i].flags, &vertex[i]); @@ -177,26 +166,35 @@ void SceneListSubmit(void* src, int n) { fprintf(stderr, "----\n"); #endif - while(n--) { - Vertex* current = vertex; - if(!glIsVertex(vertex->flags)) { - _glSubmitHeaderOrVertex(d, vertex++); - continue; - } else { - PUSH_VERTEX(vertex); - ++vertex; + uint8_t visible_mask = 0; + bool last_vertex = false; - int counter = (queue_tail - queue_head + queue_capacity) % queue_capacity; - if(counter < 3) { - continue; - } + while(n--) { + uint8_t counter = 0; + last_vertex = false; + memcpy_vertex(queue + queue_tail, vertex++); + switch(queue[queue_tail].flags) { + case GPU_CMD_VERTEX_EOL: + last_vertex = true; + case GPU_CMD_VERTEX: + visible_mask = (visible_mask >> 1) | (queue[queue_tail].xyz[2] >= -queue[queue_tail].w) << 2; + assert(visible_mask < 15); + queue_tail = (queue_tail + 1) % queue_capacity; + counter = (queue_tail - queue_head + queue_capacity) % queue_capacity; + break; + default: + _glSubmitHeaderOrVertex(d, &queue[queue_tail]); + break; + } + + if(counter < 3) { + continue; } #if CLIP_DEBUG fprintf(stderr, "%d\n", visible_mask); #endif Vertex __attribute__((aligned(32))) a, b; // Scratch vertices - bool last_vertex = glIsLastVertex(current->flags); switch(visible_mask) { case 0: break; diff --git a/tests/zclip/main.cpp b/tests/zclip/main.cpp index 70292a7..7b7e316 100644 --- a/tests/zclip/main.cpp +++ b/tests/zclip/main.cpp @@ -159,65 +159,67 @@ void SceneListSubmit(void* src, int n) { const int queue_capacity = sizeof(queue) / sizeof(Vertex); Vertex* vertex = (Vertex*) src; - uint32_t visible_mask = 0; - uint32_t counter = 0; -#define QUEUE_OFFSET(n) (queue + ((queue_head + (n)) % queue_capacity)) -#define PUSH_VERTEX(v) \ - do { \ - memcpy_vertex(queue + queue_tail, (v)); \ - visible_mask = (visible_mask >> 1) | ((v)->xyz[2] >= -(v)->w) << 2; \ - assert(visible_mask < 15); \ - queue_tail = (queue_tail + 1) % queue_capacity; \ - counter++; \ - } while(0) +#if CLIP_DEBUG + for(int i = 0; i < n; ++i) { + fprintf(stderr, "{%f, %f, %f, %f}, // %x (%x)\n", vertex[i].xyz[0], vertex[i].xyz[1], vertex[i].xyz[2], vertex[i].w, vertex[i].flags, &vertex[i]); + } + fprintf(stderr, "----\n"); +#endif while(n--) { - Vertex* current = vertex; - if(!glIsVertex(vertex->flags)) { - _glSubmitHeaderOrVertex(d, vertex); - ++vertex; - continue; - } else { - PUSH_VERTEX(vertex); - ++vertex; + bool last_vertex = false; + memcpy_vertex(queue + queue_tail, vertex); + ++vertex; + switch(queue[queue_tail].flags) { + case GPU_CMD_POLYHDR: + _glSubmitHeaderOrVertex(d, &queue[queue_tail]); + break; + case GPU_CMD_VERTEX_EOL: + last_vertex = true; + case GPU_CMD_VERTEX: + visible_mask = (visible_mask >> 1) | (queue[queue_tail].xyz[2] >= -queue[queue_tail].w) << 2; + assert(visible_mask < 15); + queue_tail = (queue_tail + 1) % queue_capacity; + default: + break; } + int counter = (queue_tail - queue_head + queue_capacity) % queue_capacity; if(counter < 3) { continue; } +#if CLIP_DEBUG + fprintf(stderr, "%d\n", visible_mask); +#endif + Vertex __attribute__((aligned(32))) a, b; // Scratch vertices switch(visible_mask) { case 0: - queue_head = (queue_head + 1) % queue_capacity; - --counter; - continue; break; case 7: /* All visible, push the first vertex and move on */ _glPerspectiveDivideVertex(&queue[queue_head], h); _glSubmitHeaderOrVertex(d, &queue[queue_head]); - if(glIsLastVertex(current->flags)) { + if(last_vertex) { /* If this was the last vertex in the strip, we need to flush the queue and then restart it again */ - queue_head = (queue_head + 1) % queue_capacity; - while(--counter) { - // There are 3 vertices, so we push the first one - _glPerspectiveDivideVertex(&queue[queue_head], h); - _glSubmitHeaderOrVertex(d, &queue[queue_head]); - queue_head = (queue_head + 1) % queue_capacity; - } - visible_mask = 0; - continue; + int v1 = (queue_head + 1) % queue_capacity; + int v2 = (queue_head + 2) % queue_capacity; + + _glPerspectiveDivideVertex(&queue[v1], h); + _glSubmitHeaderOrVertex(d, &queue[v1]); + + _glPerspectiveDivideVertex(&queue[v2], h); + _glSubmitHeaderOrVertex(d, &queue[v2]); } break; case 1: /* First vertex was visible */ { - Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; @@ -246,7 +248,6 @@ void SceneListSubmit(void* src, int n) { /* Second vertex was visible. In self case we need to create a triangle and produce two new vertices: 1-2, and 2-3. */ { - Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; const Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; const Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; @@ -254,7 +255,7 @@ void SceneListSubmit(void* src, int n) { _glClipEdge(v0, v1, &a); _glClipEdge(v1, v2, &b); a.flags = GPU_CMD_VERTEX; - b.flags = GPU_CMD_VERTEX_EOL; + b.flags = v2->flags; _glPerspectiveDivideVertex(v0, h); _glPerspectiveDivideVertex(&a, h); @@ -267,7 +268,6 @@ void SceneListSubmit(void* src, int n) { break; case 3: /* First and second vertex were visible */ { - Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; Vertex __attribute__((aligned(32))) v1 = queue[(queue_head + 1) % queue_capacity]; Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; @@ -293,7 +293,6 @@ void SceneListSubmit(void* src, int n) { case 4: /* Third vertex was visible. */ { - Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; Vertex __attribute__((aligned(32))) v2 = queue[(queue_head + 2) % queue_capacity]; @@ -315,7 +314,6 @@ void SceneListSubmit(void* src, int n) { break; case 5: /* First and third vertex were visible */ { - Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; Vertex __attribute__((aligned(32))) v2 = queue[(queue_head + 2) % queue_capacity]; @@ -342,7 +340,6 @@ void SceneListSubmit(void* src, int n) { break; case 6: /* Second and third vertex were visible */ { - Vertex __attribute__((aligned(32))) a, b; // Scratch vertices Vertex* v0 = &queue[queue_head]; Vertex __attribute__((aligned(32))) v1 = queue[(queue_head + 1) % queue_capacity]; Vertex __attribute__((aligned(32))) v2 = queue[(queue_head + 2) % queue_capacity]; @@ -369,11 +366,15 @@ void SceneListSubmit(void* src, int n) { break; } - queue_head = (queue_head + 1) % queue_capacity; - counter--; + if(last_vertex) { + visible_mask = queue_head = queue_tail = 0; + } else { + queue_head = (queue_head + 1) % queue_capacity; + } } } + struct VertexTmpl { VertexTmpl(float x, float y, float z, float w): x(x), y(y), z(z), w(w) {} From 0923b5c601836339ef5511a84670a17a29f29142 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Sun, 23 Apr 2023 20:16:15 +0100 Subject: [PATCH 08/10] Further optimisations --- GL/platforms/sh4.c | 57 +++++++++++++++++++++++++--------------------- GL/private.h | 4 ++-- 2 files changed, 33 insertions(+), 28 deletions(-) diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index 2ed55f0..061b5fe 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -154,9 +154,8 @@ void SceneListSubmit(void* src, int n) { /* The most vertices ever in the queue is 5 (as some clipping operations * produce and additional couple of vertice, but we add one more so the ring buffer doesn't * trip over itself (e.g. if tail == head we can guarantee it's empty, not full) */ - Vertex __attribute__((aligned(32))) queue[4]; - const int queue_capacity = sizeof(queue) / sizeof(Vertex); - + const static uint8_t queue_capacity = 4; + Vertex __attribute__((aligned(32))) queue[queue_capacity]; Vertex* vertex = (Vertex*) src; #if CLIP_DEBUG @@ -195,28 +194,28 @@ void SceneListSubmit(void* src, int n) { fprintf(stderr, "%d\n", visible_mask); #endif Vertex __attribute__((aligned(32))) a, b; // Scratch vertices - switch(visible_mask) { + switch(visible_mask | (last_vertex << 3)) { case 0: break; + case 15: /* All visible, but final vertex in strip */ + const int8_t v1 = (queue_head + 1) % queue_capacity; + const int8_t v2 = (queue_head + 2) % queue_capacity; + + _glPerspectiveDivideVertex(&queue[queue_head], h); + _glSubmitHeaderOrVertex(d, &queue[queue_head]); + + _glPerspectiveDivideVertex(&queue[v1], h); + _glSubmitHeaderOrVertex(d, &queue[v1]); + + _glPerspectiveDivideVertex(&queue[v2], h); + _glSubmitHeaderOrVertex(d, &queue[v2]); + break; case 7: /* All visible, push the first vertex and move on */ _glPerspectiveDivideVertex(&queue[queue_head], h); _glSubmitHeaderOrVertex(d, &queue[queue_head]); - - if(last_vertex) { - /* If this was the last vertex in the strip, we need to flush the queue and then - restart it again */ - - int v1 = (queue_head + 1) % queue_capacity; - int v2 = (queue_head + 2) % queue_capacity; - - _glPerspectiveDivideVertex(&queue[v1], h); - _glSubmitHeaderOrVertex(d, &queue[v1]); - - _glPerspectiveDivideVertex(&queue[v2], h); - _glSubmitHeaderOrVertex(d, &queue[v2]); - } break; + case 9: case 1: /* First vertex was visible */ { @@ -235,37 +234,40 @@ void SceneListSubmit(void* src, int n) { b.flags = v2->flags; _glPerspectiveDivideVertex(v0, h); - _glPerspectiveDivideVertex(&a, h); - _glPerspectiveDivideVertex(&b, h); - _glSubmitHeaderOrVertex(d, v0); + + _glPerspectiveDivideVertex(&a, h); _glSubmitHeaderOrVertex(d, &a); + + _glPerspectiveDivideVertex(&b, h); _glSubmitHeaderOrVertex(d, &b); _glSubmitHeaderOrVertex(d, &b); } break; + case 10: case 2: /* Second vertex was visible. In self case we need to create a triangle and produce two new vertices: 1-2, and 2-3. */ { Vertex* v0 = &queue[queue_head]; - const Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex __attribute__((aligned(32))) v1 = queue[(queue_head + 1) % queue_capacity]; const Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; - _glClipEdge(v0, v1, &a); - _glClipEdge(v1, v2, &b); + _glClipEdge(v0, &v1, &a); + _glClipEdge(&v1, v2, &b); a.flags = GPU_CMD_VERTEX; b.flags = v2->flags; - _glPerspectiveDivideVertex(v0, h); + _glPerspectiveDivideVertex(&v1, h); _glPerspectiveDivideVertex(&a, h); _glPerspectiveDivideVertex(&b, h); _glSubmitHeaderOrVertex(d, &a); - _glSubmitHeaderOrVertex(d, v0); + _glSubmitHeaderOrVertex(d, &v1); _glSubmitHeaderOrVertex(d, &b); } break; + case 11: case 3: /* First and second vertex were visible */ { Vertex* v0 = &queue[queue_head]; @@ -290,6 +292,7 @@ void SceneListSubmit(void* src, int n) { _glSubmitHeaderOrVertex(d, &a); } break; + case 12: case 4: /* Third vertex was visible. */ { @@ -312,6 +315,7 @@ void SceneListSubmit(void* src, int n) { _glSubmitHeaderOrVertex(d, &v2); } break; + case 13: case 5: /* First and third vertex were visible */ { Vertex* v0 = &queue[queue_head]; @@ -338,6 +342,7 @@ void SceneListSubmit(void* src, int n) { _glSubmitHeaderOrVertex(d, &v2); } break; + case 14: case 6: /* Second and third vertex were visible */ { Vertex* v0 = &queue[queue_head]; diff --git a/GL/private.h b/GL/private.h index bfbfebb..c3f50cb 100644 --- a/GL/private.h +++ b/GL/private.h @@ -241,8 +241,8 @@ GL_FORCE_INLINE void memcpy_vertex(Vertex *dest, const Vertex *src) { asm volatile ( "fschg\n\t" - "clrs\n" - ".align 2\n" + "clrs\n\t" + ".align 2\n\t" "fmov.d @%[in]+, %[scratch]\n\t" "fmov.d %[scratch], @%[out]\n\t" "fmov.d @%[in]+, %[scratch]\n\t" From 1a678d2c8d2cca713bdeda0c25e60a7a3a16a185 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Sun, 23 Apr 2023 21:00:01 +0100 Subject: [PATCH 09/10] Undo some bad changes --- GL/private.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/GL/private.h b/GL/private.h index c3f50cb..f945ccb 100644 --- a/GL/private.h +++ b/GL/private.h @@ -221,15 +221,13 @@ typedef struct { } _glvec4; #define vec2cpy(dst, src) \ - *((uint64_t*) dst) = *((uint64_t*) src); + *((_glvec2*) dst) = *((_glvec2*) src) #define vec3cpy(dst, src) \ - *((uint64_t*) dst) = *((uint64_t*) src); \ - dst[2] = src[2]; + *((_glvec3*) dst) = *((_glvec3*) src) #define vec4cpy(dst, src) \ - *((uint64_t*) dst) = *((uint64_t*) src); \ - *((uint64_t*) dst + 2) = *((uint64_t*) src + 2); + *((_glvec4*) dst) = *((_glvec4*) src) GL_FORCE_INLINE float clamp(float d, float min, float max) { return (d < min) ? min : (d > max) ? max : d; From 0f65eab86a61178eaa12c1a6d72771a98a194d8c Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Wed, 26 Apr 2023 20:50:43 +0100 Subject: [PATCH 10/10] Much faster clipping --- GL/platforms/sh4.c | 390 ++++++++++++++++++++++++--------------------- samples/loadbmp.c | 8 +- 2 files changed, 215 insertions(+), 183 deletions(-) diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index 061b5fe..bbaa334 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -9,8 +9,7 @@ #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) -#define SQ_BASE_ADDRESS (uint32_t *)(void *) \ - (0xe0000000 | (((uint32_t)0x10000000) & 0x03ffffe0)) +#define SQ_BASE_ADDRESS (void*) 0xe0000000 GL_FORCE_INLINE bool glIsVertex(const float flags) { @@ -51,21 +50,18 @@ GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) { const float f = _glFastInvert(vertex->w); /* Convert to NDC and apply viewport */ - vertex->xyz[0] = __builtin_fmaf( - VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth - ); - - vertex->xyz[1] = h - __builtin_fmaf( - VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight - ); + vertex->xyz[0] = ((vertex->xyz[0] * f) * 320) + 320; + vertex->xyz[1] = ((vertex->xyz[1] * f) * -240) + 240; + vertex->xyz[2] = f; /* Orthographic projections need to use invZ otherwise we lose the depth information. As w == 1, and clip-space range is -w to +w we add 1.0 to the Z to bring it into range. We add a little extra to avoid a divide by zero. */ - - vertex->xyz[2] = (vertex->w == 1.0f) ? _glFastInvert(1.0001f + vertex->xyz[2]) : f; + if(vertex->w == 1.0f) { + vertex->xyz[2] = _glFastInvert(1.0001f + vertex->xyz[2]); + } } GL_FORCE_INLINE void _glSubmitHeaderOrVertex(volatile uint32_t* d, const Vertex* v) { @@ -91,37 +87,29 @@ GL_FORCE_INLINE void _glSubmitHeaderOrVertex(volatile uint32_t* d, const Vertex* d += 8; } - -static inline void interpolateColour(const uint32_t* a, const uint32_t* b, const float t, uint32_t* out) { - const static uint32_t MASK1 = 0x00FF00FF; - const static uint32_t MASK2 = 0xFF00FF00; - - const uint32_t f2 = 256 * t; - const uint32_t f1 = 256 - f2; - - *out = (((((*a & MASK1) * f1) + ((*b & MASK1) * f2)) >> 8) & MASK1) | - (((((*a & MASK2) * f1) + ((*b & MASK2) * f2)) >> 8) & MASK2); -} - -static inline void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) { - /* Clipping time! */ +static inline void _glClipEdge(const Vertex* const v1, const Vertex* const v2, Vertex* vout) { + const static float o = 1.0f / 255.0f; const float d0 = v1->w + v1->xyz[2]; const float d1 = v2->w + v2->xyz[2]; - const float sign = ((2.0f * (d1 < d0)) - 1.0f); - const float epsilon = -0.00001f * sign; - const float n = (d0 - d1); - const float r = (1.f / sqrtf(n * n)) * sign; - float t = fmaf(r, d0, epsilon); + const float t = (fabs(d0) * (1.0f / sqrtf((d1 - d0) * (d1 - d0)))) + 0.000001f; + const float invt = 1.0f - t; - vout->xyz[0] = fmaf(v2->xyz[0] - v1->xyz[0], t, v1->xyz[0]); - vout->xyz[1] = fmaf(v2->xyz[1] - v1->xyz[1], t, v1->xyz[1]); - vout->xyz[2] = fmaf(v2->xyz[2] - v1->xyz[2], t, v1->xyz[2]); - vout->w = fmaf(v2->w - v1->w, t, v1->w); + vout->xyz[0] = invt * v1->xyz[0] + t * v2->xyz[0]; + vout->xyz[1] = invt * v1->xyz[1] + t * v2->xyz[1]; + vout->xyz[2] = invt * v1->xyz[2] + t * v2->xyz[2]; - vout->uv[0] = fmaf(v2->uv[0] - v1->uv[0], t, v1->uv[0]); - vout->uv[1] = fmaf(v2->uv[1] - v1->uv[1], t, v1->uv[1]); + vout->uv[0] = invt * v1->uv[0] + t * v2->uv[0]; + vout->uv[1] = invt * v1->uv[1] + t * v2->uv[1]; - interpolateColour((uint32_t*) v1->bgra, (uint32_t*) v2->bgra, t, (uint32_t*) vout->bgra); + vout->w = invt * v1->w + t * v2->w; + + const float m = 255 * t; + const float n = 255 - m; + + vout->bgra[0] = (v1->bgra[0] * n + v2->bgra[0] * m) * o; + vout->bgra[1] = (v1->bgra[1] * n + v2->bgra[1] * m) * o; + vout->bgra[2] = (v1->bgra[2] * n + v2->bgra[2] * m) * o; + vout->bgra[3] = (v1->bgra[3] * n + v2->bgra[3] * m) * o; } #define SPAN_SORT_CFG 0x005F8030 @@ -146,17 +134,9 @@ void SceneListSubmit(void* src, int n) { //Set QACR registers QACR[1] = QACR[0] = 0x11; - volatile uint32_t *d = SQ_BASE_ADDRESS; + volatile uint32_t *sq = SQ_BASE_ADDRESS; - int8_t queue_head = 0; - int8_t queue_tail = 0; - - /* The most vertices ever in the queue is 5 (as some clipping operations - * produce and additional couple of vertice, but we add one more so the ring buffer doesn't - * trip over itself (e.g. if tail == head we can guarantee it's empty, not full) */ - const static uint8_t queue_capacity = 4; - Vertex __attribute__((aligned(32))) queue[queue_capacity]; - Vertex* vertex = (Vertex*) src; + uint32_t clipping_disabled_mask = (_glNearZClippingEnabled()) ? 0 : 0x7; #if CLIP_DEBUG for(int i = 0; i < n; ++i) { @@ -165,83 +145,100 @@ void SceneListSubmit(void* src, int n) { fprintf(stderr, "----\n"); #endif - uint8_t visible_mask = 0; - bool last_vertex = false; + uint8_t counter = 0; + Vertex* v2 = (Vertex*) src; while(n--) { - uint8_t counter = 0; - last_vertex = false; - memcpy_vertex(queue + queue_tail, vertex++); - switch(queue[queue_tail].flags) { + __builtin_prefetch(v2 + 1); + + switch(v2->flags) { case GPU_CMD_VERTEX_EOL: - last_vertex = true; case GPU_CMD_VERTEX: - visible_mask = (visible_mask >> 1) | (queue[queue_tail].xyz[2] >= -queue[queue_tail].w) << 2; - assert(visible_mask < 15); - queue_tail = (queue_tail + 1) % queue_capacity; - counter = (queue_tail - queue_head + queue_capacity) % queue_capacity; + if(++counter < 3) { + v2++; + continue; + } break; default: - _glSubmitHeaderOrVertex(d, &queue[queue_tail]); - break; + _glSubmitHeaderOrVertex(sq, v2++); + counter = 0; + continue; } - if(counter < 3) { - continue; - } + Vertex* const v0 = v2 - 2; + Vertex* const v1 = v2 - 1; -#if CLIP_DEBUG - fprintf(stderr, "%d\n", visible_mask); -#endif - Vertex __attribute__((aligned(32))) a, b; // Scratch vertices - switch(visible_mask | (last_vertex << 3)) { + const uint8_t visible_mask = ( + (v0->xyz[2] > -v0->w) << 0 | + (v1->xyz[2] > -v1->w) << 1 | + (v2->xyz[2] > -v2->w) << 2 | + ((v2->flags == GPU_CMD_VERTEX_EOL) << 3) | + clipping_disabled_mask // This forces everything to be marked visible if clipping is disabled + ); + + switch(visible_mask) { case 0: break; case 15: /* All visible, but final vertex in strip */ - const int8_t v1 = (queue_head + 1) % queue_capacity; - const int8_t v2 = (queue_head + 2) % queue_capacity; + { + _glPerspectiveDivideVertex(v0, h); + _glSubmitHeaderOrVertex(sq, v0); - _glPerspectiveDivideVertex(&queue[queue_head], h); - _glSubmitHeaderOrVertex(d, &queue[queue_head]); + Vertex __attribute__((aligned(32))) a = *v1; + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(sq, &a); - _glPerspectiveDivideVertex(&queue[v1], h); - _glSubmitHeaderOrVertex(d, &queue[v1]); - - _glPerspectiveDivideVertex(&queue[v2], h); - _glSubmitHeaderOrVertex(d, &queue[v2]); + a = *v2; + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(sq, &a); + } break; case 7: /* All visible, push the first vertex and move on */ - _glPerspectiveDivideVertex(&queue[queue_head], h); - _glSubmitHeaderOrVertex(d, &queue[queue_head]); + _glPerspectiveDivideVertex(v0, h); + _glSubmitHeaderOrVertex(sq, v0); break; case 9: - case 1: - /* First vertex was visible */ + /* First vertex was visible, last in strip */ { - Vertex* v0 = &queue[queue_head]; - Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; - Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + Vertex __attribute__((aligned(32))) a, b; - _glClipEdge(v0, v1, &a); - _glClipEdge(v2, v0, &b); - a.flags = GPU_CMD_VERTEX; + _glClipEdge(v0, v1, &a); + a.flags = GPU_CMD_VERTEX; - /* If v2 was the last in the strip, then b should be. If it wasn't - we'll create a degenerate triangle by adding b twice in a row so that the - strip processing will continue correctly after crossing the plane so it can - cross back*/ - b.flags = v2->flags; + _glClipEdge(v2, v0, &b); + b.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(v0, h); - _glSubmitHeaderOrVertex(d, v0); + _glPerspectiveDivideVertex(v0, h); + _glSubmitHeaderOrVertex(sq, v0); - _glPerspectiveDivideVertex(&a, h); - _glSubmitHeaderOrVertex(d, &a); + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(sq, &a); - _glPerspectiveDivideVertex(&b, h); - _glSubmitHeaderOrVertex(d, &b); - _glSubmitHeaderOrVertex(d, &b); + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(sq, &b); + } + break; + case 1: + /* First vertex was visible, but not last in strip */ + { + Vertex __attribute__((aligned(32))) a, b; + + _glClipEdge(v0, v1, &a); + a.flags = GPU_CMD_VERTEX; + + _glClipEdge(v2, v0, &b); + b.flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(v0, h); + _glSubmitHeaderOrVertex(sq, v0); + + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(sq, &a); + + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(sq, &b); + _glSubmitHeaderOrVertex(sq, &b); } break; case 10: @@ -249,133 +246,164 @@ void SceneListSubmit(void* src, int n) { /* Second vertex was visible. In self case we need to create a triangle and produce two new vertices: 1-2, and 2-3. */ { - Vertex* v0 = &queue[queue_head]; - Vertex __attribute__((aligned(32))) v1 = queue[(queue_head + 1) % queue_capacity]; - const Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + Vertex __attribute__((aligned(32))) a; + Vertex __attribute__((aligned(32))) c = *v1; + _glClipEdge(v0, &c, &a); + a.flags = GPU_CMD_VERTEX; - _glClipEdge(v0, &v1, &a); - _glClipEdge(&v1, v2, &b); - a.flags = GPU_CMD_VERTEX; - b.flags = v2->flags; + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(sq, &a); - _glPerspectiveDivideVertex(&v1, h); - _glPerspectiveDivideVertex(&a, h); - _glPerspectiveDivideVertex(&b, h); + _glClipEdge(&c, v2, &a); + a.flags = v2->flags; - _glSubmitHeaderOrVertex(d, &a); - _glSubmitHeaderOrVertex(d, &v1); - _glSubmitHeaderOrVertex(d, &b); + _glPerspectiveDivideVertex(&c, h); + _glSubmitHeaderOrVertex(sq, &c); + + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(sq, &a); } break; case 11: case 3: /* First and second vertex were visible */ - { - Vertex* v0 = &queue[queue_head]; - Vertex __attribute__((aligned(32))) v1 = queue[(queue_head + 1) % queue_capacity]; - Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + { + Vertex __attribute__((aligned(32))) a, b; + Vertex __attribute__((aligned(32))) c = *v1; - _glClipEdge(&v1, v2, &a); - _glClipEdge(v2, v0, &b); + _glClipEdge(v2, v0, &b); + b.flags = GPU_CMD_VERTEX; - a.flags = v2->flags; - b.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(v0, h); + _glSubmitHeaderOrVertex(sq, v0); - _glPerspectiveDivideVertex(v0, h); - _glPerspectiveDivideVertex(&v1, h); - _glPerspectiveDivideVertex(&a, h); - _glPerspectiveDivideVertex(&b, h); + _glClipEdge(v1, v2, &a); + a.flags = v2->flags; - _glSubmitHeaderOrVertex(d, v0); - _glSubmitHeaderOrVertex(d, &v1); - _glSubmitHeaderOrVertex(d, &b); - _glSubmitHeaderOrVertex(d, &v1); - _glSubmitHeaderOrVertex(d, &a); + _glPerspectiveDivideVertex(&c, h); + _glSubmitHeaderOrVertex(sq, &c); + + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(sq, &b); + + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(sq, &c); + _glSubmitHeaderOrVertex(sq, &a); } break; case 12: case 4: /* Third vertex was visible. */ { - Vertex* v0 = &queue[queue_head]; - Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; - Vertex __attribute__((aligned(32))) v2 = queue[(queue_head + 2) % queue_capacity]; + Vertex __attribute__((aligned(32))) a, b; + Vertex __attribute__((aligned(32))) c = *v2; - _glClipEdge(&v2, v0, &a); - _glClipEdge(v1, &v2, &b); - a.flags = GPU_CMD_VERTEX; - b.flags = GPU_CMD_VERTEX; + _glClipEdge(v2, v0, &a); + a.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&v2, h); - _glPerspectiveDivideVertex(&a, h); - _glPerspectiveDivideVertex(&b, h); + _glClipEdge(v1, v2, &b); + b.flags = GPU_CMD_VERTEX; - _glSubmitHeaderOrVertex(d, &a); - _glSubmitHeaderOrVertex(d, &a); - _glSubmitHeaderOrVertex(d, &b); - _glSubmitHeaderOrVertex(d, &v2); + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(sq, &a); + + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(sq, &a); + _glSubmitHeaderOrVertex(sq, &b); + + _glPerspectiveDivideVertex(&c, h); + _glSubmitHeaderOrVertex(sq, &c); } break; case 13: + { + Vertex __attribute__((aligned(32))) a, b; + Vertex __attribute__((aligned(32))) c = *v2; + c.flags = GPU_CMD_VERTEX; + + _glClipEdge(v0, v1, &a); + a.flags = GPU_CMD_VERTEX; + + _glClipEdge(v1, v2, &b); + b.flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(v0, h); + _glSubmitHeaderOrVertex(sq, v0); + + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(sq, &a); + + _glPerspectiveDivideVertex(&c, h); + _glSubmitHeaderOrVertex(sq, &c); + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(sq, &b); + + c.flags = GPU_CMD_VERTEX_EOL; + _glSubmitHeaderOrVertex(sq, &c); + } + break; case 5: /* First and third vertex were visible */ { - Vertex* v0 = &queue[queue_head]; - Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; - Vertex __attribute__((aligned(32))) v2 = queue[(queue_head + 2) % queue_capacity]; + Vertex __attribute__((aligned(32))) a, b; + Vertex __attribute__((aligned(32))) c = *v2; + c.flags = GPU_CMD_VERTEX; - _glClipEdge(v0, v1, &a); - _glClipEdge(v1, &v2, &b); - a.flags = GPU_CMD_VERTEX; - b.flags = GPU_CMD_VERTEX; + _glClipEdge(v0, v1, &a); + a.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(v0, h); - _glPerspectiveDivideVertex(&v2, h); - _glPerspectiveDivideVertex(&a, h); - _glPerspectiveDivideVertex(&b, h); + _glClipEdge(v1, v2, &b); + b.flags = GPU_CMD_VERTEX; - _glSubmitHeaderOrVertex(d, v0); - _glSubmitHeaderOrVertex(d, &a); - uint32_t v2_flags = v2.flags; - v2.flags = GPU_CMD_VERTEX; - _glSubmitHeaderOrVertex(d, &v2); - v2.flags = v2_flags; - _glSubmitHeaderOrVertex(d, &b); - _glSubmitHeaderOrVertex(d, &v2); + _glPerspectiveDivideVertex(v0, h); + _glSubmitHeaderOrVertex(sq, v0); + + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(sq, &a); + + _glPerspectiveDivideVertex(&c, h); + _glSubmitHeaderOrVertex(sq, &c); + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(sq, &b); + _glSubmitHeaderOrVertex(sq, &c); } break; case 14: case 6: /* Second and third vertex were visible */ { - Vertex* v0 = &queue[queue_head]; - Vertex __attribute__((aligned(32))) v1 = queue[(queue_head + 1) % queue_capacity]; - Vertex __attribute__((aligned(32))) v2 = queue[(queue_head + 2) % queue_capacity]; + Vertex __attribute__((aligned(32))) a, b; + Vertex __attribute__((aligned(32))) c = *v1; - _glClipEdge(v0, &v1, &a); - _glClipEdge(&v2, v0, &b); + _glClipEdge(v0, v1, &a); + a.flags = GPU_CMD_VERTEX; - a.flags = GPU_CMD_VERTEX; - b.flags = GPU_CMD_VERTEX; + _glClipEdge(v2, v0, &b); + b.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&v1, h); - _glPerspectiveDivideVertex(&v2, h); - _glPerspectiveDivideVertex(&a, h); - _glPerspectiveDivideVertex(&b, h); + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(sq, &a); - _glSubmitHeaderOrVertex(d, &a); - _glSubmitHeaderOrVertex(d, &v1); - _glSubmitHeaderOrVertex(d, &b); - _glSubmitHeaderOrVertex(d, &v1); - _glSubmitHeaderOrVertex(d, &v2); + _glPerspectiveDivideVertex(&c, h); + _glSubmitHeaderOrVertex(sq, &c); + + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(sq, &b); + _glSubmitHeaderOrVertex(sq, &c); + + c = *v2; + _glPerspectiveDivideVertex(&c, h); + _glSubmitHeaderOrVertex(sq, &c); } break; default: break; } - if(last_vertex) { - visible_mask = queue_head = queue_tail = 0; + if(v2->flags == GPU_CMD_VERTEX_EOL) { + counter = 0; } else { - queue_head = (queue_head + 1) % queue_capacity; + --counter; } + + v2++; } } diff --git a/samples/loadbmp.c b/samples/loadbmp.c index cc8d7b7..65bd571 100644 --- a/samples/loadbmp.c +++ b/samples/loadbmp.c @@ -23,7 +23,11 @@ int ImageLoad(char *filename, Image *image) { } // seek through the bmp header, up to the width/height: - fseek(file, 18, SEEK_CUR); + fseek(file, 10, SEEK_CUR); + + uint32_t offset; + fread(&offset, 4, 1, file); + fseek(file, 4, SEEK_CUR); // read the width if ((i = fread(&sizeX, 4, 1, file)) != 1) { @@ -65,7 +69,7 @@ int ImageLoad(char *filename, Image *image) { } // seek past the rest of the bitmap header. - fseek(file, 24, SEEK_CUR); + fseek(file, offset, SEEK_SET); // read the data. image->data = (char *) malloc(size);