diff --git a/CMakeLists.txt b/CMakeLists.txt index 13ad28d..fb48c72 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -172,6 +172,7 @@ gen_sample(scissor samples/scissor/main.c) gen_sample(polymark samples/polymark/main.c) gen_sample(cubes samples/cubes/main.cpp) +gen_sample(zclip_test tests/zclip/main.cpp) if(PLATFORM_DREAMCAST) gen_sample(trimark samples/trimark/main.c) diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index 0d2a35c..dee2b40 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -71,7 +71,7 @@ GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) { vertex->xyz[2] = (vertex->w == 1.0f) ? _glFastInvert(1.0001f + vertex->xyz[2]) : f; } -GL_FORCE_INLINE void _glSubmitHeaderOrVertex(uint32_t* d, const Vertex* v) { +GL_FORCE_INLINE void _glSubmitHeaderOrVertex(volatile uint32_t* d, const Vertex* v) { #ifndef NDEBUG gl_assert(!isnan(v->xyz[2])); gl_assert(!isnan(v->w)); @@ -94,13 +94,6 @@ GL_FORCE_INLINE void _glSubmitHeaderOrVertex(uint32_t* d, const Vertex* v) { d += 8; } -static struct __attribute__((aligned(32))) { - Vertex* v; - int visible; -} triangle[3]; - -static int tri_count = 0; -static int strip_count = 0; static inline void interpolateColour(const uint32_t* a, const uint32_t* b, const float t, uint32_t* out) { const static uint32_t MASK1 = 0x00FF00FF; @@ -134,296 +127,284 @@ static inline void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) interpolateColour((uint32_t*) v1->bgra, (uint32_t*) v2->bgra, t, (uint32_t*) vout->bgra); } -GL_FORCE_INLINE void ClearTriangle() { - tri_count = 0; -} - -static inline void ShiftTriangle() { - if(!tri_count) { - return; - } - - tri_count--; - triangle[0] = triangle[1]; - triangle[1] = triangle[2]; - -#ifndef NDEBUG - triangle[2].v = NULL; - triangle[2].visible = false; -#endif -} - - -static inline void ShiftRotateTriangle() { - if(!tri_count) { - return; - } - - if(triangle[0].v < triangle[1].v) { - triangle[0] = triangle[2]; - } else { - triangle[1] = triangle[2]; - } - - tri_count--; -} - #define SPAN_SORT_CFG 0x005F8030 +static volatile int *pvrdmacfg = (int*)0xA05F6888; +static volatile int *qacr = (int*)0xFF000038; void SceneListSubmit(void* src, int n) { + /* You need at least a header, and 3 vertices to render anything */ + if(n < 4) { + return; + } + const float h = GetVideoMode()->height; PVR_SET(SPAN_SORT_CFG, 0x0); //Set PVR DMA registers - volatile int *pvrdmacfg = (int*)0xA05F6888; pvrdmacfg[0] = 1; pvrdmacfg[1] = 0; //Set QACR registers - volatile int *qacr = (int*)0xFF000038; qacr[1] = qacr[0] = 0x11; - uint32_t *d = SQ_BASE_ADDRESS; + volatile uint32_t *d = SQ_BASE_ADDRESS; - Vertex __attribute__((aligned(32))) tmp; + int8_t queue_head = 0; + int8_t queue_tail = 0; + + Vertex __attribute__((aligned(32))) queue[3]; + const int queue_capacity = sizeof(queue) / sizeof(Vertex); - /* Perform perspective divide on each vertex */ Vertex* vertex = (Vertex*) src; + uint32_t visible_mask = 0; - if(!_glNearZClippingEnabled()) { - /* Prep store queues */ - - while(n--) { - if(glIsVertex(vertex->flags)) { - _glPerspectiveDivideVertex(vertex, h); - } - - _glSubmitHeaderOrVertex(d, vertex); - ++vertex; - } - - return; + for(int i = 0; i < n; ++i) { + Vertex* v = vertex + i; + fprintf(stderr, "{%f, %f, %f, %f},\n", v->xyz[0], v->xyz[1], v->xyz[2], v->w); } - tri_count = 0; - strip_count = 0; + /* Assume first entry is a header */ + _glSubmitHeaderOrVertex(d, vertex++); -#if CLIP_DEBUG - printf("----\n"); -#endif + /* Push first 2 vertices of the strip */ + memcpy_vertex(&queue[0], vertex++); + memcpy_vertex(&queue[1], vertex++); + visible_mask = ((queue[0].xyz[2] >= -queue[0].w) << 1) | ((queue[1].xyz[2] >= -queue[1].w) << 2); + queue_tail = 2; + n -= 3; - for(int i = 0; i < n; ++i, ++vertex) { - PREFETCH(vertex + 1); - PREFETCH(vertex + 2); - /* Wait until we fill the triangle */ - if(tri_count < 3) { - if(glIsVertex(vertex->flags)) { - ++strip_count; - triangle[tri_count].v = vertex; - triangle[tri_count].visible = vertex->xyz[2] >= -vertex->w; - if(++tri_count < 3) { - continue; - } - } else { - /* We hit a header */ - tri_count = 0; - strip_count = 0; - _glSubmitHeaderOrVertex(d, vertex); - continue; - } - } - -#if CLIP_DEBUG - printf("SC: %d\n", strip_count); -#endif - - /* If we got here, then triangle contains 3 vertices */ - int visible_mask = triangle[0].visible | (triangle[1].visible << 1) | (triangle[2].visible << 2); - - /* Clipping time! - - There are 6 distinct possibilities when clipping a triangle. 3 of them result - in another triangle, 3 of them result in a quadrilateral. - - Assuming you iterate the edges of the triangle in order, and create a new *visible* - vertex when you cross the plane, and discard vertices behind the plane, then the only - difference between the two cases is that the final two vertices that need submitting have - to be reversed. - - Unfortunately we have to copy vertices here, because if we persp-divide a vertex it may - be used in a subsequent triangle in the strip and would end up being double divided. - */ - -#define SUBMIT_QUEUED() \ - if(strip_count > 3) { \ - tmp = *(vertex - 2); \ - /* If we had triangles ahead of this one, submit and finalize */ \ - _glPerspectiveDivideVertex(&tmp, h); \ - _glSubmitHeaderOrVertex(d, &tmp); \ - tmp = *(vertex - 1); \ - tmp.flags = GPU_CMD_VERTEX_EOL; \ - _glPerspectiveDivideVertex(&tmp, h); \ - _glSubmitHeaderOrVertex(d, &tmp); \ - } - - bool is_last_in_strip = glIsLastVertex(vertex->flags); + while(n--) { + Vertex* self = &queue[queue_tail]; + memcpy_vertex(self, vertex++); + visible_mask = (visible_mask >> 1) | ((self->xyz[2] >= -self->w) << 2); // Push new vertex + queue_tail = (queue_tail + 1) % queue_capacity; switch(visible_mask) { - case 1: { - SUBMIT_QUEUED(); - /* 0, 0a, 2a */ - tmp = *triangle[0].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - _glClipEdge(triangle[0].v, triangle[1].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - _glClipEdge(triangle[2].v, triangle[0].v, &tmp); - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - } break; - case 2: { - SUBMIT_QUEUED(); - /* 0a, 1, 1a */ - _glClipEdge(triangle[0].v, triangle[1].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - tmp = *triangle[1].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - _glClipEdge(triangle[1].v, triangle[2].v, &tmp); - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - } break; - case 3: { - SUBMIT_QUEUED(); - /* 0, 1, 2a, 1a */ - tmp = *triangle[0].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - tmp = *triangle[1].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - _glClipEdge(triangle[2].v, triangle[0].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - _glClipEdge(triangle[1].v, triangle[2].v, &tmp); - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - } break; - case 4: { - SUBMIT_QUEUED(); - /* 1a, 2, 2a */ - _glClipEdge(triangle[1].v, triangle[2].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - tmp = *triangle[2].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - _glClipEdge(triangle[2].v, triangle[0].v, &tmp); - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - } break; - case 5: { - SUBMIT_QUEUED(); - /* 0, 0a, 2, 1a */ - tmp = *triangle[0].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - _glClipEdge(triangle[0].v, triangle[1].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - tmp = *triangle[2].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - _glClipEdge(triangle[1].v, triangle[2].v, &tmp); - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - } break; - case 6: { - SUBMIT_QUEUED(); - /* 0a, 1, 2a, 2 */ - _glClipEdge(triangle[0].v, triangle[1].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - tmp = *triangle[1].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - _glClipEdge(triangle[2].v, triangle[0].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - - tmp = *triangle[2].v; - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); - } break; - case 7: { - /* All the vertices are visible! We divide and submit v0, then shift */ - _glPerspectiveDivideVertex(vertex - 2, h); - _glSubmitHeaderOrVertex(d, vertex - 2); - - if(is_last_in_strip) { - _glPerspectiveDivideVertex(vertex - 1, h); - _glSubmitHeaderOrVertex(d, vertex - 1); - _glPerspectiveDivideVertex(vertex, h); - _glSubmitHeaderOrVertex(d, vertex); - tri_count = 0; - strip_count = 0; - } - - ShiftRotateTriangle(); - continue; - } break; case 0: - default: + queue_head = (queue_head + 1) % queue_capacity; + continue; break; + case 7: + /* All visible, push the first vertex and move on */ + _glPerspectiveDivideVertex(&queue[queue_head], h); + _glSubmitHeaderOrVertex(d, &queue[queue_head]); + queue_head = (queue_head + 1) % queue_capacity; + + if(glIsLastVertex(self->flags)) { + /* If this was the last vertex in the strip, we clear the + * triangle out */ + while(queue_head != queue_tail) { + _glPerspectiveDivideVertex(&queue[queue_head], h); + _glSubmitHeaderOrVertex(d, &queue[queue_head]); + queue_head = (queue_head + 1) % queue_capacity; + } + + visible_mask = 0; + } + break; + case 1: + /* First vertex was visible */ + { + Vertex __attribute__((aligned(32))) a, b; // Scratch vertices + Vertex* v0 = &queue[queue_head]; + Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + + _glClipEdge(v0, v1, &a); + _glClipEdge(v2, v0, &b); + a.flags = GPU_CMD_VERTEX; + + /* If v2 was the last in the strip, then b should be. If it wasn't + we'll create a degenerate triangle by adding b twice in a row so that the + strip processing will continue correctly after crossing the plane so it can + cross back*/ + b.flags = v2->flags; + + _glPerspectiveDivideVertex(v0, h); + _glSubmitHeaderOrVertex(d, v0); + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(d, &a); + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(d, &b); + _glSubmitHeaderOrVertex(d, &b); + + /* But skip the vertices that are already there */ + queue_head = (queue_head + 3) % queue_capacity; + visible_mask = 0; + } + break; + case 2: + /* Second vertex was visible. In self case we need to create a triangle and produce + two new vertices: 1-2, and 2-3. */ + { + Vertex __attribute__((aligned(32))) a, b; // Scratch vertices + Vertex* v0 = &queue[queue_head]; + Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + + _glClipEdge(v0, v1, &a); + _glClipEdge(v1, v2, &b); + a.flags = GPU_CMD_VERTEX; + b.flags = GPU_CMD_VERTEX_EOL; + + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(d, &a); + + _glPerspectiveDivideVertex(v1, h); + _glSubmitHeaderOrVertex(d, v1); + + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(d, &b); + + /* But skip the vertices that are already there */ + queue_head = (queue_head + 3) % queue_capacity; + visible_mask = 0; + } + break; + case 3: /* First and second vertex were visible */ + { + Vertex __attribute__((aligned(32))) a, b; // Scratch vertices + Vertex* v0 = &queue[queue_head]; + Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + + _glClipEdge(v1, v2, &a); + _glClipEdge(v2, v0, &b); + + a.flags = v2->flags; + b.flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(v0, h); + _glSubmitHeaderOrVertex(d, v0); + + _glPerspectiveDivideVertex(v1, h); + _glSubmitHeaderOrVertex(d, v1); + + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(d, &b); + + _glSubmitHeaderOrVertex(d, v1); + + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(d, &a); + + /* But skip the vertices that are already there */ + queue_head = (queue_head + 3) % queue_capacity; + visible_mask = 0; + } + break; + case 4: + /* Third vertex was visible. */ + { + Vertex __attribute__((aligned(32))) a, b; // Scratch vertices + Vertex* v0 = &queue[queue_head]; + Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex v2 = queue[(queue_head + 2) % queue_capacity]; + + _glClipEdge(&v2, v0, &a); + _glClipEdge(v1, &v2, &b); + a.flags = GPU_CMD_VERTEX; + b.flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(d, &a); + + _glSubmitHeaderOrVertex(d, &a); + + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(d, &b); + + _glPerspectiveDivideVertex(&v2, h); + _glSubmitHeaderOrVertex(d, &v2); + + queue_head = (queue_head + 3) % queue_capacity; + visible_mask = 0; + } + break; + case 5: /* First and third vertex were visible */ + { + Vertex __attribute__((aligned(32))) a, b, c; // Scratch vertices + Vertex* v0 = &queue[queue_head]; + Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + + _glClipEdge(v0, v1, &a); + _glClipEdge(v1, v2, &b); + a.flags = GPU_CMD_VERTEX; + b.flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(v0, h); + _glSubmitHeaderOrVertex(d, v0); + + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(d, &a); + + uint32_t v2_flags = v2->flags; + v2->flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(v2, h); + _glSubmitHeaderOrVertex(d, v2); + + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(d, &b); + + v2->flags = v2_flags; + _glSubmitHeaderOrVertex(d, v2); + + queue_head = (queue_head + 3) % queue_capacity; + visible_mask = 0; + } + break; + case 6: /* Second and third vertex were visible */ + { + Vertex __attribute__((aligned(32))) a, b; // Scratch vertices + Vertex* v0 = &queue[queue_head]; + Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + + _glClipEdge(v0, v1, &a); + _glClipEdge(v2, v0, &b); + + a.flags = GPU_CMD_VERTEX; + b.flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(d, &a); + + _glPerspectiveDivideVertex(v1, h); + _glSubmitHeaderOrVertex(d, v1); + + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(d, &b); + + _glSubmitHeaderOrVertex(d, v1); + + _glPerspectiveDivideVertex(v2, h); + _glSubmitHeaderOrVertex(d, v2); + + queue_head = (queue_head + 3) % queue_capacity; + visible_mask = 0; + } + break; + default: + break; } - /* If this was the last in the strip, we don't need to - submit anything else, we just wipe the tri_count */ - if(is_last_in_strip) { - tri_count = 0; - strip_count = 0; - } else { - ShiftRotateTriangle(); - strip_count = 2; + /* Submit the beginning of the next strip (2 verts, maybe a header) */ + int8_t v = 0; + while(v < 2 && n > 1) { + if(!glIsVertex(vertex->flags)) { + _glSubmitHeaderOrVertex(d, vertex); + } else { + memcpy_vertex(&queue[queue_tail], vertex++); + visible_mask = (visible_mask >> 1) | ((queue[queue_tail].xyz[2] >= -queue[queue_tail].w) << 2); // Push new vertex + queue_tail = (queue_tail + 1) % queue_capacity; + ++v; + } + --n; } + } } diff --git a/GL/private.h b/GL/private.h index f309571..bfbfebb 100644 --- a/GL/private.h +++ b/GL/private.h @@ -221,23 +221,55 @@ typedef struct { } _glvec4; #define vec2cpy(dst, src) \ - *((_glvec2*) dst) = *((_glvec2*) src) + *((uint64_t*) dst) = *((uint64_t*) src); #define vec3cpy(dst, src) \ - *((_glvec3*) dst) = *((_glvec3*) src) + *((uint64_t*) dst) = *((uint64_t*) src); \ + dst[2] = src[2]; #define vec4cpy(dst, src) \ - *((_glvec4*) dst) = *((_glvec4*) src) + *((uint64_t*) dst) = *((uint64_t*) src); \ + *((uint64_t*) dst + 2) = *((uint64_t*) src + 2); GL_FORCE_INLINE float clamp(float d, float min, float max) { return (d < min) ? min : (d > max) ? max : d; } +GL_FORCE_INLINE void memcpy_vertex(Vertex *dest, const Vertex *src) { +#ifdef __DREAMCAST__ + _Complex float double_scratch; + + asm volatile ( + "fschg\n\t" + "clrs\n" + ".align 2\n" + "fmov.d @%[in]+, %[scratch]\n\t" + "fmov.d %[scratch], @%[out]\n\t" + "fmov.d @%[in]+, %[scratch]\n\t" + "add #8, %[out]\n\t" + "fmov.d %[scratch], @%[out]\n\t" + "fmov.d @%[in]+, %[scratch]\n\t" + "add #8, %[out]\n\t" + "fmov.d %[scratch], @%[out]\n\t" + "fmov.d @%[in], %[scratch]\n\t" + "add #8, %[out]\n\t" + "fmov.d %[scratch], @%[out]\n\t" + "fschg\n" + : [in] "+&r" ((uint32_t) src), [scratch] "=&d" (double_scratch), [out] "+&r" ((uint32_t) dest) + : + : "t", "memory" // clobbers + ); +#else + *dest = *src; +#endif +} + #define swapVertex(a, b) \ do { \ - Vertex c = *a; \ - *a = *b; \ - *b = c; \ + Vertex __attribute__((aligned(32))) c; \ + memcpy_vertex(&c, a); \ + memcpy_vertex(a, b); \ + memcpy_vertex(b, &c); \ } while(0) /* ClipVertex doesn't have room for these, so we need to parse them diff --git a/samples/zclip_triangle/main.c b/samples/zclip_triangle/main.c index 17f4887..92482ab 100644 --- a/samples/zclip_triangle/main.c +++ b/samples/zclip_triangle/main.c @@ -86,12 +86,13 @@ void DrawGLScene() rotation = (rotation > 360.0f) ? rotation - 360.0f : rotation; glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // Clear The Screen And The Depth Buffer + glClearColor(0.5f, 0.5f, 0.5f, 0.5f); glLoadIdentity(); // Reset The View glDisable(GL_CULL_FACE); glPushMatrix(); - glTranslatef(0.0f, -1.0f, movement); + glTranslatef(0.0f, -1.0f, -movement); glRotatef(rotation, 0.0f, 1.0f, 0.0f); glBegin(GL_TRIANGLES); diff --git a/tests/zclip/main.cpp b/tests/zclip/main.cpp new file mode 100644 index 0000000..9cd2037 --- /dev/null +++ b/tests/zclip/main.cpp @@ -0,0 +1,625 @@ + +#include +#include +#include +#include +#include + +#define SQ_BASE_ADDRESS 0 +#define SPAN_SORT_CFG 0 +#define PVR_SET(x, y) (void)(x); (void)(y) + +struct Vertex { + uint32_t flags; + float xyz[3]; + float uv[2]; + float w; + uint8_t bgra[4]; +}; + +struct { + float hwidth; + float x_plus_hwidth; + float hheight; + float y_plus_hheight; +} VIEWPORT = {320, 320, 240, 240}; + + +struct VideoMode { + float height; +}; + +static VideoMode* GetVideoMode() { + static VideoMode mode = {320.0f}; + return &mode; +} + +enum GPUCommand { + GPU_CMD_POLYHDR = 0x80840000, + GPU_CMD_VERTEX = 0xe0000000, + GPU_CMD_VERTEX_EOL = 0xf0000000, + GPU_CMD_USERCLIP = 0x20000000, + GPU_CMD_MODIFIER = 0x80000000, + GPU_CMD_SPRITE = 0xA0000000 +}; + +static std::vector sent; + +static inline void interpolateColour(const uint32_t* a, const uint32_t* b, const float t, uint32_t* out) { + const static uint32_t MASK1 = 0x00FF00FF; + const static uint32_t MASK2 = 0xFF00FF00; + + const uint32_t f2 = 256 * t; + const uint32_t f1 = 256 - f2; + + *out = (((((*a & MASK1) * f1) + ((*b & MASK1) * f2)) >> 8) & MASK1) | + (((((*a & MASK2) * f1) + ((*b & MASK2) * f2)) >> 8) & MASK2); +} + +static inline void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) { + /* Clipping time! */ + const float d0 = v1->w + v1->xyz[2]; + const float d1 = v2->w + v2->xyz[2]; + const float sign = ((2.0f * (d1 < d0)) - 1.0f); + const float epsilon = -0.00001f * sign; + const float n = (d0 - d1); + const float r = (1.f / sqrtf(n * n)) * sign; + float t = fmaf(r, d0, epsilon); + + vout->xyz[0] = fmaf(v2->xyz[0] - v1->xyz[0], t, v1->xyz[0]); + vout->xyz[1] = fmaf(v2->xyz[1] - v1->xyz[1], t, v1->xyz[1]); + vout->xyz[2] = fmaf(v2->xyz[2] - v1->xyz[2], t, v1->xyz[2]); + vout->w = fmaf(v2->w - v1->w, t, v1->w); + + vout->uv[0] = fmaf(v2->uv[0] - v1->uv[0], t, v1->uv[0]); + vout->uv[1] = fmaf(v2->uv[1] - v1->uv[1], t, v1->uv[1]); + + interpolateColour((uint32_t*) v1->bgra, (uint32_t*) v2->bgra, t, (uint32_t*) vout->bgra); +} + +bool glIsVertex(const uint32_t flags) { + return flags == GPU_CMD_VERTEX_EOL || flags == GPU_CMD_VERTEX; +} + +bool glIsLastVertex(const uint32_t flags) { + return flags == GPU_CMD_VERTEX_EOL; +} + +void _glSubmitHeaderOrVertex(volatile uint32_t*, Vertex* vtx) { + sent.push_back(*vtx); +} + +float _glFastInvert(float x) { + return (1.f / __builtin_sqrtf(x * x)); +} + +void _glPerspectiveDivideVertex(Vertex* vertex, const float h) { + const float f = _glFastInvert(vertex->w); + + /* Convert to NDC and apply viewport */ + vertex->xyz[0] = __builtin_fmaf( + VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth + ); + + vertex->xyz[1] = h - __builtin_fmaf( + VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight + ); + + /* Orthographic projections need to use invZ otherwise we lose + the depth information. As w == 1, and clip-space range is -w to +w + we add 1.0 to the Z to bring it into range. We add a little extra to + avoid a divide by zero. + */ + + vertex->xyz[2] = (vertex->w == 1.0f) ? _glFastInvert(1.0001f + vertex->xyz[2]) : f; +} + + +void memcpy_vertex(Vertex* dst, Vertex* src) { + *dst = *src; +} + +/* Zclipping is so difficult to get right, that self sample tests all the cases of clipping and makes sure that things work as expected */ + +#ifdef __DREAMCAST__ +static volatile int *pvrdmacfg = (int*)0xA05F6888; +static volatile int *qacr = (int*)0xFF000038; +#else +static int pvrdmacfg[2]; +static int qacr[2]; +#endif + +void SceneListSubmit(void* src, int n) { + /* You need at least a header, and 3 vertices to render anything */ + if(n < 4) { + return; + } + + const float h = GetVideoMode()->height; + + PVR_SET(SPAN_SORT_CFG, 0x0); + + //Set PVR DMA registers + pvrdmacfg[0] = 1; + pvrdmacfg[1] = 0; + + //Set QACR registers + qacr[1] = qacr[0] = 0x11; + + volatile uint32_t *d = SQ_BASE_ADDRESS; + + int8_t queue_head = 0; + int8_t queue_tail = 0; + + Vertex __attribute__((aligned(32))) queue[5]; + const int queue_capacity = sizeof(queue) / sizeof(Vertex); + + Vertex* vertex = (Vertex*) src; + uint32_t visible_mask = 0; + + /* Assume first entry is a header */ + _glSubmitHeaderOrVertex(d, vertex++); + + /* Push first 2 vertices of the strip */ + memcpy_vertex(&queue[0], vertex++); + memcpy_vertex(&queue[1], vertex++); + visible_mask = ((queue[0].xyz[2] >= -queue[0].w) << 1) | ((queue[1].xyz[2] >= -queue[1].w) << 2); + queue_tail = 2; + n -= 3; + + while(n--) { + Vertex* self = &queue[queue_tail]; + memcpy_vertex(self, vertex++); + visible_mask = (visible_mask >> 1) | ((self->xyz[2] >= -self->w) << 2); // Push new vertex + queue_tail = (queue_tail + 1) % queue_capacity; + + switch(visible_mask) { + case 0: + queue_head = (queue_head + 1) % queue_capacity; + continue; + break; + case 7: + /* All visible, push the first vertex and move on */ + _glPerspectiveDivideVertex(&queue[queue_head], h); + _glSubmitHeaderOrVertex(d, &queue[queue_head]); + queue_head = (queue_head + 1) % queue_capacity; + + if(glIsLastVertex(self->flags)) { + /* If this was the last vertex in the strip, we clear the + * triangle out */ + while(queue_head != queue_tail) { + _glPerspectiveDivideVertex(&queue[queue_head], h); + _glSubmitHeaderOrVertex(d, &queue[queue_head]); + queue_head = (queue_head + 1) % queue_capacity; + } + + visible_mask = 0; + } + break; + case 1: + /* First vertex was visible */ + { + Vertex __attribute__((aligned(32))) a, b; // Scratch vertices + Vertex* v0 = &queue[queue_head]; + Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + + _glClipEdge(v0, v1, &a); + _glClipEdge(v2, v0, &b); + a.flags = GPU_CMD_VERTEX; + + /* If v2 was the last in the strip, then b should be. If it wasn't + we'll create a degenerate triangle by adding b twice in a row so that the + strip processing will continue correctly after crossing the plane so it can + cross back*/ + b.flags = v2->flags; + + _glPerspectiveDivideVertex(v0, h); + _glSubmitHeaderOrVertex(d, v0); + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(d, &a); + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(d, &b); + _glSubmitHeaderOrVertex(d, &b); + + /* But skip the vertices that are already there */ + queue_head = (queue_head + 3) % queue_capacity; + visible_mask = 0; + } + break; + case 2: + /* Second vertex was visible. In self case we need to create a triangle and produce + two new vertices: 1-2, and 2-3. */ + { + Vertex __attribute__((aligned(32))) a, b; // Scratch vertices + Vertex* v0 = &queue[queue_head]; + Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + + _glClipEdge(v0, v1, &a); + _glClipEdge(v1, v2, &b); + a.flags = GPU_CMD_VERTEX; + b.flags = GPU_CMD_VERTEX_EOL; + + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(d, &a); + + _glPerspectiveDivideVertex(v1, h); + _glSubmitHeaderOrVertex(d, v1); + + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(d, &b); + + /* But skip the vertices that are already there */ + queue_head = (queue_head + 3) % queue_capacity; + visible_mask = 0; + } + break; + case 3: /* First and second vertex were visible */ + { + Vertex __attribute__((aligned(32))) a, b; // Scratch vertices + Vertex* v0 = &queue[queue_head]; + Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + + _glClipEdge(v1, v2, &a); + _glClipEdge(v2, v0, &b); + + a.flags = v2->flags; + b.flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(v0, h); + _glSubmitHeaderOrVertex(d, v0); + + _glPerspectiveDivideVertex(v1, h); + _glSubmitHeaderOrVertex(d, v1); + + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(d, &b); + + _glSubmitHeaderOrVertex(d, v1); + + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(d, &a); + + /* But skip the vertices that are already there */ + queue_head = (queue_head + 3) % queue_capacity; + visible_mask = 0; + } + break; + case 4: + /* Third vertex was visible. */ + { + Vertex __attribute__((aligned(32))) a, b; // Scratch vertices + Vertex* v0 = &queue[queue_head]; + Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex v2 = queue[(queue_head + 2) % queue_capacity]; + + _glClipEdge(&v2, v0, &a); + _glClipEdge(v1, &v2, &b); + a.flags = GPU_CMD_VERTEX; + b.flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(d, &a); + + _glSubmitHeaderOrVertex(d, &a); + + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(d, &b); + + _glPerspectiveDivideVertex(&v2, h); + _glSubmitHeaderOrVertex(d, &v2); + + queue_head = (queue_head + 3) % queue_capacity; + visible_mask = 0; + } + break; + case 5: /* First and third vertex were visible */ + { + Vertex __attribute__((aligned(32))) a, b, c; // Scratch vertices + Vertex* v0 = &queue[queue_head]; + Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + + _glClipEdge(v0, v1, &a); + _glClipEdge(v1, v2, &b); + a.flags = GPU_CMD_VERTEX; + b.flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(v0, h); + _glSubmitHeaderOrVertex(d, v0); + + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(d, &a); + + uint32_t v2_flags = v2->flags; + v2->flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(v2, h); + _glSubmitHeaderOrVertex(d, v2); + + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(d, &b); + + v2->flags = v2_flags; + _glSubmitHeaderOrVertex(d, v2); + + queue_head = (queue_head + 3) % queue_capacity; + visible_mask = 0; + } + break; + case 6: /* Second and third vertex were visible */ + { + Vertex __attribute__((aligned(32))) a, b; // Scratch vertices + Vertex* v0 = &queue[queue_head]; + Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; + + _glClipEdge(v0, v1, &a); + _glClipEdge(v2, v0, &b); + + a.flags = GPU_CMD_VERTEX; + b.flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(&a, h); + _glSubmitHeaderOrVertex(d, &a); + + _glPerspectiveDivideVertex(v1, h); + _glSubmitHeaderOrVertex(d, v1); + + _glPerspectiveDivideVertex(&b, h); + _glSubmitHeaderOrVertex(d, &b); + + _glSubmitHeaderOrVertex(d, v1); + + _glPerspectiveDivideVertex(v2, h); + _glSubmitHeaderOrVertex(d, v2); + + queue_head = (queue_head + 3) % queue_capacity; + visible_mask = 0; + } + break; + default: + break; + } + + /* Submit the beginning of the next strip (2 verts, maybe a header) */ + int8_t v = 0; + while(v < 2 && n > 1) { + if(!glIsVertex(vertex->flags)) { + _glSubmitHeaderOrVertex(d, vertex); + } else { + memcpy_vertex(&queue[queue_tail], vertex++); + visible_mask = (visible_mask >> 1) | ((queue[queue_tail].xyz[2] >= -queue[queue_tail].w) << 2); // Push new vertex + queue_tail = (queue_tail + 1) % queue_capacity; + ++v; + } + --n; + } + + } +} + + +struct VertexTmpl { + VertexTmpl(float x, float y, float z, float w): + x(x), y(y), z(z), w(w) {} + + float x, y, z, w; +}; + +std::vector make_vertices(const std::vector& verts) { + std::vector result; + Vertex r; + + r.flags = GPU_CMD_POLYHDR; + result.push_back(r); + + for(auto& v: verts) { + r.flags = GPU_CMD_VERTEX; + r.xyz[0] = v.x; + r.xyz[1] = v.y; + r.xyz[2] = v.z; + r.uv[0] = 0.0f; + r.uv[1] = 0.0f; + r.w = v.w; + + result.push_back(r); + } + + result.back().flags = GPU_CMD_VERTEX_EOL; + return result; +} + +template +void check_equal(const T& lhs, const U& rhs) { + if(lhs != rhs) { + throw std::runtime_error("Assertion failed"); + } +} + +template<> +void check_equal(const Vertex& lhs, const Vertex& rhs) { + if(lhs.xyz[0] != rhs.xyz[0] || + lhs.xyz[1] != rhs.xyz[1] || + lhs.xyz[2] != rhs.xyz[2] || + lhs.w != rhs.w) { + throw std::runtime_error("Assertion failed"); + } +} + + +bool test_clip_case_001() { + /* The first vertex is visible only */ + sent.clear(); + + auto data = make_vertices({ + {0.000000, -2.414213, 3.080808, 5.000000}, + {-4.526650, -2.414213, -7.121212, -5.000000}, + {4.526650, -2.414213, -7.121212, -5.000000} + }); + + SceneListSubmit(&data[0], data.size()); + + check_equal(sent.size(), 5); + check_equal(sent[0].flags, GPU_CMD_POLYHDR); + check_equal(sent[1].flags, GPU_CMD_VERTEX); + check_equal(sent[2].flags, GPU_CMD_VERTEX); + + // Because we're sending a single triangle, we end up sending a + // degenerate final vert. But if we were sending more than one triangle + // this would be GPU_CMD_VERTEX twice + check_equal(sent[3].flags, GPU_CMD_VERTEX_EOL); + check_equal(sent[4].flags, GPU_CMD_VERTEX_EOL); + check_equal(sent[3], sent[4]); + return true; +} + +bool test_clip_case_010() { + /* The third vertex is visible only */ + sent.clear(); + + auto data = make_vertices({ + {-4.526650, -2.414213, -7.121212, -5.000000}, + {0.000000, -2.414213, 3.080808, 5.000000}, + {4.526650, -2.414213, -7.121212, -5.000000} + }); + + SceneListSubmit(&data[0], data.size()); + + check_equal(sent.size(), 4); + check_equal(sent[0].flags, GPU_CMD_POLYHDR); + check_equal(sent[1].flags, GPU_CMD_VERTEX); + check_equal(sent[2].flags, GPU_CMD_VERTEX); + check_equal(sent[3].flags, GPU_CMD_VERTEX_EOL); + return true; +} + +bool test_clip_case_100() { + /* The third vertex is visible only */ + sent.clear(); + + auto data = make_vertices({ + {-4.526650, -2.414213, -7.121212, -5.000000}, + {4.526650, -2.414213, -7.121212, -5.000000}, + {0.000000, -2.414213, 3.080808, 5.000000} + }); + + SceneListSubmit(&data[0], data.size()); + + check_equal(sent.size(), 5); + check_equal(sent[0].flags, GPU_CMD_POLYHDR); + check_equal(sent[1].flags, GPU_CMD_VERTEX); + check_equal(sent[2].flags, GPU_CMD_VERTEX); + + // Because we're sending a single triangle, we end up sending a + // degenerate final vert. But if we were sending more than one triangle + // this would be GPU_CMD_VERTEX twice + check_equal(sent[3].flags, GPU_CMD_VERTEX); + check_equal(sent[4].flags, GPU_CMD_VERTEX_EOL); + check_equal(sent[1], sent[2]); + return true; +} + +bool test_clip_case_110() { + /* 2nd and 3rd visible */ + sent.clear(); + + auto data = make_vertices({ + {0.0, -2.414213, -7.121212, -5.000000}, + {-4.526650, -2.414213, 3.080808, 5.000000}, + {4.526650, -2.414213, 3.080808, 5.000000} + }); + + SceneListSubmit(&data[0], data.size()); + + check_equal(sent.size(), 6); + check_equal(sent[0].flags, GPU_CMD_POLYHDR); + check_equal(sent[1].flags, GPU_CMD_VERTEX); + check_equal(sent[2].flags, GPU_CMD_VERTEX); + check_equal(sent[3].flags, GPU_CMD_VERTEX); + check_equal(sent[4].flags, GPU_CMD_VERTEX); + check_equal(sent[5].flags, GPU_CMD_VERTEX_EOL); + check_equal(sent[2], sent[4]); + return true; +} + +bool test_clip_case_011() { + /* 1st and 2nd visible */ + sent.clear(); + + auto data = make_vertices({ + {-4.526650, -2.414213, 3.080808, 5.000000}, + {4.526650, -2.414213, 3.080808, 5.000000}, + {0.0, -2.414213, -7.121212, -5.000000} + }); + + SceneListSubmit(&data[0], data.size()); + + check_equal(sent.size(), 6); + check_equal(sent[0].flags, GPU_CMD_POLYHDR); + check_equal(sent[1].flags, GPU_CMD_VERTEX); + check_equal(sent[2].flags, GPU_CMD_VERTEX); + check_equal(sent[3].flags, GPU_CMD_VERTEX); + check_equal(sent[4].flags, GPU_CMD_VERTEX); + check_equal(sent[5].flags, GPU_CMD_VERTEX_EOL); + check_equal(sent[2], sent[4]); + return true; +} + +bool test_clip_case_101() { + /* 1st and 3rd visible */ + sent.clear(); + + auto data = make_vertices({ + {-4.526650, -2.414213, 3.080808, 5.000000}, + {0.0, -2.414213, -7.121212, -5.000000}, + {4.526650, -2.414213, 3.080808, 5.000000}, + }); + + SceneListSubmit(&data[0], data.size()); + + check_equal(sent.size(), 6); + check_equal(sent[0].flags, GPU_CMD_POLYHDR); + check_equal(sent[1].flags, GPU_CMD_VERTEX); + check_equal(sent[2].flags, GPU_CMD_VERTEX); + check_equal(sent[3].flags, GPU_CMD_VERTEX); + check_equal(sent[4].flags, GPU_CMD_VERTEX); + check_equal(sent[5].flags, GPU_CMD_VERTEX_EOL); + check_equal(sent[3], sent[5]); + return true; +} + +bool test_start_behind() { + /* Triangle behind the plane, but the strip continues in front */ + sent.clear(); + + auto data = make_vertices({ + {-3.021717, -2.414213, -10.155344, -9.935254}, + {5.915236, -2.414213, -9.354721, -9.136231}, + {-5.915236, -2.414213, -0.264096, -0.063767}, + {3.021717, -2.414213, 0.536527, 0.735255}, + {-7.361995, -2.414213, 4.681529, 4.871976}, + {1.574958, -2.414213, 5.482152, 5.670999}, + }); + + SceneListSubmit(&data[0], data.size()); + + return true; +} + +int main(int argc, char* argv[]) { + // test_clip_case_000(); + test_clip_case_001(); + test_clip_case_010(); + test_clip_case_100(); + test_clip_case_110(); + test_clip_case_011(); + test_clip_case_101(); + // test_clip_case_111(); + + test_start_behind(); + + return 0; +}