diff --git a/GL/clip.c b/GL/clip.c
index 4573afe..ed051d8 100644
--- a/GL/clip.c
+++ b/GL/clip.c
@@ -251,7 +251,7 @@ void _glClipTriangleStrip(SubmissionTarget* target, uint8_t fladeShade) {
      */
 
     #define _VERT_VISIBLE(v) \
-        (v->w >= 0 && v->xyz[2] >= -v->w) \
+        (v->xyz[2] > -v->w) \
 
     uint8_t visible = (
         (_VERT_VISIBLE(v1) ? 4 : 0) |
diff --git a/GL/draw.c b/GL/draw.c
index ee822d3..1caa2ed 100644
--- a/GL/draw.c
+++ b/GL/draw.c
@@ -933,16 +933,6 @@ static void transform(SubmissionTarget* target) {
     TransformVertices(vertex, target->count);
 }
 
-static void clip(SubmissionTarget* target) {
-    TRACE();
-
-    /* Perform clipping, generating new vertices as necessary */
-    _glClipTriangleStrip(target, _glGetShadeModel() == GL_FLAT);
-
-    /* Reset the count now that we may have added vertices */
-    target->count = target->output->vector.size - target->start_offset;
-}
-
 static void mat_transform3(const float* xyz, const float* xyzOut, const uint32_t count, const uint32_t inStride, const uint32_t outStride) {
     const uint8_t* dataIn = (const uint8_t*) xyz;
     uint8_t* dataOut = (uint8_t*) xyzOut;
@@ -1172,39 +1162,6 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
         transform(target);
     }
 
-    if(_glIsClippingEnabled()) {
-#if DEBUG_CLIPPING
-        uint32_t i = 0;
-        fprintf(stderr, "=========\n");
-
-        for(i = 0; i < target->count; ++i) {
-            Vertex* v = aligned_vector_at(&target->output->vector, target->start_offset + i);
-            if(v->flags == 0xe0000000 || v->flags == 0xf0000000) {
-                fprintf(stderr, "(%f, %f, %f, %f) -> %x\n", v->xyz[0], v->xyz[1], v->xyz[2], v->w, v->flags);
-            } else {
-                fprintf(stderr, "%x\n", *((uint32_t*)v));
-            }
-        }
-#endif
-
-        clip(target);
-
-        assert(extras.size == target->count);
-
-#if DEBUG_CLIPPING
-        fprintf(stderr, "--------\n");
-        for(i = 0; i < target->count; ++i) {
-            Vertex* v = aligned_vector_at(&target->output->vector, target->start_offset + i);
-            if(v->flags == 0xe0000000 || v->flags == 0xf0000000) {
-                fprintf(stderr, "(%f, %f, %f, %f) -> %x\n", v->xyz[0], v->xyz[1], v->xyz[2], v->w, v->flags);
-            } else {
-                fprintf(stderr, "%x\n", *((uint32_t*)v));
-            }
-        }
-#endif
-
-    }
-
     push(_glSubmissionTargetHeader(target), GL_FALSE, target->output, 0);
 
     /*
diff --git a/GL/flush.c b/GL/flush.c
index bd5602a..31f311e 100644
--- a/GL/flush.c
+++ b/GL/flush.c
@@ -88,108 +88,19 @@ void APIENTRY glKosInit() {
     glKosInitEx(&config);
 }
 
-#define likely(x) __builtin_expect(!!(x), 1)
-#define unlikely(x) __builtin_expect(!!(x), 0)
-
-GL_FORCE_INLINE bool glIsVertex(const float flags) {
-    return flags == GPU_CMD_VERTEX_EOL || flags == GPU_CMD_VERTEX;
-}
-
-
-GL_FORCE_INLINE void glPerspectiveDivideStandard(void* src, uint32_t n) {
-    TRACE();
-
-    /* Perform perspective divide on each vertex */
-    Vertex* vertex = (Vertex*) src;
-    PREFETCH(vertex + 1);
-
-    const float h = GetVideoMode()->height;
-
-    while(n--) {
-        PREFETCH(vertex + 2);
-
-        if(likely(glIsVertex(vertex->flags))) {
-            const float f = MATH_Fast_Invert(vertex->w);
-
-            /* Convert to NDC and apply viewport */
-            vertex->xyz[0] = __builtin_fmaf(
-                VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth
-            );
-
-            vertex->xyz[1] = h - __builtin_fmaf(
-                VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight
-            );
-
-            /* Orthographic projections need to use invZ otherwise we lose
-            the depth information. As w == 1, and clip-space range is -w to +w
-            we add 1.0 to the Z to bring it into range. We add a little extra to
-            avoid a divide by zero.
-            */
-            if(unlikely(vertex->w == 1.0f)) {
-                vertex->xyz[2] = MATH_Fast_Invert(1.0001f + vertex->xyz[2]);
-            } else {
-                vertex->xyz[2] = f;
-            }
-        }
-
-        ++vertex;
-    }
-}
-
-GL_FORCE_INLINE void glPerspectiveDivideFastMode(void* src, uint32_t n) {
-    TRACE();
-
-    /* Perform perspective divide on each vertex */
-    Vertex* vertex = (Vertex*) src;
-
-    const float h = GetVideoMode()->height;
-
-    while(n--) {
-        PREFETCH(vertex + 1);
-
-        if(likely(glIsVertex(vertex->flags))) {
-            const float f = MATH_Fast_Invert(vertex->w);
-
-            /* Convert to NDC and apply viewport */
-            vertex->xyz[0] = MATH_fmac(
-                VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth
-            );
-
-            vertex->xyz[1] = h - MATH_fmac(
-                VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight
-            );
-
-            vertex->xyz[2] = f;
-        }
-
-        ++vertex;
-    }
-}
-
-GL_FORCE_INLINE void glPerspectiveDivide(void* src, uint32_t n) {
-#if FAST_MODE
-    glPerspectiveDivideFastMode(src, n);
-#else
-    glPerspectiveDivideStandard(src, n);
-#endif
-}
-
 void APIENTRY glKosSwapBuffers() {
     TRACE();
 
     SceneBegin();
         SceneListBegin(GPU_LIST_OP_POLY);
-        glPerspectiveDivide(OP_LIST.vector.data, OP_LIST.vector.size);
         SceneListSubmit(OP_LIST.vector.data, OP_LIST.vector.size);
         SceneListFinish();
 
         SceneListBegin(GPU_LIST_PT_POLY);
-        glPerspectiveDivide(PT_LIST.vector.data, PT_LIST.vector.size);
         SceneListSubmit(PT_LIST.vector.data, PT_LIST.vector.size);
         SceneListFinish();
 
         SceneListBegin(GPU_LIST_TR_POLY);
-        glPerspectiveDivide(TR_LIST.vector.data, TR_LIST.vector.size);
         SceneListSubmit(TR_LIST.vector.data, TR_LIST.vector.size);
         SceneListFinish();
     SceneFinish();
diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c
index 23d8c7a..321108a 100644
--- a/GL/platforms/sh4.c
+++ b/GL/platforms/sh4.c
@@ -8,6 +8,22 @@
 
 #define PVR_VERTEX_BUF_SIZE 2560 * 256
 
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+
+/* True when `flags` is one of the two vertex command words. The parameter is
+   an integer on purpose: passing the word as a float would round it to 24
+   significant bits before the comparison. */
+GL_FORCE_INLINE bool glIsVertex(const uint32_t flags) {
+    return flags == GPU_CMD_VERTEX_EOL || flags == GPU_CMD_VERTEX;
+}
+
+/* True when `flags` marks the final vertex of a strip. */
+GL_FORCE_INLINE bool glIsLastVertex(const uint32_t flags) {
+    return flags == GPU_CMD_VERTEX_EOL;
+}
+
+
 void InitGPU(_Bool autosort, _Bool fsaa) {
     pvr_init_params_t params = {
         /* Enable opaque and translucent polygons with size 32 and 32 */
@@ -32,25 +48,317 @@ void SceneListBegin(GPUList list) {
     pvr_list_begin(list);
 }
 
-void SceneListSubmit(void* src, int n) {
-    uint32_t *d = (uint32_t*) TA_SQ_ADDR;
-    uint32_t *s = src;
+/* Perspective-divides `vertex` in place: x and y are scaled by 1/w and mapped
+   through VIEWPORT (y is flipped against the video mode height `h`), and
+   xyz[2] is replaced by 1/w, except in the orthographic case handled below. */
+GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) {
+    const float f = MATH_Fast_Invert(vertex->w);
 
-    /* fill/write queues as many times necessary */
-    while(n--) {
-        __asm__("pref @%0" : : "r"(s + 8)); /* prefetch 32 bytes for next loop */
-        d[0] = *(s++);
-        d[1] = *(s++);
-        d[2] = *(s++);
-        d[3] = *(s++);
-        d[4] = *(s++);
-        d[5] = *(s++);
-        d[6] = *(s++);
-        d[7] = *(s++);
-        __asm__("pref @%0" : : "r"(d));
-        d += 8;
+    /* Convert to NDC and apply viewport */
+    vertex->xyz[0] = __builtin_fmaf(
+        VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth
+    );
+
+    vertex->xyz[1] = h - __builtin_fmaf(
+        VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight
+    );
+
+    /* Orthographic projections need to use invZ otherwise we lose
+    the depth information. As w == 1, and clip-space range is -w to +w
+    we add 1.0 to the Z to bring it into range. We add a little extra to
+    avoid a divide by zero.
+    */
+    if(unlikely(vertex->w == 1.0f)) {
+        vertex->xyz[2] = MATH_Fast_Invert(1.0001f + vertex->xyz[2]);
+    } else {
+        vertex->xyz[2] = f;
     }
+}
 
+static uint32_t *d; /* Store-queue write cursor, advanced by _glSubmitHeaderOrVertex() */
+
+/* Copies one 32-byte list entry (a polygon header or a vertex) to the
+   store-queue cursor `d` as eight 32-bit words, then advances the cursor. */
+GL_FORCE_INLINE void _glSubmitHeaderOrVertex(const Vertex* v) {
+    const uint32_t *s = (const uint32_t*) v;
+    __asm__("pref @%0" : : "r"(s + 8)); /* prefetch 32 bytes for next loop */
+    d[0] = *(s++);
+    d[1] = *(s++);
+    d[2] = *(s++);
+    d[3] = *(s++);
+    d[4] = *(s++);
+    d[5] = *(s++);
+    d[6] = *(s++);
+    d[7] = *(s++);
+    __asm__("pref @%0" : : "r"(d));
+    d += 8;
+}
+
+/* Sliding window over the strip being processed: up to three vertices, each
+   with the result of the visibility test applied in SceneListSubmit(). */
+static struct {
+    Vertex* v;
+    int visible;
+} triangle[3];
+
+static int tri_count = 0;
+
+
+/* Writes to `vout` the point where the edge v1 -> v2 crosses the z = -w plane.
+   Position, w, texture coordinates and colour are interpolated; the caller
+   sets vout->flags afterwards. */
+GL_FORCE_INLINE void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) {
+    /* Signed distance of each endpoint from the z = -w plane */
+    const float d0 = v1->w + v1->xyz[2];
+    const float d1 = v2->w + v2->xyz[2];
+
+    float t = MATH_Fast_Divide(d0, (d0 - d1));
+
+    vout->xyz[0] = MATH_fmac(v2->xyz[0] - v1->xyz[0], t, v1->xyz[0]);
+    vout->xyz[1] = MATH_fmac(v2->xyz[1] - v1->xyz[1], t, v1->xyz[1]);
+    vout->xyz[2] = MATH_fmac(v2->xyz[2] - v1->xyz[2], t, v1->xyz[2]);
+    vout->w = MATH_fmac(v2->w - v1->w, t, v1->w);
+
+    vout->uv[0] = MATH_fmac(v2->uv[0] - v1->uv[0], t, v1->uv[0]);
+    vout->uv[1] = MATH_fmac(v2->uv[1] - v1->uv[1], t, v1->uv[1]);
+
+    /* Interpolate the colour like every other attribute; a constant white
+       here would discard the vertex colours of every clipped triangle. */
+    vout->bgra[0] = (uint8_t) MATH_fmac((float) v2->bgra[0] - (float) v1->bgra[0], t, (float) v1->bgra[0]);
+    vout->bgra[1] = (uint8_t) MATH_fmac((float) v2->bgra[1] - (float) v1->bgra[1], t, (float) v1->bgra[1]);
+    vout->bgra[2] = (uint8_t) MATH_fmac((float) v2->bgra[2] - (float) v1->bgra[2], t, (float) v1->bgra[2]);
+    vout->bgra[3] = (uint8_t) MATH_fmac((float) v2->bgra[3] - (float) v1->bgra[3], t, (float) v1->bgra[3]);
+}
+
+/* Empties the vertex window, ready for the next strip. */
+GL_FORCE_INLINE void ClearTriangle(void) {
+    tri_count = 0;
+}
+
+/* Drops the oldest vertex so the next strip vertex lands in slot 2. */
+GL_FORCE_INLINE void ShiftTriangle(void) {
+    tri_count--;
+    triangle[0] = triangle[1];
+    triangle[1] = triangle[2];
+
+#ifndef NDEBUG
+    triangle[2].v = NULL;
+    triangle[2].visible = false;
+#endif
+}
+
+/* Submits the `n` 32-byte entries starting at `src` (polygon headers and
+   triangle-strip vertices) through the store queues. Strips are clipped
+   against the z = -w plane and vertices are perspective-divided on the way
+   through, so `src` must hold undivided clip-space vertices. Fully visible
+   vertices are divided in place; clipped triangles are emitted from copies. */
+void SceneListSubmit(void* src, int n) {
+    /* One pass over the list: clip, perspective divide and submit */
+
+    /* Prep store queues */
+    d = (uint32_t*) TA_SQ_ADDR;
+
+    /* Cursor over the headers and vertices in the list */
+    Vertex* vertex = (Vertex*) src;
+
+    const float h = GetVideoMode()->height;
+
+    tri_count = 0;
+
+    for(int i = 0; i < n; ++i) {
+        PREFETCH(vertex + 1);
+
+        bool is_last_in_strip = glIsLastVertex(vertex->flags);
+
+        /* Wait until we fill the triangle */
+        if(tri_count < 3) {
+            if(likely(glIsVertex(vertex->flags))) {
+                triangle[tri_count].v = vertex;
+                triangle[tri_count].visible = vertex->w > 0 && vertex->xyz[2] > -vertex->w;
+                tri_count++;
+            } else {
+                /* We hit a header */
+                tri_count = 0;
+                _glSubmitHeaderOrVertex(vertex);
+            }
+
+            if(tri_count < 3) {
+                /* A strip that ends with fewer than three vertices cannot form
+                a triangle; drop it so it doesn't leak into the next strip */
+                if(is_last_in_strip) {
+                    tri_count = 0;
+                }
+                ++vertex;
+                continue;
+            }
+        }
+
+        /* If we got here, then triangle contains 3 vertices */
+        int visible_mask = triangle[0].visible | (triangle[1].visible << 1) | (triangle[2].visible << 2);
+        if(visible_mask == 7) {
+            /* All the vertices are visible! We divide and submit v0, then shift */
+            _glPerspectiveDivideVertex(triangle[0].v, h);
+            _glSubmitHeaderOrVertex(triangle[0].v);
+        } else if(!visible_mask) {
+            /* None visible, just shift for the next in the strip */
+        } else {
+            /* Clipping time!
+
+            There are 6 distinct possibilities when clipping a triangle. 3 of them result
+            in another triangle, 3 of them result in a quadrilateral.
+
+            Assuming you iterate the edges of the triangle in order, and create a new *visible*
+            vertex when you cross the plane, and discard vertices behind the plane, then the only
+            difference between the two cases is that the final two vertices that need submitting have
+            to be reversed.
+
+            Unfortunately we have to copy vertices here, because if we persp-divide a vertex it may
+            be used in a subsequent triangle in the strip and would end up being double divided.
+
+            In the case comments below "Na" is the new vertex created on the edge
+            that runs from vertex N to vertex (N + 1) % 3.
+            */
+
+            Vertex tmp0, tmp1, tmp2, tmp3;
+
+            switch(visible_mask) {
+                case 1: {
+                    /* 0, 0a, 2a */
+                    tmp0 = *triangle[0].v;
+                    _glClipEdge(triangle[0].v, triangle[1].v, &tmp1);
+                    _glClipEdge(triangle[2].v, triangle[0].v, &tmp2);
+
+                    _glPerspectiveDivideVertex(&tmp0, h);
+                    _glPerspectiveDivideVertex(&tmp1, h);
+                    _glPerspectiveDivideVertex(&tmp2, h);
+
+                    tmp0.flags = tmp1.flags = GPU_CMD_VERTEX;
+                    tmp2.flags = GPU_CMD_VERTEX_EOL;
+
+                    _glSubmitHeaderOrVertex(&tmp0);
+                    _glSubmitHeaderOrVertex(&tmp1);
+                    _glSubmitHeaderOrVertex(&tmp2);
+                } break;
+                case 2: {
+                    /* 0a, 1, 1a */
+                    _glClipEdge(triangle[0].v, triangle[1].v, &tmp0);
+                    tmp1 = *triangle[1].v;
+                    _glClipEdge(triangle[1].v, triangle[2].v, &tmp2);
+
+                    _glPerspectiveDivideVertex(&tmp0, h);
+                    _glPerspectiveDivideVertex(&tmp1, h);
+                    _glPerspectiveDivideVertex(&tmp2, h);
+
+                    tmp0.flags = tmp1.flags = GPU_CMD_VERTEX;
+                    tmp2.flags = GPU_CMD_VERTEX_EOL;
+
+                    _glSubmitHeaderOrVertex(&tmp0);
+                    _glSubmitHeaderOrVertex(&tmp1);
+                    _glSubmitHeaderOrVertex(&tmp2);
+                } break;
+                case 3: {
+                    /* 0, 1, 2a, 1a */
+                    tmp0 = *triangle[0].v;
+                    tmp1 = *triangle[1].v;
+                    _glClipEdge(triangle[2].v, triangle[0].v, &tmp2);
+                    _glClipEdge(triangle[1].v, triangle[2].v, &tmp3);
+
+                    _glPerspectiveDivideVertex(&tmp0, h);
+                    _glPerspectiveDivideVertex(&tmp1, h);
+                    _glPerspectiveDivideVertex(&tmp2, h);
+                    _glPerspectiveDivideVertex(&tmp3, h);
+
+                    tmp0.flags = tmp1.flags = tmp2.flags = GPU_CMD_VERTEX;
+                    tmp3.flags = GPU_CMD_VERTEX_EOL;
+
+                    _glSubmitHeaderOrVertex(&tmp0);
+                    _glSubmitHeaderOrVertex(&tmp1);
+                    _glSubmitHeaderOrVertex(&tmp2);
+                    _glSubmitHeaderOrVertex(&tmp3);
+                } break;
+                case 4: {
+                    /* 1a, 2, 2a */
+                    _glClipEdge(triangle[1].v, triangle[2].v, &tmp0);
+                    tmp1 = *triangle[2].v;
+                    _glClipEdge(triangle[2].v, triangle[0].v, &tmp2);
+
+                    _glPerspectiveDivideVertex(&tmp0, h);
+                    _glPerspectiveDivideVertex(&tmp1, h);
+                    _glPerspectiveDivideVertex(&tmp2, h);
+
+                    tmp0.flags = tmp1.flags = GPU_CMD_VERTEX;
+                    tmp2.flags = GPU_CMD_VERTEX_EOL;
+
+                    _glSubmitHeaderOrVertex(&tmp0);
+                    _glSubmitHeaderOrVertex(&tmp1);
+                    _glSubmitHeaderOrVertex(&tmp2);
+                } break;
+                case 5: {
+                    /* 0, 0a, 2, 1a */
+                    tmp0 = *triangle[0].v;
+                    _glClipEdge(triangle[0].v, triangle[1].v, &tmp1);
+                    tmp2 = *triangle[2].v;
+                    _glClipEdge(triangle[1].v, triangle[2].v, &tmp3);
+
+                    _glPerspectiveDivideVertex(&tmp0, h);
+                    _glPerspectiveDivideVertex(&tmp1, h);
+                    _glPerspectiveDivideVertex(&tmp2, h);
+                    _glPerspectiveDivideVertex(&tmp3, h);
+
+                    tmp0.flags = tmp1.flags = tmp2.flags = GPU_CMD_VERTEX;
+                    tmp3.flags = GPU_CMD_VERTEX_EOL;
+
+                    _glSubmitHeaderOrVertex(&tmp0);
+                    _glSubmitHeaderOrVertex(&tmp1);
+                    _glSubmitHeaderOrVertex(&tmp2);
+                    _glSubmitHeaderOrVertex(&tmp3);
+                } break;
+                case 6: {
+                    /* 0a, 1, 2a, 2 */
+                    _glClipEdge(triangle[0].v, triangle[1].v, &tmp0);
+                    tmp1 = *triangle[1].v;
+                    _glClipEdge(triangle[2].v, triangle[0].v, &tmp2);
+                    tmp3 = *triangle[2].v;
+
+                    _glPerspectiveDivideVertex(&tmp0, h);
+                    _glPerspectiveDivideVertex(&tmp1, h);
+                    _glPerspectiveDivideVertex(&tmp2, h);
+                    _glPerspectiveDivideVertex(&tmp3, h);
+
+                    tmp0.flags = tmp1.flags = tmp2.flags = GPU_CMD_VERTEX;
+                    tmp3.flags = GPU_CMD_VERTEX_EOL;
+
+                    _glSubmitHeaderOrVertex(&tmp0);
+                    _glSubmitHeaderOrVertex(&tmp1);
+                    _glSubmitHeaderOrVertex(&tmp2);
+                    _glSubmitHeaderOrVertex(&tmp3);
+                } break;
+                default:
+                    break;
+            }
+
+            /* The clipped output already ended with an EOL vertex, so for the last
+            triangle of a strip zero the count to keep the flush below from running */
+            if(is_last_in_strip) {
+                tri_count = 0;
+            }
+        }
+
+        /* Slide the window along by one vertex; if that vertex ended the strip,
+        flush whatever visible vertices remain in the window and reset it */
+        ShiftTriangle();
+
+        if(is_last_in_strip) {
+            for(int j = 0; j < tri_count; ++j) {
+                if(triangle[j].visible) {
+                    _glPerspectiveDivideVertex(triangle[j].v, h);
+                    _glSubmitHeaderOrVertex(triangle[j].v);
+                }
+            }
+            ClearTriangle();
+        }
+        ++vertex;
+    }
 
     /* Wait for both store queues to complete */
     d = (uint32_t *)0xe0000000;
     d[0] = d[8] = 0;
diff --git a/GL/platforms/sh4.h b/GL/platforms/sh4.h
index 9e518f1..c8cedf7 100644
--- a/GL/platforms/sh4.h
+++ b/GL/platforms/sh4.h
@@ -8,6 +8,8 @@
 #include
 #include "../types.h"
 
+#include "../private.h"
+
 #include "sh4_math.h"
 
 #ifndef NDEBUG