Move clipping into list submission

2022-06-09 13:07:51 +01:00 · 2022-06-09 13:07:51 +01:00 · dbb94d0cb9
commit dbb94d0cb9
parent 193f0bdc49
5 changed files with 302 additions and 150 deletions
--- a/GL/clip.c
+++ b/GL/clip.c
@ -251,7 +251,7 @@ void _glClipTriangleStrip(SubmissionTarget* target, uint8_t fladeShade) {
         */

 #define _VERT_VISIBLE(v) \
-    (v->w >= 0 && v->xyz[2] >= -v->w) \
+    (v->xyz[2] > -v->w) \

        uint8_t visible = (
            (_VERT_VISIBLE(v1) ? 4 : 0) |
--- a/GL/draw.c
+++ b/GL/draw.c
@ -933,16 +933,6 @@ static void transform(SubmissionTarget* target) {
    TransformVertices(vertex, target->count);
 }

-static void clip(SubmissionTarget* target) {
-    TRACE();
-
-    /* Perform clipping, generating new vertices as necessary */
-    _glClipTriangleStrip(target, _glGetShadeModel() == GL_FLAT);
-
-    /* Reset the count now that we may have added vertices */
-    target->count = target->output->vector.size - target->start_offset;
-}
-
 static void mat_transform3(const float* xyz, const float* xyzOut, const uint32_t count, const uint32_t inStride, const uint32_t outStride) {
    const uint8_t* dataIn = (const uint8_t*) xyz;
    uint8_t* dataOut = (uint8_t*) xyzOut;
@ -1172,39 +1162,6 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
        transform(target);
    }

-    if(_glIsClippingEnabled()) {
-#if DEBUG_CLIPPING
-        uint32_t i = 0;
-        fprintf(stderr, "=========\n");
-
-        for(i = 0; i < target->count; ++i) {
-            Vertex* v = aligned_vector_at(&target->output->vector, target->start_offset + i);
-            if(v->flags == 0xe0000000 || v->flags == 0xf0000000) {
-                fprintf(stderr, "(%f, %f, %f, %f) -> %x\n", v->xyz[0], v->xyz[1], v->xyz[2], v->w, v->flags);
-            } else {
-                fprintf(stderr, "%x\n", *((uint32_t*)v));
-            }
-        }
-#endif
-
-        clip(target);
-
-        assert(extras.size == target->count);
-
-#if DEBUG_CLIPPING
-        fprintf(stderr, "--------\n");
-        for(i = 0; i < target->count; ++i) {
-            Vertex* v = aligned_vector_at(&target->output->vector, target->start_offset + i);
-            if(v->flags == 0xe0000000 || v->flags == 0xf0000000) {
-                fprintf(stderr, "(%f, %f, %f, %f) -> %x\n", v->xyz[0], v->xyz[1], v->xyz[2], v->w, v->flags);
-            } else {
-                fprintf(stderr, "%x\n", *((uint32_t*)v));
-            }
-        }
-#endif
-
-    }
-
    push(_glSubmissionTargetHeader(target), GL_FALSE, target->output, 0);

    /*
--- a/GL/flush.c
+++ b/GL/flush.c
@ -88,108 +88,19 @@ void APIENTRY glKosInit() {
    glKosInitEx(&config);
 }

-#define likely(x)      __builtin_expect(!!(x), 1)
-#define unlikely(x)    __builtin_expect(!!(x), 0)
-
-GL_FORCE_INLINE bool glIsVertex(const float flags) {
-    return flags == GPU_CMD_VERTEX_EOL || flags == GPU_CMD_VERTEX;
-}
-
-
-GL_FORCE_INLINE void glPerspectiveDivideStandard(void* src, uint32_t n) {
-    TRACE();
-
-    /* Perform perspective divide on each vertex */
-    Vertex* vertex = (Vertex*) src;
-    PREFETCH(vertex + 1);
-
-    const float h = GetVideoMode()->height;
-
-    while(n--) {
-        PREFETCH(vertex + 2);
-
-        if(likely(glIsVertex(vertex->flags))) {
-            const float f = MATH_Fast_Invert(vertex->w);
-
-            /* Convert to NDC and apply viewport */
-            vertex->xyz[0] = __builtin_fmaf(
-                VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth
-            );
-
-            vertex->xyz[1] = h - __builtin_fmaf(
-                VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight
-            );
-
-            /* Orthographic projections need to use invZ otherwise we lose
-            the depth information. As w == 1, and clip-space range is -w to +w
-            we add 1.0 to the Z to bring it into range. We add a little extra to
-            avoid a divide by zero.
-            */
-            if(unlikely(vertex->w == 1.0f)) {
-                vertex->xyz[2] = MATH_Fast_Invert(1.0001f + vertex->xyz[2]);
-            } else {
-                vertex->xyz[2] = f;
-            }
-        }
-
-        ++vertex;
-    }
-}
-
-GL_FORCE_INLINE void glPerspectiveDivideFastMode(void* src, uint32_t n) {
-    TRACE();
-
-    /* Perform perspective divide on each vertex */
-    Vertex* vertex = (Vertex*) src;
-
-    const float h = GetVideoMode()->height;
-
-    while(n--) {
-        PREFETCH(vertex + 1);
-
-        if(likely(glIsVertex(vertex->flags))) {
-            const float f = MATH_Fast_Invert(vertex->w);
-
-            /* Convert to NDC and apply viewport */
-            vertex->xyz[0] = MATH_fmac(
-                VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth
-            );
-
-            vertex->xyz[1] = h - MATH_fmac(
-                VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight
-            );
-
-            vertex->xyz[2] = f;
-        }
-
-        ++vertex;
-    }
-}
-
-GL_FORCE_INLINE void glPerspectiveDivide(void* src, uint32_t n) {
-#if FAST_MODE
-        glPerspectiveDivideFastMode(src, n);
-#else
-        glPerspectiveDivideStandard(src, n);
-#endif
-}
-
 void APIENTRY glKosSwapBuffers() {
    TRACE();

    SceneBegin();
        SceneListBegin(GPU_LIST_OP_POLY);
-        glPerspectiveDivide(OP_LIST.vector.data, OP_LIST.vector.size);
        SceneListSubmit(OP_LIST.vector.data, OP_LIST.vector.size);
        SceneListFinish();

        SceneListBegin(GPU_LIST_PT_POLY);
-        glPerspectiveDivide(PT_LIST.vector.data, PT_LIST.vector.size);
        SceneListSubmit(PT_LIST.vector.data, PT_LIST.vector.size);
        SceneListFinish();

        SceneListBegin(GPU_LIST_TR_POLY);
-        glPerspectiveDivide(TR_LIST.vector.data, TR_LIST.vector.size);
        SceneListSubmit(TR_LIST.vector.data, TR_LIST.vector.size);
        SceneListFinish();
    SceneFinish();
@ -199,4 +110,4 @@ void APIENTRY glKosSwapBuffers() {
    aligned_vector_clear(&TR_LIST.vector);

    _glApplyScissor(true);
-}
+}
--- a/GL/platforms/sh4.c
+++ b/GL/platforms/sh4.c
@ -8,6 +8,18 @@

 #define PVR_VERTEX_BUF_SIZE 2560 * 256

+#define likely(x)      __builtin_expect(!!(x), 1)
+#define unlikely(x)    __builtin_expect(!!(x), 0)
+
+/* Returns true when `flags` is one of the two vertex command words
+ * (GPU_CMD_VERTEX or GPU_CMD_VERTEX_EOL), i.e. the list entry is a vertex
+ * and not a header.
+ *
+ * The flags word is taken as an integer so that the comparison is exact; a
+ * float parameter would round the 32-bit value to 24 bits of precision before
+ * comparing it. */
+GL_FORCE_INLINE bool glIsVertex(const uint32_t flags) {
+    return flags == GPU_CMD_VERTEX_EOL || flags == GPU_CMD_VERTEX;
+}
+
+/* Returns true when `flags` is GPU_CMD_VERTEX_EOL, the command word carried by
+ * the final vertex of a strip.
+ *
+ * The flags word is taken as an integer so that the comparison is exact; a
+ * float parameter would round the 32-bit value to 24 bits of precision before
+ * comparing it. */
+GL_FORCE_INLINE bool glIsLastVertex(const uint32_t flags) {
+    return flags == GPU_CMD_VERTEX_EOL;
+}
+
+
 void InitGPU(_Bool autosort, _Bool fsaa) {
    pvr_init_params_t params = {
        /* Enable opaque and translucent polygons with size 32 and 32 */
@ -32,25 +44,295 @@ void SceneListBegin(GPUList list) {
    pvr_list_begin(list);
 }

-void SceneListSubmit(void* src, int n) {
-    uint32_t *d = (uint32_t*) TA_SQ_ADDR;
-    uint32_t *s = src;
+GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) { /* Rewrites vertex->xyz in place; w itself is left untouched */
+    const float f = MATH_Fast_Invert(vertex->w); /* presumably 1/w - TODO confirm against MATH_Fast_Invert */

-    /* fill/write queues as many times necessary */
-    while(n--) {
-        __asm__("pref @%0" : : "r"(s + 8));  /* prefetch 32 bytes for next loop */
-        d[0] = *(s++);
-        d[1] = *(s++);
-        d[2] = *(s++);
-        d[3] = *(s++);
-        d[4] = *(s++);
-        d[5] = *(s++);
-        d[6] = *(s++);
-        d[7] = *(s++);
-        __asm__("pref @%0" : : "r"(d));
-        d += 8;
+    /* Convert to NDC and apply viewport */
+    vertex->xyz[0] = __builtin_fmaf(
+        VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth
+    );
+
+    vertex->xyz[1] = h - __builtin_fmaf( /* subtracting from h flips the Y axis */
+        VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight
+    );
+
+    /* Orthographic projections need to use invZ otherwise we lose
+    the depth information. As w == 1, and clip-space range is -w to +w
+    we add 1.0 to the Z to bring it into range. We add a little extra to
+    avoid a divide by zero.
+    */
+    if(unlikely(vertex->w == 1.0f)) { /* w == 1 is used here as the marker for an orthographic vertex */
+        vertex->xyz[2] = MATH_Fast_Invert(1.0001f + vertex->xyz[2]);
+    } else {
+        vertex->xyz[2] = f; /* depth reuses the factor already applied to x and y */
    }
+}

+static uint32_t *d;  // SQ target
+
+/* Copies one 8-word (32-byte) list entry, header or vertex, to the current
+ * write position `d` and then advances `d` past it. */
+GL_FORCE_INLINE void _glSubmitHeaderOrVertex(const Vertex* v) {
+    const uint32_t* const words = (const uint32_t*) v;
+
+    /* Prefetch the 32 bytes that follow this entry */
+    __asm__("pref @%0" : : "r"(words + 8));
+
+    d[0] = words[0];
+    d[1] = words[1];
+    d[2] = words[2];
+    d[3] = words[3];
+    d[4] = words[4];
+    d[5] = words[5];
+    d[6] = words[6];
+    d[7] = words[7];
+
+    /* NOTE(review): presumably this pref on the destination is what pushes
+     * the filled store queue out - confirm against the SH4 manual */
+    __asm__("pref @%0" : : "r"(d));
+    d += 8;
+}
+
+static struct {              /* Up to three most recent vertices of the strip being walked */
+    Vertex* v;               /* Points at the vertex inside the list being submitted */
+    int visible;             /* Non-zero when the vertex passed the w > 0 && z > -w test */
+} triangle[3];
+
+static int tri_count = 0;    /* Number of entries of triangle[] currently filled */
+
+
+/* Computes the point at which the edge v1 -> v2 crosses the plane z == -w and
+ * writes the resulting vertex to vout.
+ *
+ * Position, w, UV and colour are all interpolated with the same factor t.
+ * vout->flags is not written here; the caller has to set it.
+ *
+ * The two end points are expected to lie on opposite sides of the plane. If
+ * (w + z) is equal for both, the division below has a zero denominator.
+ */
+GL_FORCE_INLINE void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) {
+    /* w + z is zero exactly on the clipping plane */
+    const float d0 = v1->w + v1->xyz[2];
+    const float d1 = v2->w + v2->xyz[2];
+
+    /* Fraction of the way from v1 to v2 at which the edge meets the plane
+     * (presumably MATH_Fast_Divide(a, b) approximates a / b - TODO confirm) */
+    const float t = MATH_Fast_Divide(d0, (d0 - d1));
+
+    vout->xyz[0] = MATH_fmac(v2->xyz[0] - v1->xyz[0], t, v1->xyz[0]);
+    vout->xyz[1] = MATH_fmac(v2->xyz[1] - v1->xyz[1], t, v1->xyz[1]);
+    vout->xyz[2] = MATH_fmac(v2->xyz[2] - v1->xyz[2], t, v1->xyz[2]);
+    vout->w = MATH_fmac(v2->w - v1->w, t, v1->w);
+
+    vout->uv[0] = MATH_fmac(v2->uv[0] - v1->uv[0], t, v1->uv[0]);
+    vout->uv[1] = MATH_fmac(v2->uv[1] - v1->uv[1], t, v1->uv[1]);
+
+    /* Interpolate the colour like every other attribute. Forcing it to opaque
+     * white discarded the vertex colours of every clipped triangle.
+     * Assumes bgra holds four 8-bit channels - TODO confirm against Vertex. */
+    for(int channel = 0; channel < 4; ++channel) {
+        const float c0 = (float) v1->bgra[channel];
+        const float c1 = (float) v2->bgra[channel];
+        float c = MATH_fmac(c1 - c0, t, c0);
+
+        /* Clamp before narrowing; written so that a NaN also ends up as 0 */
+        if(!(c > 0.0f)) {
+            c = 0.0f;
+        } else if(c > 255.0f) {
+            c = 255.0f;
+        }
+
+        vout->bgra[channel] = (uint8_t) c;
+    }
+}
+
+/* Empties the three-vertex window by resetting tri_count to zero. The slots
+ * of triangle[] themselves are left as they are.
+ *
+ * Declared with (void) so that the definition is a real prototype. */
+GL_FORCE_INLINE void ClearTriangle(void) {
+    tri_count = 0;
+}
+
+/* Discards the oldest entry of the three-vertex window: decrements tri_count
+ * and moves slots 1 and 2 down into slots 0 and 1. When NDEBUG is not defined
+ * the vacated slot is also blanked.
+ *
+ * The decrement is unconditional, so tri_count goes negative if this is
+ * called while it is already zero.
+ *
+ * Declared with (void) so that the definition is a real prototype. */
+GL_FORCE_INLINE void ShiftTriangle(void) {
+    tri_count--;
+    triangle[0] = triangle[1];
+    triangle[1] = triangle[2];
+
+#ifndef NDEBUG
+    triangle[2].v = NULL;
+    triangle[2].visible = false;
+#endif
+}
+
+void SceneListSubmit(void* src, int n) {
+    /* Do everything, everywhere, all at once */
+
+    /* Prep store queues */
+    d = (uint32_t*) TA_SQ_ADDR;
+
+    /* Walk the list one entry (header or vertex) at a time */
+    Vertex* vertex = (Vertex*) src;
+
+    const float h = GetVideoMode()->height;
+
+    tri_count = 0;
+
+    int strip_count = 0;
+
+    for(int i = 0; i < n; ++i) {
+        PREFETCH(vertex + 1);
+
+        bool is_last_in_strip = glIsLastVertex(vertex->flags);
+
+        /* Wait until we fill the triangle */
+        if(tri_count < 3) {
+            if(likely(glIsVertex(vertex->flags))) {
+                triangle[tri_count].v = vertex;
+                triangle[tri_count].visible = vertex->w > 0 && vertex->xyz[2] > -vertex->w;
+                tri_count++;
+                strip_count++;
+            } else {
+                /* We hit a header */
+                tri_count = 0;
+                strip_count = 0;
+                _glSubmitHeaderOrVertex(vertex);
+            }
+
+            if(tri_count < 3) {
+                ++vertex;
+                continue;
+            }
+        }
+
+        /* If we got here, then triangle contains 3 vertices */
+        int visible_mask = triangle[0].visible | (triangle[1].visible << 1) | (triangle[2].visible << 2);
+        if(visible_mask == 7) {
+            /* All the vertices are visible! We divide and submit v0, then shift */
+            _glPerspectiveDivideVertex(triangle[0].v, h);
+            _glSubmitHeaderOrVertex(triangle[0].v);
+        } else if(!visible_mask) {
+            /* None visible, just shift for the next in the strip */
+        } else {
+            /* Clipping time!
+
+                There are 6 distinct possibilities when clipping a triangle. 3 of them result
+                in another triangle, 3 of them result in a quadrilateral.
+
+                Assuming you iterate the edges of the triangle in order, and create a new *visible*
+                vertex when you cross the plane, and discard vertices behind the plane, then the only
+                difference between the two cases is that the final two vertices that need submitting have
+                to be reversed.
+
+                Unfortunately we have to copy vertices here, because if we persp-divide a vertex it may
+                be used in a subsequent triangle in the strip and would end up being double divided.
+            */
+
+            Vertex tmp0, tmp1, tmp2, tmp3;
+
+            switch(visible_mask) {
+                case 1: {
+                    /* 0, 0a, 2a */
+                    tmp0 = *triangle[0].v;
+                    _glClipEdge(triangle[0].v, triangle[1].v, &tmp1);
+                    _glClipEdge(triangle[2].v, triangle[0].v, &tmp2);
+
+                    _glPerspectiveDivideVertex(&tmp0, h);
+                    _glPerspectiveDivideVertex(&tmp1, h);
+                    _glPerspectiveDivideVertex(&tmp2, h);
+
+                    tmp0.flags = tmp1.flags = GPU_CMD_VERTEX;
+                    tmp2.flags = GPU_CMD_VERTEX_EOL;
+
+                    _glSubmitHeaderOrVertex(&tmp0);
+                    _glSubmitHeaderOrVertex(&tmp1);
+                    _glSubmitHeaderOrVertex(&tmp2);
+                } break;
+                case 2: {
+                    /* 0a, 1, 1a */
+                    _glClipEdge(triangle[0].v, triangle[1].v, &tmp0);
+                    tmp1 = *triangle[1].v;
+                    _glClipEdge(triangle[1].v, triangle[2].v, &tmp2);
+
+                    _glPerspectiveDivideVertex(&tmp0, h);
+                    _glPerspectiveDivideVertex(&tmp1, h);
+                    _glPerspectiveDivideVertex(&tmp2, h);
+
+                    tmp0.flags = tmp1.flags = GPU_CMD_VERTEX;
+                    tmp2.flags = GPU_CMD_VERTEX_EOL;
+
+                    _glSubmitHeaderOrVertex(&tmp0);
+                    _glSubmitHeaderOrVertex(&tmp1);
+                    _glSubmitHeaderOrVertex(&tmp2);
+                } break;
+                case 3: {
+                    /* 0, 1, 2a, 1a */
+                    tmp0 = *triangle[0].v;
+                    tmp1 = *triangle[1].v;
+                    _glClipEdge(triangle[2].v, triangle[0].v, &tmp2);
+                    _glClipEdge(triangle[1].v, triangle[2].v, &tmp3);
+
+                    _glPerspectiveDivideVertex(&tmp0, h);
+                    _glPerspectiveDivideVertex(&tmp1, h);
+                    _glPerspectiveDivideVertex(&tmp2, h);
+                    _glPerspectiveDivideVertex(&tmp3, h);
+
+                    tmp0.flags = tmp1.flags = tmp2.flags = GPU_CMD_VERTEX;
+                    tmp3.flags = GPU_CMD_VERTEX_EOL;
+
+                    _glSubmitHeaderOrVertex(&tmp0);
+                    _glSubmitHeaderOrVertex(&tmp1);
+                    _glSubmitHeaderOrVertex(&tmp2);
+                    _glSubmitHeaderOrVertex(&tmp3);
+                } break;
+                case 4: {
+                    /* 1a, 2, 2a */
+                    _glClipEdge(triangle[1].v, triangle[2].v, &tmp0);
+                    tmp1 = *triangle[2].v;
+                    _glClipEdge(triangle[2].v, triangle[0].v, &tmp2);
+
+                    _glPerspectiveDivideVertex(&tmp0, h);
+                    _glPerspectiveDivideVertex(&tmp1, h);
+                    _glPerspectiveDivideVertex(&tmp2, h);
+
+                    tmp0.flags = tmp1.flags = GPU_CMD_VERTEX;
+                    tmp2.flags = GPU_CMD_VERTEX_EOL;
+
+                    _glSubmitHeaderOrVertex(&tmp0);
+                    _glSubmitHeaderOrVertex(&tmp1);
+                    _glSubmitHeaderOrVertex(&tmp2);
+                } break;
+                case 5: {
+                    /* 0, 0a, 2, 1a */
+                    tmp0 = *triangle[0].v;
+                    _glClipEdge(triangle[0].v, triangle[1].v, &tmp1);
+                    tmp2 = *triangle[2].v;
+                    _glClipEdge(triangle[1].v, triangle[2].v, &tmp3);
+
+                    _glPerspectiveDivideVertex(&tmp0, h);
+                    _glPerspectiveDivideVertex(&tmp1, h);
+                    _glPerspectiveDivideVertex(&tmp2, h);
+                    _glPerspectiveDivideVertex(&tmp3, h);
+
+                    tmp0.flags = tmp1.flags = tmp2.flags = GPU_CMD_VERTEX;
+                    tmp3.flags = GPU_CMD_VERTEX_EOL;
+
+                    _glSubmitHeaderOrVertex(&tmp0);
+                    _glSubmitHeaderOrVertex(&tmp1);
+                    _glSubmitHeaderOrVertex(&tmp2);
+                    _glSubmitHeaderOrVertex(&tmp3);
+                } break;
+                case 6: {
+                    /* 0a, 1, 2a, 2 */
+                    _glClipEdge(triangle[0].v, triangle[1].v, &tmp0);
+                    tmp1 = *triangle[1].v;
+                    _glClipEdge(triangle[2].v, triangle[0].v, &tmp2);
+                    tmp3 = *triangle[2].v;
+
+                    _glPerspectiveDivideVertex(&tmp0, h);
+                    _glPerspectiveDivideVertex(&tmp1, h);
+                    _glPerspectiveDivideVertex(&tmp2, h);
+                    _glPerspectiveDivideVertex(&tmp3, h);
+
+                    tmp0.flags = tmp1.flags = tmp2.flags = GPU_CMD_VERTEX;
+                    tmp3.flags = GPU_CMD_VERTEX_EOL;
+
+                    _glSubmitHeaderOrVertex(&tmp0);
+                    _glSubmitHeaderOrVertex(&tmp1);
+                    _glSubmitHeaderOrVertex(&tmp2);
+                    _glSubmitHeaderOrVertex(&tmp3);
+                } break;
+                default:
+                break;
+            }
+
+            /* If this was the last in the strip, we don't need to
+            submit anything else, we just wipe the tri_count */
+            if(is_last_in_strip) {
+                tri_count = 0;
+                strip_count = 0;
+            }
+        }
+
+        /* Drop the oldest vertex so the next one in the strip can be
+        appended; the end-of-strip flush and reset happen just below */
+        ShiftTriangle();
+
+        if(is_last_in_strip) {
+            for(int i = 0; i < tri_count; ++i) {
+                if(triangle[i].visible) {
+                    _glPerspectiveDivideVertex(triangle[i].v, h);
+                    _glSubmitHeaderOrVertex(triangle[i].v);
+                }
+            }
+            ClearTriangle();
+        }
+        ++vertex;
+    }
    /* Wait for both store queues to complete */
    d = (uint32_t *)0xe0000000;
    d[0] = d[8] = 0;
--- a/GL/platforms/sh4.h
+++ b/GL/platforms/sh4.h
@ -8,6 +8,8 @@
 #include <dc/matrix3d.h>

 #include "../types.h"
+#include "../private.h"
+
 #include "sh4_math.h"

 #ifndef NDEBUG