From 0923b5c601836339ef5511a84670a17a29f29142 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Sun, 23 Apr 2023 20:16:15 +0100 Subject: [PATCH] Further optimisations --- GL/platforms/sh4.c | 57 +++++++++++++++++++++++++--------------------- GL/private.h | 4 ++-- 2 files changed, 33 insertions(+), 28 deletions(-) diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index 2ed55f0..061b5fe 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -154,9 +154,8 @@ void SceneListSubmit(void* src, int n) { /* The most vertices ever in the queue is 5 (as some clipping operations * produce and additional couple of vertice, but we add one more so the ring buffer doesn't * trip over itself (e.g. if tail == head we can guarantee it's empty, not full) */ - Vertex __attribute__((aligned(32))) queue[4]; - const int queue_capacity = sizeof(queue) / sizeof(Vertex); - + const static uint8_t queue_capacity = 4; + Vertex __attribute__((aligned(32))) queue[queue_capacity]; Vertex* vertex = (Vertex*) src; #if CLIP_DEBUG @@ -195,28 +194,28 @@ void SceneListSubmit(void* src, int n) { fprintf(stderr, "%d\n", visible_mask); #endif Vertex __attribute__((aligned(32))) a, b; // Scratch vertices - switch(visible_mask) { + switch(visible_mask | (last_vertex << 3)) { case 0: break; + case 15: /* All visible, but final vertex in strip */ + const int8_t v1 = (queue_head + 1) % queue_capacity; + const int8_t v2 = (queue_head + 2) % queue_capacity; + + _glPerspectiveDivideVertex(&queue[queue_head], h); + _glSubmitHeaderOrVertex(d, &queue[queue_head]); + + _glPerspectiveDivideVertex(&queue[v1], h); + _glSubmitHeaderOrVertex(d, &queue[v1]); + + _glPerspectiveDivideVertex(&queue[v2], h); + _glSubmitHeaderOrVertex(d, &queue[v2]); + break; case 7: /* All visible, push the first vertex and move on */ _glPerspectiveDivideVertex(&queue[queue_head], h); _glSubmitHeaderOrVertex(d, &queue[queue_head]); - - if(last_vertex) { - /* If this was the last vertex in the strip, we need to flush the queue and then - restart it again */ - - int v1 = (queue_head + 1) % queue_capacity; - int v2 = (queue_head + 2) % queue_capacity; - - _glPerspectiveDivideVertex(&queue[v1], h); - _glSubmitHeaderOrVertex(d, &queue[v1]); - - _glPerspectiveDivideVertex(&queue[v2], h); - _glSubmitHeaderOrVertex(d, &queue[v2]); - } break; + case 9: case 1: /* First vertex was visible */ { @@ -235,37 +234,40 @@ void SceneListSubmit(void* src, int n) { b.flags = v2->flags; _glPerspectiveDivideVertex(v0, h); - _glPerspectiveDivideVertex(&a, h); - _glPerspectiveDivideVertex(&b, h); - _glSubmitHeaderOrVertex(d, v0); + + _glPerspectiveDivideVertex(&a, h); _glSubmitHeaderOrVertex(d, &a); + + _glPerspectiveDivideVertex(&b, h); _glSubmitHeaderOrVertex(d, &b); _glSubmitHeaderOrVertex(d, &b); } break; + case 10: case 2: /* Second vertex was visible. In self case we need to create a triangle and produce two new vertices: 1-2, and 2-3. */ { Vertex* v0 = &queue[queue_head]; - const Vertex* v1 = &queue[(queue_head + 1) % queue_capacity]; + Vertex __attribute__((aligned(32))) v1 = queue[(queue_head + 1) % queue_capacity]; const Vertex* v2 = &queue[(queue_head + 2) % queue_capacity]; - _glClipEdge(v0, v1, &a); - _glClipEdge(v1, v2, &b); + _glClipEdge(v0, &v1, &a); + _glClipEdge(&v1, v2, &b); a.flags = GPU_CMD_VERTEX; b.flags = v2->flags; - _glPerspectiveDivideVertex(v0, h); + _glPerspectiveDivideVertex(&v1, h); _glPerspectiveDivideVertex(&a, h); _glPerspectiveDivideVertex(&b, h); _glSubmitHeaderOrVertex(d, &a); - _glSubmitHeaderOrVertex(d, v0); + _glSubmitHeaderOrVertex(d, &v1); _glSubmitHeaderOrVertex(d, &b); } break; + case 11: case 3: /* First and second vertex were visible */ { Vertex* v0 = &queue[queue_head]; @@ -290,6 +292,7 @@ void SceneListSubmit(void* src, int n) { _glSubmitHeaderOrVertex(d, &a); } break; + case 12: case 4: /* Third vertex was visible. */ { @@ -312,6 +315,7 @@ void SceneListSubmit(void* src, int n) { _glSubmitHeaderOrVertex(d, &v2); } break; + case 13: case 5: /* First and third vertex were visible */ { Vertex* v0 = &queue[queue_head]; @@ -338,6 +342,7 @@ void SceneListSubmit(void* src, int n) { _glSubmitHeaderOrVertex(d, &v2); } break; + case 14: case 6: /* Second and third vertex were visible */ { Vertex* v0 = &queue[queue_head]; diff --git a/GL/private.h b/GL/private.h index bfbfebb..c3f50cb 100644 --- a/GL/private.h +++ b/GL/private.h @@ -241,8 +241,8 @@ GL_FORCE_INLINE void memcpy_vertex(Vertex *dest, const Vertex *src) { asm volatile ( "fschg\n\t" - "clrs\n" - ".align 2\n" + "clrs\n\t" + ".align 2\n\t" "fmov.d @%[in]+, %[scratch]\n\t" "fmov.d %[scratch], @%[out]\n\t" "fmov.d @%[in]+, %[scratch]\n\t"