Optimisations

This commit is contained in:
Luke Benstead 2023-05-11 20:00:13 +01:00
parent cba2fb7ceb
commit e683b8becb
4 changed files with 288 additions and 220 deletions

View File

@ -5,75 +5,123 @@
MAKE_FUNC(POLYMODE) MAKE_FUNC(POLYMODE)
{ {
const Vertex* const start = _glSubmissionTargetStart(target); static const float w = 1.0f;
const VertexExtra* const ve_start = aligned_vector_at(target->extras, 0); if(!(ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG)) {
const GLuint vstride = ATTRIB_POINTERS.vertex.stride;
GLuint uvstride = ATTRIB_POINTERS.uv.stride;
GLuint ststride = ATTRIB_POINTERS.st.stride;
GLuint dstride = ATTRIB_POINTERS.colour.stride;
GLuint nstride = ATTRIB_POINTERS.normal.stride;
const GLubyte* pos = (ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG) ? ATTRIB_POINTERS.vertex.ptr + (first * vstride) : NULL;
const GLubyte* uv = (ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) ? ATTRIB_POINTERS.uv.ptr + (first * uvstride) : NULL;
const GLubyte* col = (ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) ? ATTRIB_POINTERS.colour.ptr + (first * dstride) : NULL;
const GLubyte* st = (ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) ? ATTRIB_POINTERS.st.ptr + (first * ststride) : NULL;
const GLubyte* n = (ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) ? ATTRIB_POINTERS.normal.ptr + (first * nstride) : NULL;
const float w = 1.0f;
if(!pos) {
/* If we don't have vertices, do nothing */ /* If we don't have vertices, do nothing */
return; return;
} }
if(!col) { /* This is the best value we have. PROCESS_VERTEX_FLAGS needs to operate on quads and tris and so
col = (GLubyte*) &U4ONE; this needs to be divisible by 4 and 3. Even though we should be able to go much higher than this
dstride = 0; and still be cache-local, trial and error says otherwise... */
}
if(!uv) { #define BATCH_SIZE 60
uv = (GLubyte*) &F2ZERO;
uvstride = 0;
}
if(!st) { GLuint min = 0;
st = (GLubyte*) &F2ZERO; GLuint stride;
ststride = 0; const GLubyte* ptr;
} Vertex* it;
VertexExtra* ve;
if(!n) {
n = (GLubyte*) &F3Z;
nstride = 0;
}
VertexExtra* ve = (VertexExtra*) ve_start; for(int min = 0; min < count; min += BATCH_SIZE) {
Vertex* it = (Vertex*) start; const Vertex* start = ((Vertex*) _glSubmissionTargetStart(target)) + min;
const int_fast32_t loop = ((min + BATCH_SIZE) > count) ? count - min : BATCH_SIZE;
const int offset = (first + min);
for(int_fast32_t i = 0; i < count; ++i) { stride = ATTRIB_POINTERS.vertex.stride;
TransformVertex((const float*) pos, &w, it->xyz, &it->w); ptr = ATTRIB_POINTERS.vertex.ptr + (offset * stride);
pos += vstride; it = (Vertex*) start;
PREFETCH(pos);
*((Float2*) it->uv) = *((Float2*) uv); PREFETCH(ptr);
uv += uvstride; for(int_fast32_t i = 0; i < loop; ++i, ++it) {
PREFETCH(uv); PREFETCH(ptr + stride);
TransformVertex((const float*) ptr, &w, it->xyz, &it->w);
PROCESS_VERTEX_FLAGS(it, min + i);
ptr += stride;
}
*((uint32_t*) it->bgra) = *((uint32_t*) col); stride = ATTRIB_POINTERS.uv.stride;
col += dstride; ptr = (ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) ? ATTRIB_POINTERS.uv.ptr + ((first + min) * stride) : NULL;
PREFETCH(col); it = (Vertex*) start;
*((Float2*) ve->st) = *((Float2*) st); if(ptr) {
st += ststride; PREFETCH(ptr);
PREFETCH(st); for(int_fast32_t i = 0; i < loop; ++i, ++it) {
PREFETCH(ptr + stride);
it->uv[0] = ((float*) ptr)[0];
it->uv[1] = ((float*) ptr)[1];
ptr += stride;
}
} else {
for(int_fast32_t i = 0; i < loop; ++i, ++it) {
it->uv[0] = 0;
it->uv[1] = 0;
}
}
*((Float3*) ve->nxyz) = *((Float3*) n); stride = ATTRIB_POINTERS.colour.stride;
n += nstride; ptr = (ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) ? ATTRIB_POINTERS.colour.ptr + (offset * stride) : NULL;
PREFETCH(n); it = (Vertex*) start;
PROCESS_VERTEX_FLAGS(it, i); if(ptr) {
PREFETCH(ptr);
for(int_fast32_t i = 0; i < loop; ++i, ++it) {
PREFETCH(ptr + stride);
it->bgra[0] = ptr[0];
it->bgra[1] = ptr[1];
it->bgra[2] = ptr[2];
it->bgra[3] = ptr[3];
ptr += stride;
}
} else {
for(int_fast32_t i = 0; i < loop; ++i, ++it) {
*((uint32_t*) it->bgra) = ~0;
}
}
++it; start = aligned_vector_at(target->extras, min);
++ve;
stride = ATTRIB_POINTERS.st.stride;
ptr = (ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) ? ATTRIB_POINTERS.st.ptr + (offset * stride) : NULL;
ve = (VertexExtra*) start;
if(ptr) {
PREFETCH(ptr);
for(int_fast32_t i = 0; i < loop; ++i, ++ve) {
PREFETCH(ptr + stride);
ve->st[0] = ((float*) ptr)[0];
ve->st[1] = ((float*) ptr)[1];
ptr += stride;
}
} else {
for(int_fast32_t i = 0; i < loop; ++i, ++ve) {
ve->st[0] = 0;
ve->st[1] = 0;
}
}
stride = ATTRIB_POINTERS.normal.stride;
ptr = (ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) ? ATTRIB_POINTERS.normal.ptr + (offset * stride) : NULL;
ve = (VertexExtra*) start;
if(ptr) {
PREFETCH(ptr);
for(int_fast32_t i = 0; i < loop; ++i, ++ve) {
PREFETCH(ptr + stride);
ve->nxyz[0] = ((float*) ptr)[0];
ve->nxyz[1] = ((float*) ptr)[1];
ve->nxyz[2] = ((float*) ptr)[2];
ptr += stride;
}
} else {
for(int_fast32_t i = 0; i < loop; ++i, ++ve) {
ve->nxyz[0] = 0;
ve->nxyz[1] = 0;
ve->nxyz[2] = 0;
}
}
} }
} }

View File

@ -95,19 +95,19 @@ void APIENTRY glKosSwapBuffers() {
SceneBegin(); SceneBegin();
if(OP_LIST.vector.size > 2) { if(OP_LIST.vector.size > 2) {
SceneListBegin(GPU_LIST_OP_POLY); SceneListBegin(GPU_LIST_OP_POLY);
SceneListSubmit(OP_LIST.vector.data, OP_LIST.vector.size); SceneListSubmit((Vertex*) OP_LIST.vector.data, OP_LIST.vector.size);
SceneListFinish(); SceneListFinish();
} }
if(PT_LIST.vector.size > 2) { if(PT_LIST.vector.size > 2) {
SceneListBegin(GPU_LIST_PT_POLY); SceneListBegin(GPU_LIST_PT_POLY);
SceneListSubmit(PT_LIST.vector.data, PT_LIST.vector.size); SceneListSubmit((Vertex*) PT_LIST.vector.data, PT_LIST.vector.size);
SceneListFinish(); SceneListFinish();
} }
if(TR_LIST.vector.size > 2) { if(TR_LIST.vector.size > 2) {
SceneListBegin(GPU_LIST_TR_POLY); SceneListBegin(GPU_LIST_TR_POLY);
SceneListSubmit(TR_LIST.vector.data, TR_LIST.vector.size); SceneListSubmit((Vertex*) TR_LIST.vector.data, TR_LIST.vector.size);
SceneListFinish(); SceneListFinish();
} }
SceneFinish(); SceneFinish();

View File

@ -5,6 +5,7 @@
#include <stdbool.h> #include <stdbool.h>
#include "gl_assert.h" #include "gl_assert.h"
#include "types.h"
#define MEMSET(dst, v, size) memset((dst), (v), (size)) #define MEMSET(dst, v, size) memset((dst), (v), (size))
@ -260,7 +261,7 @@ typedef float Matrix4x4[16];
void SceneBegin(); void SceneBegin();
void SceneListBegin(GPUList list); void SceneListBegin(GPUList list);
void SceneListSubmit(void* src, int n); void SceneListSubmit(Vertex* v2, int n);
void SceneListFinish(); void SceneListFinish();
void SceneFinish(); void SceneFinish();

View File

@ -64,31 +64,26 @@ GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) {
} }
} }
GL_FORCE_INLINE void _glSubmitHeaderOrVertex(volatile uint32_t* d, const Vertex* v) {
#ifndef NDEBUG
gl_assert(!isnan(v->xyz[2]));
gl_assert(!isnan(v->w));
#endif
#if CLIP_DEBUG volatile uint32_t *sq = SQ_BASE_ADDRESS;
fprintf(stderr, "Submitting: %x (%x)\n", v, v->flags);
#endif
uint32_t *s = (uint32_t*) v; static inline void _glFlushBuffer() {}
d[0] = *(s++); static inline void _glPushHeaderOrVertex(Vertex* v) {
d[1] = *(s++); uint32_t* s = (uint32_t*) v;
d[2] = *(s++); sq[0] = *(s++);
d[3] = *(s++); sq[1] = *(s++);
d[4] = *(s++); sq[2] = *(s++);
d[5] = *(s++); sq[3] = *(s++);
d[6] = *(s++); sq[4] = *(s++);
d[7] = *(s++); sq[5] = *(s++);
__asm__("pref @%0" : : "r"(d)); sq[6] = *(s++);
d += 8; sq[7] = *(s++);
__asm__("pref @%0" : : "r"(sq));
sq += 8;
} }
static inline void _glClipEdge(const Vertex* const v1, const Vertex* const v2, Vertex* vout) { static inline void _glClipEdge(const Vertex* const v1, const Vertex* const v2, Vertex* vout) {
const static float o = 1.0f / 255.0f; const static float o = 0.003921569f; // 1 / 255
const float d0 = v1->w + v1->xyz[2]; const float d0 = v1->w + v1->xyz[2];
const float d1 = v2->w + v2->xyz[2]; const float d1 = v2->w + v2->xyz[2];
const float t = (fabs(d0) * (1.0f / sqrtf((d1 - d0) * (d1 - d0)))) + 0.000001f; const float t = (fabs(d0) * (1.0f / sqrtf((d1 - d0) * (d1 - d0)))) + 0.000001f;
@ -117,7 +112,7 @@ static volatile uint32_t* PVR_LMMODE0 = (uint32_t*) 0xA05F6884;
static volatile uint32_t *PVR_LMMODE1 = (uint32_t*) 0xA05F6888; static volatile uint32_t *PVR_LMMODE1 = (uint32_t*) 0xA05F6888;
static volatile uint32_t *QACR = (uint32_t*) 0xFF000038; static volatile uint32_t *QACR = (uint32_t*) 0xFF000038;
void SceneListSubmit(void* src, int n) { void SceneListSubmit(Vertex* v2, int n) {
/* You need at least a header, and 3 vertices to render anything */ /* You need at least a header, and 3 vertices to render anything */
if(n < 4) { if(n < 4) {
return; return;
@ -134,111 +129,113 @@ void SceneListSubmit(void* src, int n) {
//Set QACR registers //Set QACR registers
QACR[1] = QACR[0] = 0x11; QACR[1] = QACR[0] = 0x11;
volatile uint32_t *sq = SQ_BASE_ADDRESS;
uint32_t clipping_disabled_mask = (_glNearZClippingEnabled()) ? 0 : 0x7;
#if CLIP_DEBUG #if CLIP_DEBUG
Vertex* vertex = (Vertex*) src;
for(int i = 0; i < n; ++i) { for(int i = 0; i < n; ++i) {
fprintf(stderr, "{%f, %f, %f, %f}, // %x (%x)\n", vertex[i].xyz[0], vertex[i].xyz[1], vertex[i].xyz[2], vertex[i].w, vertex[i].flags, &vertex[i]); fprintf(stderr, "{%f, %f, %f, %f}, // %x (%x)\n", vertex[i].xyz[0], vertex[i].xyz[1], vertex[i].xyz[2], vertex[i].w, vertex[i].flags, &vertex[i]);
} }
fprintf(stderr, "----\n"); fprintf(stderr, "----\n");
#endif #endif
uint8_t counter = 0; uint8_t __attribute__((aligned(32))) visible_mask = 0;
uint8_t __attribute__((aligned(32))) counter = 0;
Vertex* v2 = (Vertex*) src; sq = SQ_BASE_ADDRESS;
while(n--) {
__builtin_prefetch(v2 + 1);
for(int i = 0; i < n; ++i, ++v2) {
switch(v2->flags) { switch(v2->flags) {
case GPU_CMD_VERTEX_EOL: case GPU_CMD_VERTEX_EOL:
if(counter < 2) {
continue;
}
counter = 0;
break;
case GPU_CMD_VERTEX: case GPU_CMD_VERTEX:
if(++counter < 3) { ++counter;
v2++; if(counter < 3) {
continue; continue;
} }
break; break;
default: default:
_glSubmitHeaderOrVertex(sq, v2++); _glPushHeaderOrVertex(v2);
counter = 0; counter = 0;
continue; continue;
} };
Vertex* const v0 = v2 - 2; Vertex* const v0 = v2 - 2;
Vertex* const v1 = v2 - 1; Vertex* const v1 = v2 - 1;
const uint8_t visible_mask = ( visible_mask = (
(v0->xyz[2] > -v0->w) << 0 | (v0->xyz[2] > -v0->w) << 0 |
(v1->xyz[2] > -v1->w) << 1 | (v1->xyz[2] > -v1->w) << 1 |
(v2->xyz[2] > -v2->w) << 2 | (v2->xyz[2] > -v2->w) << 2 |
((v2->flags == GPU_CMD_VERTEX_EOL) << 3) | (counter == 0) << 3
clipping_disabled_mask // This forces everything to be marked visible if clipping is disabled
); );
switch(visible_mask) { switch(visible_mask) {
case 0:
break;
case 15: /* All visible, but final vertex in strip */ case 15: /* All visible, but final vertex in strip */
{ {
_glPerspectiveDivideVertex(v0, h); _glPerspectiveDivideVertex(v0, h);
_glSubmitHeaderOrVertex(sq, v0); _glPushHeaderOrVertex(v0);
Vertex __attribute__((aligned(32))) a = *v1; _glPerspectiveDivideVertex(v1, h);
_glPerspectiveDivideVertex(&a, h); _glPushHeaderOrVertex(v1);
_glSubmitHeaderOrVertex(sq, &a);
a = *v2; _glPerspectiveDivideVertex(v2, h);
_glPerspectiveDivideVertex(&a, h); _glPushHeaderOrVertex(v2);
_glSubmitHeaderOrVertex(sq, &a);
} }
break; break;
case 7: case 7:
/* All visible, push the first vertex and move on */ /* All visible, push the first vertex and move on */
_glPerspectiveDivideVertex(v0, h); _glPerspectiveDivideVertex(v0, h);
_glSubmitHeaderOrVertex(sq, v0); _glPushHeaderOrVertex(v0);
break; break;
case 9: case 9:
/* First vertex was visible, last in strip */ /* First vertex was visible, last in strip */
{ {
Vertex __attribute__((aligned(32))) a, b; Vertex __attribute__((aligned(32))) scratch[2];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
_glClipEdge(v0, v1, &a); _glClipEdge(v0, v1, a);
a.flags = GPU_CMD_VERTEX; a->flags = GPU_CMD_VERTEX;
_glClipEdge(v2, v0, &b); _glClipEdge(v2, v0, b);
b.flags = GPU_CMD_VERTEX_EOL; b->flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(v0, h); _glPerspectiveDivideVertex(v0, h);
_glSubmitHeaderOrVertex(sq, v0); _glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(&a, h); _glPerspectiveDivideVertex(a, h);
_glSubmitHeaderOrVertex(sq, &a); _glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(&b, h); _glPerspectiveDivideVertex(b, h);
_glSubmitHeaderOrVertex(sq, &b); _glPushHeaderOrVertex(b);
} }
break; break;
case 1: case 1:
/* First vertex was visible, but not last in strip */ /* First vertex was visible, but not last in strip */
{ {
Vertex __attribute__((aligned(32))) a, b; Vertex __attribute__((aligned(32))) scratch[2];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
_glClipEdge(v0, v1, &a); _glClipEdge(v0, v1, a);
a.flags = GPU_CMD_VERTEX; a->flags = GPU_CMD_VERTEX;
_glClipEdge(v2, v0, &b); _glClipEdge(v2, v0, b);
b.flags = GPU_CMD_VERTEX; b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h); _glPerspectiveDivideVertex(v0, h);
_glSubmitHeaderOrVertex(sq, v0); _glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(&a, h); _glPerspectiveDivideVertex(a, h);
_glSubmitHeaderOrVertex(sq, &a); _glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(&b, h); _glPerspectiveDivideVertex(b, h);
_glSubmitHeaderOrVertex(sq, &b); _glPushHeaderOrVertex(b);
_glSubmitHeaderOrVertex(sq, &b); _glPushHeaderOrVertex(b);
} }
break; break;
case 10: case 10:
@ -246,165 +243,187 @@ void SceneListSubmit(void* src, int n) {
/* Second vertex was visible. In this case we need to create a triangle and produce /* Second vertex was visible. In this case we need to create a triangle and produce
two new vertices: 1-2, and 2-3. */ two new vertices: 1-2, and 2-3. */
{ {
Vertex __attribute__((aligned(32))) a; Vertex __attribute__((aligned(32))) scratch[2];
Vertex __attribute__((aligned(32))) c = *v1; Vertex* a = &scratch[0];
_glClipEdge(v0, &c, &a); Vertex* c = &scratch[1];
a.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&a, h); memcpy_vertex(c, v1);
_glSubmitHeaderOrVertex(sq, &a);
_glClipEdge(&c, v2, &a); _glClipEdge(v0, c, a);
a.flags = v2->flags; a->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&c, h); _glPerspectiveDivideVertex(a, h);
_glSubmitHeaderOrVertex(sq, &c); _glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(&a, h); _glClipEdge(c, v2, a);
_glSubmitHeaderOrVertex(sq, &a); a->flags = v2->flags;
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
} }
break; break;
case 11: case 11:
case 3: /* First and second vertex were visible */ case 3: /* First and second vertex were visible */
{ {
Vertex __attribute__((aligned(32))) a, b; Vertex __attribute__((aligned(32))) scratch[3];
Vertex __attribute__((aligned(32))) c = *v1; Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
_glClipEdge(v2, v0, &b); memcpy_vertex(c, v1);
b.flags = GPU_CMD_VERTEX;
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h); _glPerspectiveDivideVertex(v0, h);
_glSubmitHeaderOrVertex(sq, v0); _glPushHeaderOrVertex(v0);
_glClipEdge(v1, v2, &a); _glClipEdge(v1, v2, a);
a.flags = v2->flags; a->flags = v2->flags;
_glPerspectiveDivideVertex(&c, h); _glPerspectiveDivideVertex(c, h);
_glSubmitHeaderOrVertex(sq, &c); _glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(&b, h); _glPerspectiveDivideVertex(b, h);
_glSubmitHeaderOrVertex(sq, &b); _glPushHeaderOrVertex(b);
_glPerspectiveDivideVertex(&a, h); _glPerspectiveDivideVertex(a, h);
_glSubmitHeaderOrVertex(sq, &c); _glPushHeaderOrVertex(c);
_glSubmitHeaderOrVertex(sq, &a); _glPushHeaderOrVertex(a);
} }
break; break;
case 12: case 12:
case 4: case 4:
/* Third vertex was visible. */ /* Third vertex was visible. */
{ {
Vertex __attribute__((aligned(32))) a, b; Vertex __attribute__((aligned(32))) scratch[3];
Vertex __attribute__((aligned(32))) c = *v2; Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
_glClipEdge(v2, v0, &a); memcpy_vertex(c, v2);
a.flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, &b); _glClipEdge(v2, v0, a);
b.flags = GPU_CMD_VERTEX; a->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&a, h); _glClipEdge(v1, v2, b);
_glSubmitHeaderOrVertex(sq, &a); b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&b, h); _glPerspectiveDivideVertex(a, h);
_glSubmitHeaderOrVertex(sq, &a); _glPushHeaderOrVertex(a);
_glSubmitHeaderOrVertex(sq, &b);
_glPerspectiveDivideVertex(&c, h); if(counter % 2 == 1) {
_glSubmitHeaderOrVertex(sq, &c); _glPushHeaderOrVertex(a);
}
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
} }
break; break;
case 13: case 13:
{ {
Vertex __attribute__((aligned(32))) a, b; Vertex __attribute__((aligned(32))) scratch[3];
Vertex __attribute__((aligned(32))) c = *v2; Vertex* a = &scratch[0];
c.flags = GPU_CMD_VERTEX; Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
_glClipEdge(v0, v1, &a); memcpy_vertex(c, v2);
a.flags = GPU_CMD_VERTEX; c->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, &b); _glClipEdge(v0, v1, a);
b.flags = GPU_CMD_VERTEX; a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h); _glPerspectiveDivideVertex(v0, h);
_glSubmitHeaderOrVertex(sq, v0); _glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(&a, h); _glPerspectiveDivideVertex(a, h);
_glSubmitHeaderOrVertex(sq, &a); _glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(&c, h); _glPerspectiveDivideVertex(c, h);
_glSubmitHeaderOrVertex(sq, &c); _glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(&b, h); _glPerspectiveDivideVertex(b, h);
_glSubmitHeaderOrVertex(sq, &b); _glPushHeaderOrVertex(b);
c.flags = GPU_CMD_VERTEX_EOL; c->flags = GPU_CMD_VERTEX_EOL;
_glSubmitHeaderOrVertex(sq, &c); _glPushHeaderOrVertex(c);
} }
break; break;
case 5: /* First and third vertex were visible */ case 5: /* First and third vertex were visible */
{ {
Vertex __attribute__((aligned(32))) a, b; Vertex __attribute__((aligned(32))) scratch[3];
Vertex __attribute__((aligned(32))) c = *v2; Vertex* a = &scratch[0];
c.flags = GPU_CMD_VERTEX; Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
_glClipEdge(v0, v1, &a); memcpy_vertex(c, v2);
a.flags = GPU_CMD_VERTEX; c->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, &b); _glClipEdge(v0, v1, a);
b.flags = GPU_CMD_VERTEX; a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h); _glPerspectiveDivideVertex(v0, h);
_glSubmitHeaderOrVertex(sq, v0); _glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(&a, h); _glPerspectiveDivideVertex(a, h);
_glSubmitHeaderOrVertex(sq, &a); _glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(&c, h); _glPerspectiveDivideVertex(c, h);
_glSubmitHeaderOrVertex(sq, &c); _glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(&b, h); _glPerspectiveDivideVertex(b, h);
_glSubmitHeaderOrVertex(sq, &b); _glPushHeaderOrVertex(b);
_glSubmitHeaderOrVertex(sq, &c); _glPushHeaderOrVertex(c);
} }
break; break;
case 14: case 14:
case 6: /* Second and third vertex were visible */ case 6: /* Second and third vertex were visible */
{ {
Vertex __attribute__((aligned(32))) a, b; Vertex __attribute__((aligned(32))) scratch[4];
Vertex __attribute__((aligned(32))) c = *v1; Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
Vertex* d = &scratch[3];
_glClipEdge(v0, v1, &a); memcpy_vertex(c, v1);
a.flags = GPU_CMD_VERTEX; memcpy_vertex(d, v2);
_glClipEdge(v2, v0, &b); _glClipEdge(v0, v1, a);
b.flags = GPU_CMD_VERTEX; a->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&a, h); _glClipEdge(v2, v0, b);
_glSubmitHeaderOrVertex(sq, &a); b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&c, h); _glPerspectiveDivideVertex(a, h);
_glSubmitHeaderOrVertex(sq, &c); _glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(&b, h); _glPerspectiveDivideVertex(c, h);
_glSubmitHeaderOrVertex(sq, &b); _glPushHeaderOrVertex(c);
_glSubmitHeaderOrVertex(sq, &c);
c = *v2; _glPerspectiveDivideVertex(b, h);
_glPerspectiveDivideVertex(&c, h); _glPushHeaderOrVertex(b);
_glSubmitHeaderOrVertex(sq, &c); _glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(d, h);
_glPushHeaderOrVertex(d);
} }
break; break;
default: default:
break; break;
} }
if(v2->flags == GPU_CMD_VERTEX_EOL) {
counter = 0;
} else {
--counter;
}
v2++;
} }
_glFlushBuffer();
} }
void SceneListFinish() { void SceneListFinish() {