Clean up submission code

This commit is contained in:
Luke Benstead 2023-03-20 19:37:19 +00:00
parent 279581c5a6
commit b18f390f02
3 changed files with 76 additions and 61 deletions

View File

@ -72,7 +72,7 @@ GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) {
vertex->xyz[2] = (vertex->w == 1.0f) ? _glFastInvert(1.0001f + vertex->xyz[2]) : f;
}
GL_FORCE_INLINE void _glSubmitHeaderOrVertex(uint32_t* d, const Vertex* v) {
GL_FORCE_INLINE void _glSubmitHeaderOrVertex(uint32_t* d, const uint32_t* s) {
#ifndef NDEBUG
gl_assert(!isnan(v->xyz[2]));
gl_assert(!isnan(v->w));
@ -81,8 +81,6 @@ GL_FORCE_INLINE void _glSubmitHeaderOrVertex(uint32_t* d, const Vertex* v) {
#if CLIP_DEBUG
printf("Submitting: %x (%x)\n", v, v->flags);
#endif
uint32_t *s = (uint32_t*) v;
__asm__("pref @%0" : : "r"(s + 8)); /* prefetch 32 bytes for next loop */
d[0] = *(s++);
d[1] = *(s++);
@ -172,37 +170,28 @@ GL_FORCE_INLINE void ShiftRotateTriangle() {
#define SPAN_SORT_CFG 0x005F8030
void SceneListSubmit(void* src, int n) {
static inline void submit_unclipped(uint32_t* d, Vertex* vertex, int n) {
const float h = GetVideoMode()->height;
PVR_SET(SPAN_SORT_CFG, 0x0);
uint32_t *d = (uint32_t*) SQ_BASE_ADDRESS;
*PVR_LMMODE0 = 0x0; /* Enable 64bit mode */
Vertex __attribute__((aligned(32))) tmp;
/* Perform perspective divide on each vertex */
Vertex* vertex = (Vertex*) src;
if(!_glNearZClippingEnabled()) {
/* Prep store queues */
for(int i = 0; i < n; ++i, ++vertex) {
PREFETCH(vertex + 1);
if(glIsVertex(vertex->flags)) {
_glPerspectiveDivideVertex(vertex, h);
}
_glSubmitHeaderOrVertex(d, vertex);
for(int i = 0; i < n; ++i, ++vertex) {
PREFETCH(vertex + 1);
if(glIsVertex(vertex->flags)) {
_glPerspectiveDivideVertex(vertex, h);
}
/* Wait for both store queues to complete */
d = (uint32_t *) SQ_BASE_ADDRESS;
d[0] = d[8] = 0;
return;
_glSubmitHeaderOrVertex(d, (const uint32_t*) vertex);
}
/* Wait for both store queues to complete */
d = (uint32_t *) SQ_BASE_ADDRESS;
d[0] = d[8] = 0;
return;
}
static inline void submit_clipped(uint32_t* d, Vertex* vertex, int n) {
static Vertex __attribute__((aligned(32))) tmp;
const float h = GetVideoMode()->height;
tri_count = 0;
strip_count = 0;
@ -226,7 +215,7 @@ void SceneListSubmit(void* src, int n) {
/* We hit a header */
tri_count = 0;
strip_count = 0;
_glSubmitHeaderOrVertex(d, vertex);
_glSubmitHeaderOrVertex(d, (const uint32_t*) vertex);
continue;
}
}
@ -257,11 +246,11 @@ void SceneListSubmit(void* src, int n) {
tmp = *(vertex - 2); \
/* If we had triangles ahead of this one, submit and finalize */ \
_glPerspectiveDivideVertex(&tmp, h); \
_glSubmitHeaderOrVertex(d, &tmp); \
_glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp); \
tmp = *(vertex - 1); \
tmp.flags = GPU_CMD_VERTEX_EOL; \
_glPerspectiveDivideVertex(&tmp, h); \
_glSubmitHeaderOrVertex(d, &tmp); \
_glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp); \
}
bool is_last_in_strip = glIsLastVertex(vertex->flags);
@ -273,17 +262,17 @@ void SceneListSubmit(void* src, int n) {
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp);
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp);
} break;
case 2: {
SUBMIT_QUEUED();
@ -291,17 +280,17 @@ void SceneListSubmit(void* src, int n) {
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp);
tmp = *triangle[1].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp);
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp);
} break;
case 3: {
SUBMIT_QUEUED();
@ -309,22 +298,22 @@ void SceneListSubmit(void* src, int n) {
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp);
tmp = *triangle[1].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp);
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp);
} break;
case 4: {
SUBMIT_QUEUED();
@ -332,17 +321,17 @@ void SceneListSubmit(void* src, int n) {
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp);
tmp = *triangle[2].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp);
} break;
case 5: {
SUBMIT_QUEUED();
@ -350,22 +339,22 @@ void SceneListSubmit(void* src, int n) {
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp);
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp);
tmp = *triangle[2].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp);
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp);
} break;
case 6: {
SUBMIT_QUEUED();
@ -373,33 +362,33 @@ void SceneListSubmit(void* src, int n) {
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp);
tmp = *triangle[1].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp);
tmp = *triangle[2].v;
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp);
} break;
case 7: {
/* All the vertices are visible! We divide and submit v0, then shift */
_glPerspectiveDivideVertex(vertex - 2, h);
_glSubmitHeaderOrVertex(d, vertex - 2);
_glSubmitHeaderOrVertex(d, (const uint32_t*) (vertex - 2));
if(is_last_in_strip) {
_glPerspectiveDivideVertex(vertex - 1, h);
_glSubmitHeaderOrVertex(d, vertex - 1);
_glSubmitHeaderOrVertex(d, (const uint32_t*) (vertex - 1));
_glPerspectiveDivideVertex(vertex, h);
_glSubmitHeaderOrVertex(d, vertex);
_glSubmitHeaderOrVertex(d, (const uint32_t*) vertex);
tri_count = 0;
strip_count = 0;
}
@ -428,6 +417,25 @@ void SceneListSubmit(void* src, int n) {
d[0] = d[8] = 0;
}
void SceneListSubmit(void* src, int n) {
PVR_SET(SPAN_SORT_CFG, 0x0);
uint32_t *d = (uint32_t*) SQ_BASE_ADDRESS;
*PVR_LMMODE0 = 0x0; /* Enable 64bit mode */
*((volatile int *)0xA05F6888) = 1;
/* Perform perspective divide on each vertex */
Vertex* vertex = (Vertex*) src;
if(!_glNearZClippingEnabled()) {
/* Prep store queues */
submit_unclipped(d, vertex, n);
} else {
submit_clipped(d, vertex, n);
}
}
/*
 * Thin wrapper that forwards to pvr_list_finish().
 *
 * Takes no arguments and returns nothing. Declared with an explicit
 * `(void)` parameter list so the definition acts as a prototype and the
 * compiler can reject calls that pass arguments.
 */
void SceneListFinish(void) {
    pvr_list_finish();
}

View File

@ -236,7 +236,8 @@ void glDrawPVRArrays32KOS(GLenum mode, GLint first, GLsizei count, void* data);
/* Draw polygons by directly copying the vertices into the PVR command stream. Transformation
will happen as usual. Enabled client state *is NOT* respected. This uses the extended vertex format which includes
normals and secondary texture coordinates.
normals and secondary texture coordinates. These will only take effect if you've enabled secondary texture coordinates
with glEnable (for ST) or enabled lighting (for normals). GL_NORMALIZE is not respected.
ERRORS:

View File

@ -155,8 +155,10 @@ int main(int argc, char **argv)
InitGL(640, 480);
ReSizeGLScene(640, 480);
uint64_t us = timer_us_gettime64();
uint64_t accum = 0;
uint64_t last_time = timer_us_gettime64();
uint32_t frames = 0;
while(1) {
if(check_start())
break;
@ -165,12 +167,16 @@ int main(int argc, char **argv)
++frames;
uint64_t now = timer_us_gettime64();
uint64_t diff = (now - us);
if(diff > 5000000) {
uint64_t diff = (now - last_time);
last_time = now;
accum += diff;
if(accum > 5000000) {
printf("Frame time: %f\n", (float)accum / frames / 1000000.0f);
printf("FPS: %f\n", ((float) frames) / 5.0f);
fflush(stdout);
frames = 0;
us = now;
accum = 0;
}
}