Move clipping into list submission
This commit is contained in:
parent
193f0bdc49
commit
dbb94d0cb9
|
@ -251,7 +251,7 @@ void _glClipTriangleStrip(SubmissionTarget* target, uint8_t fladeShade) {
|
|||
*/
|
||||
|
||||
#define _VERT_VISIBLE(v) \
|
||||
(v->w >= 0 && v->xyz[2] >= -v->w) \
|
||||
(v->xyz[2] > -v->w) \
|
||||
|
||||
uint8_t visible = (
|
||||
(_VERT_VISIBLE(v1) ? 4 : 0) |
|
||||
|
|
43
GL/draw.c
43
GL/draw.c
|
@ -933,16 +933,6 @@ static void transform(SubmissionTarget* target) {
|
|||
TransformVertices(vertex, target->count);
|
||||
}
|
||||
|
||||
static void clip(SubmissionTarget* target) {
|
||||
TRACE();
|
||||
|
||||
/* Perform clipping, generating new vertices as necessary */
|
||||
_glClipTriangleStrip(target, _glGetShadeModel() == GL_FLAT);
|
||||
|
||||
/* Reset the count now that we may have added vertices */
|
||||
target->count = target->output->vector.size - target->start_offset;
|
||||
}
|
||||
|
||||
static void mat_transform3(const float* xyz, const float* xyzOut, const uint32_t count, const uint32_t inStride, const uint32_t outStride) {
|
||||
const uint8_t* dataIn = (const uint8_t*) xyz;
|
||||
uint8_t* dataOut = (uint8_t*) xyzOut;
|
||||
|
@ -1172,39 +1162,6 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
|
|||
transform(target);
|
||||
}
|
||||
|
||||
if(_glIsClippingEnabled()) {
|
||||
#if DEBUG_CLIPPING
|
||||
uint32_t i = 0;
|
||||
fprintf(stderr, "=========\n");
|
||||
|
||||
for(i = 0; i < target->count; ++i) {
|
||||
Vertex* v = aligned_vector_at(&target->output->vector, target->start_offset + i);
|
||||
if(v->flags == 0xe0000000 || v->flags == 0xf0000000) {
|
||||
fprintf(stderr, "(%f, %f, %f, %f) -> %x\n", v->xyz[0], v->xyz[1], v->xyz[2], v->w, v->flags);
|
||||
} else {
|
||||
fprintf(stderr, "%x\n", *((uint32_t*)v));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
clip(target);
|
||||
|
||||
assert(extras.size == target->count);
|
||||
|
||||
#if DEBUG_CLIPPING
|
||||
fprintf(stderr, "--------\n");
|
||||
for(i = 0; i < target->count; ++i) {
|
||||
Vertex* v = aligned_vector_at(&target->output->vector, target->start_offset + i);
|
||||
if(v->flags == 0xe0000000 || v->flags == 0xf0000000) {
|
||||
fprintf(stderr, "(%f, %f, %f, %f) -> %x\n", v->xyz[0], v->xyz[1], v->xyz[2], v->w, v->flags);
|
||||
} else {
|
||||
fprintf(stderr, "%x\n", *((uint32_t*)v));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
push(_glSubmissionTargetHeader(target), GL_FALSE, target->output, 0);
|
||||
|
||||
/*
|
||||
|
|
89
GL/flush.c
89
GL/flush.c
|
@ -88,108 +88,19 @@ void APIENTRY glKosInit() {
|
|||
glKosInitEx(&config);
|
||||
}
|
||||
|
||||
#define likely(x) __builtin_expect(!!(x), 1)
|
||||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
|
||||
GL_FORCE_INLINE bool glIsVertex(const float flags) {
|
||||
return flags == GPU_CMD_VERTEX_EOL || flags == GPU_CMD_VERTEX;
|
||||
}
|
||||
|
||||
|
||||
GL_FORCE_INLINE void glPerspectiveDivideStandard(void* src, uint32_t n) {
|
||||
TRACE();
|
||||
|
||||
/* Perform perspective divide on each vertex */
|
||||
Vertex* vertex = (Vertex*) src;
|
||||
PREFETCH(vertex + 1);
|
||||
|
||||
const float h = GetVideoMode()->height;
|
||||
|
||||
while(n--) {
|
||||
PREFETCH(vertex + 2);
|
||||
|
||||
if(likely(glIsVertex(vertex->flags))) {
|
||||
const float f = MATH_Fast_Invert(vertex->w);
|
||||
|
||||
/* Convert to NDC and apply viewport */
|
||||
vertex->xyz[0] = __builtin_fmaf(
|
||||
VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth
|
||||
);
|
||||
|
||||
vertex->xyz[1] = h - __builtin_fmaf(
|
||||
VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight
|
||||
);
|
||||
|
||||
/* Orthographic projections need to use invZ otherwise we lose
|
||||
the depth information. As w == 1, and clip-space range is -w to +w
|
||||
we add 1.0 to the Z to bring it into range. We add a little extra to
|
||||
avoid a divide by zero.
|
||||
*/
|
||||
if(unlikely(vertex->w == 1.0f)) {
|
||||
vertex->xyz[2] = MATH_Fast_Invert(1.0001f + vertex->xyz[2]);
|
||||
} else {
|
||||
vertex->xyz[2] = f;
|
||||
}
|
||||
}
|
||||
|
||||
++vertex;
|
||||
}
|
||||
}
|
||||
|
||||
GL_FORCE_INLINE void glPerspectiveDivideFastMode(void* src, uint32_t n) {
|
||||
TRACE();
|
||||
|
||||
/* Perform perspective divide on each vertex */
|
||||
Vertex* vertex = (Vertex*) src;
|
||||
|
||||
const float h = GetVideoMode()->height;
|
||||
|
||||
while(n--) {
|
||||
PREFETCH(vertex + 1);
|
||||
|
||||
if(likely(glIsVertex(vertex->flags))) {
|
||||
const float f = MATH_Fast_Invert(vertex->w);
|
||||
|
||||
/* Convert to NDC and apply viewport */
|
||||
vertex->xyz[0] = MATH_fmac(
|
||||
VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth
|
||||
);
|
||||
|
||||
vertex->xyz[1] = h - MATH_fmac(
|
||||
VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight
|
||||
);
|
||||
|
||||
vertex->xyz[2] = f;
|
||||
}
|
||||
|
||||
++vertex;
|
||||
}
|
||||
}
|
||||
|
||||
GL_FORCE_INLINE void glPerspectiveDivide(void* src, uint32_t n) {
|
||||
#if FAST_MODE
|
||||
glPerspectiveDivideFastMode(src, n);
|
||||
#else
|
||||
glPerspectiveDivideStandard(src, n);
|
||||
#endif
|
||||
}
|
||||
|
||||
void APIENTRY glKosSwapBuffers() {
|
||||
TRACE();
|
||||
|
||||
SceneBegin();
|
||||
SceneListBegin(GPU_LIST_OP_POLY);
|
||||
glPerspectiveDivide(OP_LIST.vector.data, OP_LIST.vector.size);
|
||||
SceneListSubmit(OP_LIST.vector.data, OP_LIST.vector.size);
|
||||
SceneListFinish();
|
||||
|
||||
SceneListBegin(GPU_LIST_PT_POLY);
|
||||
glPerspectiveDivide(PT_LIST.vector.data, PT_LIST.vector.size);
|
||||
SceneListSubmit(PT_LIST.vector.data, PT_LIST.vector.size);
|
||||
SceneListFinish();
|
||||
|
||||
SceneListBegin(GPU_LIST_TR_POLY);
|
||||
glPerspectiveDivide(TR_LIST.vector.data, TR_LIST.vector.size);
|
||||
SceneListSubmit(TR_LIST.vector.data, TR_LIST.vector.size);
|
||||
SceneListFinish();
|
||||
SceneFinish();
|
||||
|
|
|
@ -8,6 +8,18 @@
|
|||
|
||||
#define PVR_VERTEX_BUF_SIZE 2560 * 256
|
||||
|
||||
#define likely(x) __builtin_expect(!!(x), 1)
|
||||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
|
||||
GL_FORCE_INLINE bool glIsVertex(const float flags) {
|
||||
return flags == GPU_CMD_VERTEX_EOL || flags == GPU_CMD_VERTEX;
|
||||
}
|
||||
|
||||
GL_FORCE_INLINE bool glIsLastVertex(const float flags) {
|
||||
return flags == GPU_CMD_VERTEX_EOL;
|
||||
}
|
||||
|
||||
|
||||
void InitGPU(_Bool autosort, _Bool fsaa) {
|
||||
pvr_init_params_t params = {
|
||||
/* Enable opaque and translucent polygons with size 32 and 32 */
|
||||
|
@ -32,12 +44,34 @@ void SceneListBegin(GPUList list) {
|
|||
pvr_list_begin(list);
|
||||
}
|
||||
|
||||
void SceneListSubmit(void* src, int n) {
|
||||
uint32_t *d = (uint32_t*) TA_SQ_ADDR;
|
||||
uint32_t *s = src;
|
||||
GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) {
|
||||
const float f = MATH_Fast_Invert(vertex->w);
|
||||
|
||||
/* fill/write queues as many times necessary */
|
||||
while(n--) {
|
||||
/* Convert to NDC and apply viewport */
|
||||
vertex->xyz[0] = __builtin_fmaf(
|
||||
VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth
|
||||
);
|
||||
|
||||
vertex->xyz[1] = h - __builtin_fmaf(
|
||||
VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight
|
||||
);
|
||||
|
||||
/* Orthographic projections need to use invZ otherwise we lose
|
||||
the depth information. As w == 1, and clip-space range is -w to +w
|
||||
we add 1.0 to the Z to bring it into range. We add a little extra to
|
||||
avoid a divide by zero.
|
||||
*/
|
||||
if(unlikely(vertex->w == 1.0f)) {
|
||||
vertex->xyz[2] = MATH_Fast_Invert(1.0001f + vertex->xyz[2]);
|
||||
} else {
|
||||
vertex->xyz[2] = f;
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t *d; // SQ target
|
||||
|
||||
GL_FORCE_INLINE void _glSubmitHeaderOrVertex(const Vertex* v) {
|
||||
uint32_t *s = (uint32_t*) v;
|
||||
__asm__("pref @%0" : : "r"(s + 8)); /* prefetch 32 bytes for next loop */
|
||||
d[0] = *(s++);
|
||||
d[1] = *(s++);
|
||||
|
@ -49,8 +83,256 @@ void SceneListSubmit(void* src, int n) {
|
|||
d[7] = *(s++);
|
||||
__asm__("pref @%0" : : "r"(d));
|
||||
d += 8;
|
||||
}
|
||||
|
||||
static struct {
|
||||
Vertex* v;
|
||||
int visible;
|
||||
} triangle[3];
|
||||
|
||||
static int tri_count = 0;
|
||||
|
||||
|
||||
GL_FORCE_INLINE void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) {
|
||||
/* Clipping time! */
|
||||
const float d0 = v1->w + v1->xyz[2];
|
||||
const float d1 = v2->w + v2->xyz[2];
|
||||
|
||||
float t = MATH_Fast_Divide(d0, (d0 - d1));
|
||||
|
||||
vout->xyz[0] = MATH_fmac(v2->xyz[0] - v1->xyz[0], t, v1->xyz[0]);
|
||||
vout->xyz[1] = MATH_fmac(v2->xyz[1] - v1->xyz[1], t, v1->xyz[1]);
|
||||
vout->xyz[2] = MATH_fmac(v2->xyz[2] - v1->xyz[2], t, v1->xyz[2]);
|
||||
vout->w = MATH_fmac(v2->w - v1->w, t, v1->w);
|
||||
|
||||
vout->uv[0] = MATH_fmac(v2->uv[0] - v1->uv[0], t, v1->uv[0]);
|
||||
vout->uv[1] = MATH_fmac(v2->uv[1] - v1->uv[1], t, v1->uv[1]);
|
||||
|
||||
vout->bgra[0] = 0xFF;
|
||||
vout->bgra[1] = 0xFF;
|
||||
vout->bgra[2] = 0xFF;
|
||||
vout->bgra[3] = 0xFF;
|
||||
}
|
||||
|
||||
GL_FORCE_INLINE void ClearTriangle() {
|
||||
tri_count = 0;
|
||||
}
|
||||
|
||||
GL_FORCE_INLINE void ShiftTriangle() {
|
||||
tri_count--;
|
||||
triangle[0] = triangle[1];
|
||||
triangle[1] = triangle[2];
|
||||
|
||||
#ifndef NDEBUG
|
||||
triangle[2].v = NULL;
|
||||
triangle[2].visible = false;
|
||||
#endif
|
||||
}
|
||||
|
||||
void SceneListSubmit(void* src, int n) {
|
||||
/* Do everything, everywhere, all at once */
|
||||
|
||||
/* Prep store queues */
|
||||
d = (uint32_t*) TA_SQ_ADDR;
|
||||
|
||||
/* Perform perspective divide on each vertex */
|
||||
Vertex* vertex = (Vertex*) src;
|
||||
|
||||
const float h = GetVideoMode()->height;
|
||||
|
||||
tri_count = 0;
|
||||
|
||||
int strip_count = 0;
|
||||
|
||||
for(int i = 0; i < n; ++i) {
|
||||
PREFETCH(vertex + 1);
|
||||
|
||||
bool is_last_in_strip = glIsLastVertex(vertex->flags);
|
||||
|
||||
/* Wait until we fill the triangle */
|
||||
if(tri_count < 3) {
|
||||
if(likely(glIsVertex(vertex->flags))) {
|
||||
triangle[tri_count].v = vertex;
|
||||
triangle[tri_count].visible = vertex->w > 0 && vertex->xyz[2] > -vertex->w;
|
||||
tri_count++;
|
||||
strip_count++;
|
||||
} else {
|
||||
/* We hit a header */
|
||||
tri_count = 0;
|
||||
strip_count = 0;
|
||||
_glSubmitHeaderOrVertex(vertex);
|
||||
}
|
||||
|
||||
if(tri_count < 3) {
|
||||
++vertex;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* If we got here, then triangle contains 3 vertices */
|
||||
int visible_mask = triangle[0].visible | (triangle[1].visible << 1) | (triangle[2].visible << 2);
|
||||
if(visible_mask == 7) {
|
||||
/* All the vertices are visible! We divide and submit v0, then shift */
|
||||
_glPerspectiveDivideVertex(triangle[0].v, h);
|
||||
_glSubmitHeaderOrVertex(triangle[0].v);
|
||||
} else if(!visible_mask) {
|
||||
/* None visible, just shift for the next in the strip */
|
||||
} else {
|
||||
/* Clipping time!
|
||||
|
||||
There are 6 distinct possibilities when clipping a triangle. 3 of them result
|
||||
in another triangle, 3 of them result in a quadrilateral.
|
||||
|
||||
Assuming you iterate the edges of the triangle in order, and create a new *visible*
|
||||
vertex when you cross the plane, and discard vertices behind the plane, then the only
|
||||
difference between the two cases is that the final two vertices that need submitting have
|
||||
to be reversed.
|
||||
|
||||
Unfortunately we have to copy vertices here, because if we persp-divide a vertex it may
|
||||
be used in a subsequent triangle in the strip and would end up being double divided.
|
||||
*/
|
||||
|
||||
Vertex tmp0, tmp1, tmp2, tmp3;
|
||||
|
||||
switch(visible_mask) {
|
||||
case 1: {
|
||||
/* 0, 0a, 2a */
|
||||
tmp0 = *triangle[0].v;
|
||||
_glClipEdge(triangle[0].v, triangle[1].v, &tmp1);
|
||||
_glClipEdge(triangle[2].v, triangle[0].v, &tmp2);
|
||||
|
||||
_glPerspectiveDivideVertex(&tmp0, h);
|
||||
_glPerspectiveDivideVertex(&tmp1, h);
|
||||
_glPerspectiveDivideVertex(&tmp2, h);
|
||||
|
||||
tmp0.flags = tmp1.flags = GPU_CMD_VERTEX;
|
||||
tmp2.flags = GPU_CMD_VERTEX_EOL;
|
||||
|
||||
_glSubmitHeaderOrVertex(&tmp0);
|
||||
_glSubmitHeaderOrVertex(&tmp1);
|
||||
_glSubmitHeaderOrVertex(&tmp2);
|
||||
} break;
|
||||
case 2: {
|
||||
/* 0a, 1, 1a */
|
||||
_glClipEdge(triangle[0].v, triangle[1].v, &tmp0);
|
||||
tmp1 = *triangle[1].v;
|
||||
_glClipEdge(triangle[1].v, triangle[2].v, &tmp2);
|
||||
|
||||
_glPerspectiveDivideVertex(&tmp0, h);
|
||||
_glPerspectiveDivideVertex(&tmp1, h);
|
||||
_glPerspectiveDivideVertex(&tmp2, h);
|
||||
|
||||
tmp0.flags = tmp1.flags = GPU_CMD_VERTEX;
|
||||
tmp2.flags = GPU_CMD_VERTEX_EOL;
|
||||
|
||||
_glSubmitHeaderOrVertex(&tmp0);
|
||||
_glSubmitHeaderOrVertex(&tmp1);
|
||||
_glSubmitHeaderOrVertex(&tmp2);
|
||||
} break;
|
||||
case 3: {
|
||||
/* 0, 1, 2a, 1a */
|
||||
tmp0 = *triangle[0].v;
|
||||
tmp1 = *triangle[1].v;
|
||||
_glClipEdge(triangle[2].v, triangle[0].v, &tmp2);
|
||||
_glClipEdge(triangle[1].v, triangle[2].v, &tmp3);
|
||||
|
||||
_glPerspectiveDivideVertex(&tmp0, h);
|
||||
_glPerspectiveDivideVertex(&tmp1, h);
|
||||
_glPerspectiveDivideVertex(&tmp2, h);
|
||||
_glPerspectiveDivideVertex(&tmp3, h);
|
||||
|
||||
tmp0.flags = tmp1.flags = tmp2.flags = GPU_CMD_VERTEX;
|
||||
tmp3.flags = GPU_CMD_VERTEX_EOL;
|
||||
|
||||
_glSubmitHeaderOrVertex(&tmp0);
|
||||
_glSubmitHeaderOrVertex(&tmp1);
|
||||
_glSubmitHeaderOrVertex(&tmp2);
|
||||
_glSubmitHeaderOrVertex(&tmp3);
|
||||
} break;
|
||||
case 4: {
|
||||
/* 1a, 2, 2a */
|
||||
_glClipEdge(triangle[1].v, triangle[2].v, &tmp0);
|
||||
tmp1 = *triangle[2].v;
|
||||
_glClipEdge(triangle[2].v, triangle[0].v, &tmp2);
|
||||
|
||||
_glPerspectiveDivideVertex(&tmp0, h);
|
||||
_glPerspectiveDivideVertex(&tmp1, h);
|
||||
_glPerspectiveDivideVertex(&tmp2, h);
|
||||
|
||||
tmp0.flags = tmp1.flags = GPU_CMD_VERTEX;
|
||||
tmp2.flags = GPU_CMD_VERTEX_EOL;
|
||||
|
||||
_glSubmitHeaderOrVertex(&tmp0);
|
||||
_glSubmitHeaderOrVertex(&tmp1);
|
||||
_glSubmitHeaderOrVertex(&tmp2);
|
||||
} break;
|
||||
case 5: {
|
||||
/* 0, 0a, 2, 1a */
|
||||
tmp0 = *triangle[0].v;
|
||||
_glClipEdge(triangle[0].v, triangle[1].v, &tmp1);
|
||||
tmp2 = *triangle[2].v;
|
||||
_glClipEdge(triangle[1].v, triangle[2].v, &tmp3);
|
||||
|
||||
_glPerspectiveDivideVertex(&tmp0, h);
|
||||
_glPerspectiveDivideVertex(&tmp1, h);
|
||||
_glPerspectiveDivideVertex(&tmp2, h);
|
||||
_glPerspectiveDivideVertex(&tmp3, h);
|
||||
|
||||
tmp0.flags = tmp1.flags = tmp2.flags = GPU_CMD_VERTEX;
|
||||
tmp3.flags = GPU_CMD_VERTEX_EOL;
|
||||
|
||||
_glSubmitHeaderOrVertex(&tmp0);
|
||||
_glSubmitHeaderOrVertex(&tmp1);
|
||||
_glSubmitHeaderOrVertex(&tmp2);
|
||||
_glSubmitHeaderOrVertex(&tmp3);
|
||||
} break;
|
||||
case 6: {
|
||||
/* 0a, 1, 2a, 2 */
|
||||
_glClipEdge(triangle[0].v, triangle[1].v, &tmp0);
|
||||
tmp1 = *triangle[1].v;
|
||||
_glClipEdge(triangle[2].v, triangle[0].v, &tmp2);
|
||||
tmp3 = *triangle[2].v;
|
||||
|
||||
_glPerspectiveDivideVertex(&tmp0, h);
|
||||
_glPerspectiveDivideVertex(&tmp1, h);
|
||||
_glPerspectiveDivideVertex(&tmp2, h);
|
||||
_glPerspectiveDivideVertex(&tmp3, h);
|
||||
|
||||
tmp0.flags = tmp1.flags = tmp2.flags = GPU_CMD_VERTEX;
|
||||
tmp3.flags = GPU_CMD_VERTEX_EOL;
|
||||
|
||||
_glSubmitHeaderOrVertex(&tmp0);
|
||||
_glSubmitHeaderOrVertex(&tmp1);
|
||||
_glSubmitHeaderOrVertex(&tmp2);
|
||||
_glSubmitHeaderOrVertex(&tmp3);
|
||||
} break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* If this was the last in the strip, we don't need to
|
||||
submit anything else, we just wipe the tri_count */
|
||||
if(is_last_in_strip) {
|
||||
tri_count = 0;
|
||||
strip_count = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* If this was the last vertex in the strip, we're done with the
|
||||
strip so we need to wipe out the tri_count */
|
||||
ShiftTriangle();
|
||||
|
||||
if(is_last_in_strip) {
|
||||
for(int i = 0; i < tri_count; ++i) {
|
||||
if(triangle[i].visible) {
|
||||
_glPerspectiveDivideVertex(triangle[i].v, h);
|
||||
_glSubmitHeaderOrVertex(triangle[i].v);
|
||||
}
|
||||
}
|
||||
ClearTriangle();
|
||||
}
|
||||
++vertex;
|
||||
}
|
||||
/* Wait for both store queues to complete */
|
||||
d = (uint32_t *)0xe0000000;
|
||||
d[0] = d[8] = 0;
|
||||
|
|
|
@ -8,6 +8,8 @@
|
|||
#include <dc/matrix3d.h>
|
||||
|
||||
#include "../types.h"
|
||||
#include "../private.h"
|
||||
|
||||
#include "sh4_math.h"
|
||||
|
||||
#ifndef NDEBUG
|
||||
|
|
Loading…
Reference in New Issue
Block a user