From 5e6927d9a16869d3695d2ec197a60fea3508084c Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Sun, 24 Mar 2019 08:09:02 +0000 Subject: [PATCH] Huge refactor of the drawing code --- GL/clip.c | 124 ++++++++++++------ GL/clip.h | 49 -------- GL/draw.c | 349 +++++++++++++++++++++++++++++++++++---------------- GL/flush.c | 1 - GL/private.h | 65 ++++++++-- 5 files changed, 387 insertions(+), 201 deletions(-) delete mode 100644 GL/clip.h diff --git a/GL/clip.c b/GL/clip.c index c578192..a0c61bf 100644 --- a/GL/clip.c +++ b/GL/clip.c @@ -1,6 +1,7 @@ #include #include #include +#include #ifdef _arch_dreamcast #include @@ -9,7 +10,7 @@ #endif #include "profiler.h" -#include "clip.h" +#include "private.h" #include "../containers/aligned_vector.h" @@ -23,8 +24,8 @@ void _glEnableClipping(unsigned char v) { ZCLIP_ENABLED = v; } -void clipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t) __attribute__((optimize("fast-math"))); -void clipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t) { +void _glClipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t) __attribute__((optimize("fast-math"))); +void _glClipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t) { const float NEAR_PLANE = 0.2; // FIXME: this needs to be read from the projection matrix.. somehow *t = (NEAR_PLANE - v1->w) / (v2->w - v1->w); @@ -73,13 +74,22 @@ static inline void interpolateColour(const uint8_t* v1, const uint8_t* v2, const const uint32_t VERTEX_CMD_EOL = 0xf0000000; const uint32_t VERTEX_CMD = 0xe0000000; -void clipTriangle(const ClipVertex* vertices, const uint8_t visible, AlignedVector* output, const uint8_t flatShade) __attribute__((optimize("fast-math"))); -void clipTriangle(const ClipVertex* vertices, const uint8_t visible, AlignedVector* output, const uint8_t flatShade) { - uint8_t i, c = 0; +typedef struct { + ClipVertex vertex[3]; + VertexExtra extra[3]; + uint8_t visible; +} Triangle; +void _glClipTriangle(const Triangle* triangle, const uint8_t visible, SubmissionTarget* target, const uint8_t flatShade) __attribute__((optimize("fast-math"))); +void _glClipTriangle(const Triangle* triangle, const uint8_t visible, SubmissionTarget* target, const uint8_t flatShade) { + uint8_t i, c = 0; uint8_t lastVisible = 255; ClipVertex* last = NULL; + VertexExtra* veLast = NULL; + + const ClipVertex* vertices = triangle->vertex; + const VertexExtra* extras = triangle->extra; /* Used when flat shading is enabled */ uint32_t finalColour = *((uint32_t*) vertices[2].bgra); @@ -88,6 +98,8 @@ void clipTriangle(const ClipVertex* vertices, const uint8_t visible, AlignedVect uint8_t thisIndex = (i == 3) ? 0 : i; ClipVertex next; + VertexExtra veNext; + next.flags = VERTEX_CMD; uint8_t thisVisible = (visible & (1 << (2 - thisIndex))) > 0; @@ -97,13 +109,18 @@ void clipTriangle(const ClipVertex* vertices, const uint8_t visible, AlignedVect if(lastVisible < 255 && lastVisible != thisVisible) { const ClipVertex* v1 = &vertices[lastIndex]; const ClipVertex* v2 = &vertices[thisIndex]; + + const VertexExtra* ve1 = &extras[lastIndex]; + const VertexExtra* ve2 = &extras[thisIndex]; + float t; - clipLineToNearZ(v1, v2, &next, &t); + _glClipLineToNearZ(v1, v2, &next, &t); interpolateFloat(v1->w, v2->w, t, &next.w); - interpolateVec3(v1->nxyz, v2->nxyz, t, next.nxyz); interpolateVec2(v1->uv, v2->uv, t, next.uv); - interpolateVec2(v1->st, v2->st, t, next.st); + + interpolateVec3(ve1->nxyz, ve2->nxyz, t, veNext.nxyz); + interpolateVec2(ve1->st, ve2->st, t, veNext.st); if(flatShade) { *((uint32_t*) next.bgra) = finalColour; @@ -111,15 +128,22 @@ void clipTriangle(const ClipVertex* vertices, const uint8_t visible, AlignedVect interpolateColour(v1->bgra, v2->bgra, t, next.bgra); } - last = aligned_vector_push_back(output, &next, 1); + /* Push back the new vertices to the end of both the ClipVertex and VertexExtra lists */ + last = aligned_vector_push_back(&target->output->vector, &next, 1); last->flags = VERTEX_CMD; + + veLast = aligned_vector_push_back(target->extras, &veNext, 1); + ++c; } } if(thisVisible && i != 3) { - last = aligned_vector_push_back(output, &vertices[thisIndex], 1); + last = aligned_vector_push_back(&target->output->vector, &vertices[thisIndex], 1); last->flags = VERTEX_CMD; + + veLast = aligned_vector_push_back(target->extras, &extras[thisIndex], 1); + ++c; } @@ -134,13 +158,21 @@ void clipTriangle(const ClipVertex* vertices, const uint8_t visible, AlignedVect newVerts[1] = *(last - 1); newVerts[2] = *(last); + VertexExtra newExtras[3]; + newExtras[0] = *(veLast - 3); + newExtras[1] = *(veLast - 1); + newExtras[2] = *(veLast); + (last - 1)->flags = VERTEX_CMD_EOL; newVerts[0].flags = VERTEX_CMD; newVerts[1].flags = VERTEX_CMD; newVerts[2].flags = VERTEX_CMD_EOL; - aligned_vector_resize(output, output->size - 1); - aligned_vector_push_back(output, newVerts, 3); + aligned_vector_resize(&target->output->vector, target->output->vector.size - 1); + aligned_vector_push_back(&target->output->vector, newVerts, 3); + + aligned_vector_resize(target->extras, target->extras->size - 1); + aligned_vector_push_back(target->extras, newExtras, 3); } else { last->flags = VERTEX_CMD_EOL; } @@ -161,40 +193,39 @@ static inline void markDead(ClipVertex* vert) { #define B011 3 #define B110 6 -void clipTriangleStrip2(AlignedVector* vertices, uint32_t offset, uint8_t fladeShade) { - /* Room for clipping 16 triangles */ - typedef struct { - ClipVertex vertex[3]; - uint8_t visible; - } Triangle; +#define MAX_CLIP_TRIANGLES 255 - static Triangle TO_CLIP[256]; +void _glClipTriangleStrip(SubmissionTarget* target, uint8_t fladeShade) { + static Triangle TO_CLIP[MAX_CLIP_TRIANGLES]; static uint8_t CLIP_COUNT = 0; CLIP_COUNT = 0; - uint32_t i = 0; - /* Skip the header */ + ClipVertex* vertex = _glSubmissionTargetStart(target); + const ClipVertex* end = _glSubmissionTargetEnd(target); + const ClipVertex* start = vertex; - assert(offset < vertices->size); - ClipVertex* header = (ClipVertex*) aligned_vector_at(vertices, offset); - ClipVertex* vertex = header + 1; + int32_t triangle = -1; - uint32_t count = vertices->size - offset; + /* Go to the (potential) end of the first triangle */ + vertex++; - int32_t triangle = 0; + uint32_t vi1, vi2, vi3; - /* Start at 3 due to the header */ - for(i = 3; i < count; ++i, ++triangle) { - assert(offset + i < vertices->size); - - vertex = aligned_vector_at(vertices, offset + i); + while(vertex < end) { + vertex++; + triangle++; uint8_t even = (triangle % 2) == 0; ClipVertex* v1 = (even) ? vertex - 2 : vertex - 1; ClipVertex* v2 = (even) ? vertex - 1 : vertex - 2; ClipVertex* v3 = vertex; + /* Indexes into extras array */ + vi1 = v1 - start; + vi2 = v2 - start; + vi3 = v3 - start; + /* Skip ahead if we don't have a complete triangle yet */ if(v1->flags != VERTEX_CMD || v2->flags != VERTEX_CMD) { triangle = -1; @@ -249,10 +280,17 @@ void clipTriangleStrip2(AlignedVector* vertices, uint32_t offset, uint8_t fladeS case B101: case B011: case B110: + assert(CLIP_COUNT < MAX_CLIP_TRIANGLES); + /* Store the triangle for clipping */ TO_CLIP[CLIP_COUNT].vertex[0] = *v1; TO_CLIP[CLIP_COUNT].vertex[1] = *v2; TO_CLIP[CLIP_COUNT].vertex[2] = *v3; + + TO_CLIP[CLIP_COUNT].extra[0] = *(VertexExtra*) aligned_vector_at(target->extras, vi1); + TO_CLIP[CLIP_COUNT].extra[1] = *(VertexExtra*) aligned_vector_at(target->extras, vi2); + TO_CLIP[CLIP_COUNT].extra[2] = *(VertexExtra*) aligned_vector_at(target->extras, vi3); + TO_CLIP[CLIP_COUNT].visible = visible; ++CLIP_COUNT; @@ -287,6 +325,15 @@ void clipTriangleStrip2(AlignedVector* vertices, uint32_t offset, uint8_t fladeS triangle = -1; } else { + /* FIXME: ? + * This situation doesn't actually seem possible, we always clip from one end + * of the triangle strip to the other, so we're never going to hit the plane in the + * middle of the strip (with previous/next unhandled tris). + * + * Uncomment if this actually happens */ + assert(0 && "Not Implemented (see comment)"); + + /* ClipVertex* v4 = vertex + 1; TO_CLIP[CLIP_COUNT].vertex[0] = *v3; @@ -298,26 +345,24 @@ void clipTriangleStrip2(AlignedVector* vertices, uint32_t offset, uint8_t fladeS TO_CLIP[CLIP_COUNT].visible = visible; ++CLIP_COUNT; - /* Restart strip */ + // Restart strip triangle = -1; - /* Mark the second vertex as the end of the strip */ + // Mark the second vertex as the end of the strip (vertex - 1)->flags = VERTEX_CMD_EOL; if(v4->flags == VERTEX_CMD_EOL) { markDead(vertex); markDead(v4); } else { - /* Swap the next vertices to start a new strip */ + // Swap the next vertices to start a new strip ClipVertex tmp = *vertex; *vertex = *v4; *v4 = tmp; vertex->flags = VERTEX_CMD; v4->flags = VERTEX_CMD; - } - - i += 1; + } */ } break; default: @@ -326,7 +371,8 @@ void clipTriangleStrip2(AlignedVector* vertices, uint32_t offset, uint8_t fladeS } /* Now, clip all the triangles and append them to the output */ + GLushort i; for(i = 0; i < CLIP_COUNT; ++i) { - clipTriangle(TO_CLIP[i].vertex, TO_CLIP[i].visible, vertices, fladeShade); + _glClipTriangle(&TO_CLIP[i], TO_CLIP[i].visible, target, fladeShade); } } diff --git a/GL/clip.h b/GL/clip.h deleted file mode 100644 index ce20ee6..0000000 --- a/GL/clip.h +++ /dev/null @@ -1,49 +0,0 @@ -#ifndef CLIP_H -#define CLIP_H - -#include - -#include "../containers/aligned_vector.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum { - CLIP_RESULT_ALL_IN_FRONT, - CLIP_RESULT_ALL_BEHIND, - CLIP_RESULT_ALL_ON_PLANE, - CLIP_RESULT_FRONT_TO_BACK, - CLIP_RESULT_BACK_TO_FRONT -} ClipResult; - - -#define A8IDX 3 -#define R8IDX 2 -#define G8IDX 1 -#define B8IDX 0 - - -typedef struct { - /* Same 32 byte layout as pvr_vertex_t */ - uint32_t flags; - float xyz[3]; - float uv[2]; - uint8_t bgra[4]; - uint32_t oargb; - - /* Important, we have 24 bytes here. That means when submitting to the SQs we need to - * increment the pointer by 6 */ - float nxyz[3]; /* Normal */ - float w; - float st[2]; -} ClipVertex; - -void clipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t); -void clipTriangleStrip2(AlignedVector* vertices, uint32_t offset, uint8_t fladeShade); - -#ifdef __cplusplus -} -#endif - -#endif // CLIP_H diff --git a/GL/draw.c b/GL/draw.c index e8e78cf..f6b2b3a 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -81,7 +81,21 @@ static void _readVertexData3f3f(const float* input, GLuint count, GLubyte stride } } -static void _readVertexData3us3f(const GLushort* input, GLuint count, GLubyte stride, float* output) { +/* VE == VertexExtra */ +static void _readVertexData3f3fVE(const float* input, GLuint count, GLubyte stride, float* output) { + const float* end = (float*) (((GLubyte*) input) + (count * stride)); + + while(input < end) { + output[0] = input[0]; + output[1] = input[1]; + output[2] = input[2]; + + input = (float*) (((GLubyte*) input) + stride); + output = (float*) (((GLubyte*) output) + sizeof(VertexExtra)); + } +} + +static void _readVertexData3us3f(const GLushort* input, GLuint count, GLubyte stride, GLfloat* output) { const GLushort* end = (GLushort*) ((GLubyte*) input) + (count * stride); while(input < end) { @@ -94,7 +108,20 @@ static void _readVertexData3us3f(const GLushort* input, GLuint count, GLubyte st } } -static void _readVertexData3ui3f(const GLuint* input, GLuint count, GLubyte stride, float* output) { +static void _readVertexData3us3fVE(const GLushort* input, GLuint count, GLubyte stride, GLfloat* output) { + const GLushort* end = (GLushort*) ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[0] = input[0]; + output[1] = input[1]; + output[2] = input[2]; + + input = (GLushort*) (((GLubyte*) input) + stride); + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); + } +} + +static void _readVertexData3ui3f(const GLuint* input, GLuint count, GLubyte stride, GLfloat* output) { const GLuint* end = (GLuint*) ((GLubyte*) input) + (count * stride); while(input < end) { @@ -107,6 +134,19 @@ static void _readVertexData3ui3f(const GLuint* input, GLuint count, GLubyte stri } } +static void _readVertexData3ui3fVE(const GLuint* input, GLuint count, GLubyte stride, GLfloat* output) { + const GLuint* end = (GLuint*) ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[0] = input[0]; + output[1] = input[1]; + output[2] = input[2]; + + input = (GLuint*) (((GLubyte*) input) + stride); + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); + } +} + static void _readVertexData3ub3f(const GLubyte* input, GLuint count, GLubyte stride, float* output) { const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; const GLubyte* end = ((GLubyte*) input) + (count * stride); @@ -121,6 +161,20 @@ static void _readVertexData3ub3f(const GLubyte* input, GLuint count, GLubyte str } } +static void _readVertexData3ub3fVE(const GLubyte* input, GLuint count, GLubyte stride, GLfloat* output) { + const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; + const GLubyte* end = ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE; + output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE; + output[2] = input[2] * ONE_OVER_TWO_FIVE_FIVE; + + input += stride; + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); + } +} + static void _readVertexData2f2f(const float* input, GLuint count, GLubyte stride, float* output) { const float* end = (float*) ((GLubyte*) input) + (count * stride); @@ -133,6 +187,18 @@ static void _readVertexData2f2f(const float* input, GLuint count, GLubyte stride } } +static void _readVertexData2f2fVE(const float* input, GLuint count, GLubyte stride, GLfloat* output) { + const float* end = (float*) ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[0] = input[0]; + output[1] = input[1]; + + input = (float*) (((GLubyte*) input) + stride); + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); + } +} + static void _readVertexData2f3f(const float* input, GLuint count, GLubyte stride, float* output) { const float* end = (float*) ((GLubyte*) input) + (count * stride); @@ -185,6 +251,18 @@ static void _readVertexData2us2f(const GLushort* input, GLuint count, GLubyte st } } +static void _readVertexData2us2fVE(const GLushort* input, GLuint count, GLubyte stride, GLfloat* output) { + const GLushort* end = (GLushort*) ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[0] = input[0]; + output[1] = input[1]; + + input = (GLushort*) (((GLubyte*) input) + stride); + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); + } +} + static void _readVertexData2ui2f(const GLuint* input, GLuint count, GLubyte stride, float* output) { const GLuint* end = (GLuint*) ((GLubyte*) input) + (count * stride); @@ -197,6 +275,18 @@ static void _readVertexData2ui2f(const GLuint* input, GLuint count, GLubyte stri } } +static void _readVertexData2ui2fVE(const GLuint* input, GLuint count, GLubyte stride, GLfloat* output) { + const GLuint* end = (GLuint*) ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[0] = input[0]; + output[1] = input[1]; + + input = (GLuint*) (((GLubyte*) input) + stride); + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); + } +} + static void _readVertexData2ub2f(const GLubyte* input, GLuint count, GLubyte stride, float* output) { const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; const GLubyte* end = (GLubyte*) ((GLubyte*) input) + (count * stride); @@ -210,6 +300,19 @@ static void _readVertexData2ub2f(const GLubyte* input, GLuint count, GLubyte str } } +static void _readVertexData2ub2fVE(const GLubyte* input, GLuint count, GLubyte stride, GLfloat* output) { + const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; + const GLubyte* end = (GLubyte*) ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE; + output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE; + + input = (((GLubyte*) input) + stride); + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); + } +} + static void _readVertexData2ui3f(const GLuint* input, GLuint count, GLubyte stride, float* output) { const GLuint* end = (GLuint*) ((GLubyte*) input) + (count * stride); @@ -279,13 +382,12 @@ static void _readVertexData3ubARGB(const GLubyte* input, GLuint count, GLubyte s } } -static void _fillWithNegZ(GLuint count, GLfloat* output) { - const GLfloat* end = (GLfloat*) ((GLubyte*) output) + (sizeof(ClipVertex) * count); +static void _fillWithNegZVE(GLuint count, GLfloat* output) { + const GLfloat* end = output + (count * 3); while(output < end) { output[0] = output[1] = 0.0f; output[2] = -1.0f; - - output += sizeof(ClipVertex); + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); } } @@ -303,10 +405,18 @@ static void _fillWhiteARGB(GLuint count, GLubyte* output) { } static void _fillZero2f(GLuint count, GLfloat* output) { - const GLfloat* end = output + (sizeof(ClipVertex) * count); + const GLfloat* end = (GLfloat*) ((GLubyte*) output) + (count * sizeof(ClipVertex)); while(output < end) { output[0] = output[1] = 0.0f; - output += sizeof(ClipVertex); + output = (GLfloat*) (((GLubyte*) output) + sizeof(ClipVertex)); + } +} + +static void _fillZero2fVE(GLuint count, GLfloat* output) { + const GLfloat* end = output + (2 * count); + while(output < end) { + output[0] = output[1] = 0.0f; + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); } } @@ -433,6 +543,18 @@ do { \ *b = temp; \ } while(0) +PVRHeader* _glSubmissionTargetHeader(SubmissionTarget* target) { + return aligned_vector_at(&target->output->vector, target->header_offset); +} + +ClipVertex* _glSubmissionTargetStart(SubmissionTarget* target) { + return aligned_vector_at(&target->output->vector, target->start_offset); +} + +ClipVertex* _glSubmissionTargetEnd(SubmissionTarget* target) { + return _glSubmissionTargetStart(target) + target->count; +} + static inline void genTriangles(ClipVertex* output, GLuint count) { const ClipVertex* end = output + count; ClipVertex* it = output + 2; @@ -571,9 +693,10 @@ static inline void _readUVData(const GLuint first, const GLuint count, ClipVerte } } -static inline void _readSTData(const GLuint first, const GLuint count, ClipVertex* output) { +static inline void _readSTData(const GLuint first, const GLuint count, SubmissionTarget* target) { if((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) != ST_ENABLED_FLAG) { - _fillZero2f(count, output->st); + VertexExtra* extra = aligned_vector_at(target->extras, 0); + _fillZero2fVE(count, extra->st); return; } @@ -581,21 +704,22 @@ static inline void _readSTData(const GLuint first, const GLuint count, ClipVerte const void* stptr = ((GLubyte*) ST_POINTER.ptr + (first * ststride)); if(ST_POINTER.size == 2) { + VertexExtra* extra = aligned_vector_at(target->extras, 0); switch(ST_POINTER.type) { case GL_FLOAT: - _readVertexData2f2f(stptr, count, ststride, output[0].st); + _readVertexData2f2fVE(stptr, count, ststride, extra->st); break; case GL_BYTE: case GL_UNSIGNED_BYTE: - _readVertexData2ub2f(stptr, count, ststride, output[0].st); + _readVertexData2ub2fVE(stptr, count, ststride, extra->st); break; case GL_SHORT: case GL_UNSIGNED_SHORT: - _readVertexData2us2f(stptr, count, ststride, output[0].st); + _readVertexData2us2fVE(stptr, count, ststride, extra->st); break; case GL_INT: case GL_UNSIGNED_INT: - _readVertexData2ui2f(stptr, count, ststride, output[0].st); + _readVertexData2ui2fVE(stptr, count, ststride, extra->st); break; default: assert(0 && "Not Implemented"); @@ -605,9 +729,10 @@ static inline void _readSTData(const GLuint first, const GLuint count, ClipVerte } } -static inline void _readNormalData(const GLuint first, const GLuint count, ClipVertex* output) { +static inline void _readNormalData(const GLuint first, const GLuint count, SubmissionTarget* target) { if((ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) != NORMAL_ENABLED_FLAG) { - _fillWithNegZ(count, output->nxyz); + VertexExtra* extra = aligned_vector_at(target->extras, 0); + _fillWithNegZVE(count, extra->nxyz); return; } @@ -615,21 +740,22 @@ static inline void _readNormalData(const GLuint first, const GLuint count, ClipV const void* nptr = ((GLubyte*) NORMAL_POINTER.ptr + (first * nstride)); if(NORMAL_POINTER.size == 3) { + VertexExtra* extra = aligned_vector_at(target->extras, 0); switch(NORMAL_POINTER.type) { case GL_FLOAT: - _readVertexData3f3f(nptr, count, nstride, output[0].nxyz); + _readVertexData3f3fVE(nptr, count, nstride, extra->nxyz); break; case GL_BYTE: case GL_UNSIGNED_BYTE: - _readVertexData3ub3f(nptr, count, nstride, output[0].nxyz); + _readVertexData3ub3fVE(nptr, count, nstride, extra->nxyz); break; case GL_SHORT: case GL_UNSIGNED_SHORT: - _readVertexData3us3f(nptr, count, nstride, output[0].nxyz); + _readVertexData3us3fVE(nptr, count, nstride, extra->nxyz); break; case GL_INT: case GL_UNSIGNED_INT: - _readVertexData3ui3f(nptr, count, nstride, output[0].nxyz); + _readVertexData3ui3fVE(nptr, count, nstride, extra->nxyz); break; default: assert(0 && "Not Implemented"); @@ -694,7 +820,7 @@ static inline void _readDiffuseData(const GLuint first, const GLuint count, Clip } } -static void generate(ClipVertex* output, const GLenum mode, const GLsizei first, const GLuint count, +static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei first, const GLuint count, const GLubyte* indices, const GLenum type, const GLboolean doTexture, const GLboolean doMultitexture, const GLboolean doLighting) { /* Read from the client buffers and generate an array of ClipVertices */ @@ -703,14 +829,15 @@ static void generate(ClipVertex* output, const GLenum mode, const GLsizei first, const ClipVertex* end; if(!indices) { - _readPositionData(first, count, output); - _readDiffuseData(first, count, output); - if(doTexture) _readUVData(first, count, output); - if(doLighting) _readNormalData(first, count, output); - if(doTexture && doMultitexture) _readSTData(first, count, output); + _readPositionData(first, count, _glSubmissionTargetStart(target)); + _readDiffuseData(first, count, _glSubmissionTargetStart(target)); + if(doTexture) _readUVData(first, count, _glSubmissionTargetStart(target)); + if(doLighting) _readNormalData(first, count, target); + if(doTexture && doMultitexture) _readSTData(first, count, target); + + it = _glSubmissionTargetStart(target); + end = _glSubmissionTargetEnd(target); - it = output; - end = output + count; while(it < end) { (it++)->flags = PVR_CMD_VERTEX; } @@ -718,25 +845,26 @@ static void generate(ClipVertex* output, const GLenum mode, const GLsizei first, // Drawing arrays switch(mode) { case GL_TRIANGLES: - genTriangles(output, count); + genTriangles(_glSubmissionTargetStart(target), count); break; case GL_QUADS: - genQuads(output, count); + genQuads(_glSubmissionTargetStart(target), count); break; case GL_POLYGON: case GL_TRIANGLE_FAN: - genTriangleFan(output, count); + genTriangleFan(_glSubmissionTargetStart(target), count); break; case GL_TRIANGLE_STRIP: - genTriangleStrip(output, count); + genTriangleStrip(_glSubmissionTargetStart(target), count); break; default: assert(0 && "Not Implemented"); } } else { const IndexParseFunc indexFunc = _calcParseIndexFunc(type); - it = output; - end = output + count; + it = _glSubmissionTargetStart(target); + end = _glSubmissionTargetEnd(target); + GLuint j; const GLubyte* idx = indices; while(it < end) { @@ -744,31 +872,33 @@ static void generate(ClipVertex* output, const GLenum mode, const GLsizei first, _readPositionData(j, 1, it); _readDiffuseData(j, 1, it); if(doTexture) _readUVData(j, 1, it); - if(doLighting) _readNormalData(j, 1, it); - if(doTexture && doMultitexture) _readSTData(j, 1, it); + //FIXME: Need to think about how we can share this */ + //if(doLighting) _readNormalData(j, 1, it); + //if(doTexture && doMultitexture) _readSTData(j, 1, it); ++it; idx += istride; } - it = output; + it = _glSubmissionTargetStart(target); while(it < end) { (it++)->flags = PVR_CMD_VERTEX; } + it = _glSubmissionTargetStart(target); // Drawing arrays switch(mode) { case GL_TRIANGLES: - genTriangles(output, count); + genTriangles(it, count); break; case GL_QUADS: - genQuads(output, count); + genQuads(it, count); break; case GL_POLYGON: case GL_TRIANGLE_FAN: - genTriangleFan(output, count); + genTriangleFan(it, count); break; case GL_TRIANGLE_STRIP: - genTriangleStrip(output, count); + genTriangleStrip(it, count); break; default: assert(0 && "Not Implemented"); @@ -776,15 +906,14 @@ static void generate(ClipVertex* output, const GLenum mode, const GLsizei first, } } -static void transform(ClipVertex* output, const GLuint count) { +static void transform(SubmissionTarget* target) { /* Perform modelview transform, storing W */ - - ClipVertex* vertex = output; + ClipVertex* vertex = _glSubmissionTargetStart(target); + const ClipVertex* end = _glSubmissionTargetEnd(target); _glApplyRenderMatrix(); /* Apply the Render Matrix Stack */ - GLsizei i = count; - while(i--) { + while(vertex < end) { register float __x __asm__("fr12") = (vertex->xyz[0]); register float __y __asm__("fr13") = (vertex->xyz[1]); register float __z __asm__("fr14") = (vertex->xyz[2]); @@ -801,17 +930,16 @@ static void transform(ClipVertex* output, const GLuint count) { vertex->xyz[1] = __y; vertex->xyz[2] = __z; vertex->w = __w; - ++vertex; } } -static GLsizei clip(AlignedVector* polylist, uint32_t offset, const GLuint count) { +static void clip(SubmissionTarget* target) { /* Perform clipping, generating new vertices as necessary */ - clipTriangleStrip2(polylist, offset, _glGetShadeModel() == GL_FLAT); + _glClipTriangleStrip(target, _glGetShadeModel() == GL_FLAT); - /* List size, minus the original offset (which includes the header), minus the header */ - return polylist->size - offset - 1; + /* Reset the count now that we may have added vertices */ + target->count = target->output->vector.size - target->start_offset; } static void mat_transform3(const float* xyz, const float* xyzOut, const uint32_t count, const uint32_t inStride, const uint32_t outStride) { @@ -846,7 +974,7 @@ static void mat_transform_normal3(const float* xyz, const float* xyzOut, const u } } -static void light(ClipVertex* output, const GLuint count) { +static void light(SubmissionTarget* target) { if(!_glIsLightingEnabled()) { return; } @@ -863,22 +991,23 @@ static void light(ClipVertex* output, const GLuint count) { aligned_vector_init(eye_space_data, sizeof(EyeSpaceData)); } - aligned_vector_resize(eye_space_data, count); + aligned_vector_resize(eye_space_data, target->count); /* Perform lighting calculations and manipulate the colour */ - ClipVertex* vertex = output; + ClipVertex* vertex = _glSubmissionTargetStart(target); + VertexExtra* extra = aligned_vector_at(target->extras, 0); EyeSpaceData* eye_space = (EyeSpaceData*) eye_space_data->data; _glMatrixLoadModelView(); - mat_transform3(vertex->xyz, eye_space->xyz, count, sizeof(ClipVertex), sizeof(EyeSpaceData)); + mat_transform3(vertex->xyz, eye_space->xyz, target->count, sizeof(ClipVertex), sizeof(EyeSpaceData)); _glMatrixLoadNormal(); - mat_transform_normal3(vertex->nxyz, eye_space->n, count, sizeof(ClipVertex), sizeof(EyeSpaceData)); + mat_transform_normal3(extra->nxyz, eye_space->n, target->count, sizeof(VertexExtra), sizeof(EyeSpaceData)); GLsizei i; EyeSpaceData* ES = aligned_vector_at(eye_space_data, 0); - for(i = 0; i < count; ++i, ++vertex, ++ES) { + for(i = 0; i < target->count; ++i, ++vertex, ++ES) { /* We ignore diffuse colour when lighting is enabled. If GL_COLOR_MATERIAL is enabled * then the lighting calculation should possibly take it into account */ @@ -903,12 +1032,12 @@ static void light(ClipVertex* output, const GLuint count) { } } -static void divide(ClipVertex* output, const GLuint count) { +static void divide(SubmissionTarget* target) { /* Perform perspective divide on each vertex */ - ClipVertex* vertex = output; + ClipVertex* vertex = _glSubmissionTargetStart(target); + const ClipVertex* end = _glSubmissionTargetEnd(target); - GLsizei i = count; - while(i--) { + while(vertex < end) { vertex->xyz[2] = 1.0f / vertex->w; vertex->xyz[0] *= vertex->xyz[2]; vertex->xyz[1] *= vertex->xyz[2]; @@ -926,12 +1055,17 @@ static void push(PVRHeader* header, ClipVertex* output, const GLuint count, Poly pvr_poly_compile(&header->hdr, &cxt); /* Post-process the vertex list */ + /* + * This is currently unnecessary. aligned_vector memsets the allocated objects + * to zero, and we don't touch oargb, also, we don't *enable* oargb yet in the + * pvr header so it should be ignored anyway. If this ever becomes a problem, + * uncomment this. ClipVertex* vout = output; - - GLuint i = count; - while(i--) { + const ClipVertex* end = output + count; + while(vout < end) { vout->oargb = 0; } + */ } #define DEBUG_CLIPPING 0 @@ -942,6 +1076,21 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type return; } + static SubmissionTarget* target = NULL; + static AlignedVector extras; + + /* Initialization of the target and extras */ + if(!target) { + target = (SubmissionTarget*) malloc(sizeof(SubmissionTarget)); + target->extras = NULL; + target->count = 0; + target->output = NULL; + target->header_offset = target->start_offset = 0; + + aligned_vector_init(&extras, sizeof(VertexExtra)); + target->extras = &extras; + } + GLboolean doMultitexture, doTexture, doLighting; GLint activeTexture; glGetIntegerv(GL_ACTIVE_TEXTURE_ARB, &activeTexture); @@ -958,39 +1107,32 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type profiler_push(__func__); + target->output = _glActivePolyList(); + target->count = (mode == GL_POLYGON || mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count; + target->header_offset = target->output->vector.size; + target->start_offset = target->header_offset + 1; - PolyList* activeList = _glActivePolyList(); + /* Make sure we have enough room for all the "extra" data */ + aligned_vector_resize(&extras, target->count); - /* Make room in the list buffer */ - GLsizei spaceNeeded = (mode == GL_POLYGON || mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count; - ClipVertex* start = aligned_vector_extend(&activeList->vector, spaceNeeded + 1); - - /* Store a pointer to the header for later */ - PVRHeader* header = (PVRHeader*) start++; - - /* We store an offset to the first ClipVertex because clipping may generate more - * vertices, which may cause a realloc and thus invalidate start and header - * we use this startOffset to reset those pointers after clipping */ - uint32_t startOffset = start - (ClipVertex*) activeList->vector.data; + /* Make room for the vertices and header */ + aligned_vector_extend(&target->output->vector, target->count + 1); profiler_checkpoint("allocate"); - generate(start, mode, first, count, (GLubyte*) indices, type, doTexture, doMultitexture, doLighting); + generate(target, mode, first, count, (GLubyte*) indices, type, doTexture, doMultitexture, doLighting); profiler_checkpoint("generate"); - light(start, spaceNeeded); + light(target); profiler_checkpoint("light"); - transform(start, spaceNeeded); + transform(target); profiler_checkpoint("transform"); if(_glIsClippingEnabled()) { - - uint32_t offset = ((start - 1) - (ClipVertex*) activeList->vector.data); - #if DEBUG_CLIPPING uint32_t i = 0; fprintf(stderr, "=========\n"); @@ -1005,11 +1147,7 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type } #endif - spaceNeeded = clip(&activeList->vector, offset, spaceNeeded); - - /* Clipping may have realloc'd so reset the start pointer */ - start = ((ClipVertex*) activeList->vector.data) + startOffset; - header = (PVRHeader*) (start - 1); /* Update the header pointer */ + clip(target); #if DEBUG_CLIPPING fprintf(stderr, "--------\n"); @@ -1027,11 +1165,11 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type profiler_checkpoint("clip"); - divide(start, spaceNeeded); + divide(target); profiler_checkpoint("divide"); - push(header, start, spaceNeeded, _glActivePolyList(), 0); + push(_glSubmissionTargetHeader(target), _glSubmissionTargetStart(target), target->count, _glActivePolyList(), 0); profiler_checkpoint("push"); /* @@ -1042,36 +1180,37 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type - We want to set the uv coordinates to the passed st ones */ + TextureObject* texture1 = _glGetTexture1(); + + /* Multitexture implicitly disabled */ + if(!texture1 || ((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) != ST_ENABLED_FLAG)) { + doMultitexture = GL_FALSE; + } + if(!doMultitexture) { /* Multitexture actively disabled */ profiler_pop(); return; } - TextureObject* texture1 = _glGetTexture1(); - - if(!texture1 || ((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) != ST_ENABLED_FLAG)) { - /* Multitexture implicitly disabled */ - profiler_pop(); - return; - } - /* Push back a copy of the list to the transparent poly list, including the header - (hence the - 1) + (hence the + 1) */ ClipVertex* vertex = aligned_vector_push_back( - &_glTransparentPolyList()->vector, start - 1, spaceNeeded + 1 + &_glTransparentPolyList()->vector, (ClipVertex*) _glSubmissionTargetHeader(target), target->count + 1 ); PVRHeader* mtHeader = (PVRHeader*) vertex++; ClipVertex* mtStart = vertex; - /* Copy ST coordinates to UV ones */ - GLsizei i = spaceNeeded; - while(i--) { - vertex->uv[0] = vertex->st[0]; - vertex->uv[1] = vertex->st[1]; + /* Replace the UV coordinates with the ST ones */ + const VertexExtra* end = aligned_vector_back(target->extras) + 1; + VertexExtra* ve = aligned_vector_at(target->extras, 0); + while(ve < end) { + vertex->uv[0] = ve->st[0]; + vertex->uv[1] = ve->st[1]; ++vertex; + ++ve; } /* Store state, as we're about to mess around with it */ @@ -1088,7 +1227,7 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); /* Send the buffer again to the transparent list */ - push(mtHeader, mtStart, spaceNeeded, _glTransparentPolyList(), 1); + push(mtHeader, mtStart, target->count, _glTransparentPolyList(), 1); /* Reset state */ glDepthFunc(depthFunc); diff --git a/GL/flush.c b/GL/flush.c index 74a1f04..46c0756 100644 --- a/GL/flush.c +++ b/GL/flush.c @@ -31,7 +31,6 @@ static void pvr_list_submit(void *src, int n) { d[7] = *(s++); __asm__("pref @%0" : : "r"(d)); d += 8; - s += CLIP_VERTEX_INT_PADDING; } /* Wait for both store queues to complete */ diff --git a/GL/private.h b/GL/private.h index bdb6038..87b6d5c 100644 --- a/GL/private.h +++ b/GL/private.h @@ -1,10 +1,11 @@ #ifndef PRIVATE_H #define PRIVATE_H +#include + #include "../include/gl.h" #include "../containers/aligned_vector.h" #include "../containers/named_array.h" -#include "./clip.h" #define TRACE_ENABLED 0 #define TRACE() if(TRACE_ENABLED) {fprintf(stderr, "%s\n", __func__);} @@ -17,11 +18,8 @@ #define MAX_TEXTURE_SIZE 1024 -#define CLIP_VERTEX_INT_PADDING 6 - typedef struct { pvr_poly_hdr_t hdr; - unsigned int padding[CLIP_VERTEX_INT_PADDING]; } PVRHeader; typedef struct { @@ -31,9 +29,6 @@ typedef struct { sy, /* Start y */ ex, /* End x */ ey; /* End y */ - - /* Padding to match clip vertex */ - unsigned int padding[CLIP_VERTEX_INT_PADDING]; } PVRTileClipCommand; /* Tile Clip command for the pvr */ typedef struct { @@ -97,6 +92,62 @@ typedef struct { GLboolean is_directional; } LightSource; +typedef struct { + /* Same 32 byte layout as pvr_vertex_t */ + uint32_t flags; + float xyz[3]; + float uv[2]; + uint8_t bgra[4]; + + /* In the pvr_vertex_t structure, this next 4 bytes is oargb + * but we're not using that for now, so having W here makes the code + * simpler */ + float w; +} ClipVertex; + +/* ClipVertex doesn't have room for these, so we need to parse them + * out separately. Potentially 'w' will be housed here if we support oargb */ +typedef struct { + float nxyz[3]; + float st[2]; +} VertexExtra; + +/* Generating PVR vertices from the user-submitted data gets complicated, particularly + * when a realloc could invalidate pointers. This structure holds all the information + * we need on the target vertex array to allow passing around to the various stages (e.g. generate/clip etc.) + */ +typedef struct { + PolyList* output; + uint32_t header_offset; // The offset of the header in the output list + uint32_t start_offset; // The offset into the output list + uint32_t count; // The number of vertices in this output + + /* Pointer to count * VertexExtra; */ + AlignedVector* extras; +} SubmissionTarget; + +PVRHeader* _glSubmissionTargetHeader(SubmissionTarget* target); +ClipVertex* _glSubmissionTargetStart(SubmissionTarget* target); +ClipVertex* _glSubmissionTargetEnd(SubmissionTarget* target); + +typedef enum { + CLIP_RESULT_ALL_IN_FRONT, + CLIP_RESULT_ALL_BEHIND, + CLIP_RESULT_ALL_ON_PLANE, + CLIP_RESULT_FRONT_TO_BACK, + CLIP_RESULT_BACK_TO_FRONT +} ClipResult; + + +#define A8IDX 3 +#define R8IDX 2 +#define G8IDX 1 +#define B8IDX 0 + +struct SubmissionTarget; + +void _glClipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t); +void _glClipTriangleStrip(SubmissionTarget* target, uint8_t fladeShade); PolyList *_glActivePolyList(); PolyList *_glTransparentPolyList();