From 5e6927d9a16869d3695d2ec197a60fea3508084c Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Sun, 24 Mar 2019 08:09:02 +0000 Subject: [PATCH 01/33] Huge refactor of the drawing code --- GL/clip.c | 124 ++++++++++++------ GL/clip.h | 49 -------- GL/draw.c | 349 +++++++++++++++++++++++++++++++++++---------------- GL/flush.c | 1 - GL/private.h | 65 ++++++++-- 5 files changed, 387 insertions(+), 201 deletions(-) delete mode 100644 GL/clip.h diff --git a/GL/clip.c b/GL/clip.c index c578192..a0c61bf 100644 --- a/GL/clip.c +++ b/GL/clip.c @@ -1,6 +1,7 @@ #include #include #include +#include #ifdef _arch_dreamcast #include @@ -9,7 +10,7 @@ #endif #include "profiler.h" -#include "clip.h" +#include "private.h" #include "../containers/aligned_vector.h" @@ -23,8 +24,8 @@ void _glEnableClipping(unsigned char v) { ZCLIP_ENABLED = v; } -void clipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t) __attribute__((optimize("fast-math"))); -void clipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t) { +void _glClipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t) __attribute__((optimize("fast-math"))); +void _glClipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t) { const float NEAR_PLANE = 0.2; // FIXME: this needs to be read from the projection matrix.. somehow *t = (NEAR_PLANE - v1->w) / (v2->w - v1->w); @@ -73,13 +74,22 @@ static inline void interpolateColour(const uint8_t* v1, const uint8_t* v2, const const uint32_t VERTEX_CMD_EOL = 0xf0000000; const uint32_t VERTEX_CMD = 0xe0000000; -void clipTriangle(const ClipVertex* vertices, const uint8_t visible, AlignedVector* output, const uint8_t flatShade) __attribute__((optimize("fast-math"))); -void clipTriangle(const ClipVertex* vertices, const uint8_t visible, AlignedVector* output, const uint8_t flatShade) { - uint8_t i, c = 0; +typedef struct { + ClipVertex vertex[3]; + VertexExtra extra[3]; + uint8_t visible; +} Triangle; +void _glClipTriangle(const Triangle* triangle, const uint8_t visible, SubmissionTarget* target, const uint8_t flatShade) __attribute__((optimize("fast-math"))); +void _glClipTriangle(const Triangle* triangle, const uint8_t visible, SubmissionTarget* target, const uint8_t flatShade) { + uint8_t i, c = 0; uint8_t lastVisible = 255; ClipVertex* last = NULL; + VertexExtra* veLast = NULL; + + const ClipVertex* vertices = triangle->vertex; + const VertexExtra* extras = triangle->extra; /* Used when flat shading is enabled */ uint32_t finalColour = *((uint32_t*) vertices[2].bgra); @@ -88,6 +98,8 @@ void clipTriangle(const ClipVertex* vertices, const uint8_t visible, AlignedVect uint8_t thisIndex = (i == 3) ? 0 : i; ClipVertex next; + VertexExtra veNext; + next.flags = VERTEX_CMD; uint8_t thisVisible = (visible & (1 << (2 - thisIndex))) > 0; @@ -97,13 +109,18 @@ void clipTriangle(const ClipVertex* vertices, const uint8_t visible, AlignedVect if(lastVisible < 255 && lastVisible != thisVisible) { const ClipVertex* v1 = &vertices[lastIndex]; const ClipVertex* v2 = &vertices[thisIndex]; + + const VertexExtra* ve1 = &extras[lastIndex]; + const VertexExtra* ve2 = &extras[thisIndex]; + float t; - clipLineToNearZ(v1, v2, &next, &t); + _glClipLineToNearZ(v1, v2, &next, &t); interpolateFloat(v1->w, v2->w, t, &next.w); - interpolateVec3(v1->nxyz, v2->nxyz, t, next.nxyz); interpolateVec2(v1->uv, v2->uv, t, next.uv); - interpolateVec2(v1->st, v2->st, t, next.st); + + interpolateVec3(ve1->nxyz, ve2->nxyz, t, veNext.nxyz); + interpolateVec2(ve1->st, ve2->st, t, veNext.st); if(flatShade) { *((uint32_t*) next.bgra) = finalColour; @@ -111,15 +128,22 @@ void clipTriangle(const ClipVertex* vertices, const uint8_t visible, AlignedVect interpolateColour(v1->bgra, v2->bgra, t, next.bgra); } - last = aligned_vector_push_back(output, &next, 1); + /* Push back the new vertices to the end of both the ClipVertex and VertexExtra lists */ + last = aligned_vector_push_back(&target->output->vector, &next, 1); last->flags = VERTEX_CMD; + + veLast = aligned_vector_push_back(target->extras, &veNext, 1); + ++c; } } if(thisVisible && i != 3) { - last = aligned_vector_push_back(output, &vertices[thisIndex], 1); + last = aligned_vector_push_back(&target->output->vector, &vertices[thisIndex], 1); last->flags = VERTEX_CMD; + + veLast = aligned_vector_push_back(target->extras, &extras[thisIndex], 1); + ++c; } @@ -134,13 +158,21 @@ void clipTriangle(const ClipVertex* vertices, const uint8_t visible, AlignedVect newVerts[1] = *(last - 1); newVerts[2] = *(last); + VertexExtra newExtras[3]; + newExtras[0] = *(veLast - 3); + newExtras[1] = *(veLast - 1); + newExtras[2] = *(veLast); + (last - 1)->flags = VERTEX_CMD_EOL; newVerts[0].flags = VERTEX_CMD; newVerts[1].flags = VERTEX_CMD; newVerts[2].flags = VERTEX_CMD_EOL; - aligned_vector_resize(output, output->size - 1); - aligned_vector_push_back(output, newVerts, 3); + aligned_vector_resize(&target->output->vector, target->output->vector.size - 1); + aligned_vector_push_back(&target->output->vector, newVerts, 3); + + aligned_vector_resize(target->extras, target->extras->size - 1); + aligned_vector_push_back(target->extras, newExtras, 3); } else { last->flags = VERTEX_CMD_EOL; } @@ -161,40 +193,39 @@ static inline void markDead(ClipVertex* vert) { #define B011 3 #define B110 6 -void clipTriangleStrip2(AlignedVector* vertices, uint32_t offset, uint8_t fladeShade) { - /* Room for clipping 16 triangles */ - typedef struct { - ClipVertex vertex[3]; - uint8_t visible; - } Triangle; +#define MAX_CLIP_TRIANGLES 255 - static Triangle TO_CLIP[256]; +void _glClipTriangleStrip(SubmissionTarget* target, uint8_t fladeShade) { + static Triangle TO_CLIP[MAX_CLIP_TRIANGLES]; static uint8_t CLIP_COUNT = 0; CLIP_COUNT = 0; - uint32_t i = 0; - /* Skip the header */ + ClipVertex* vertex = _glSubmissionTargetStart(target); + const ClipVertex* end = _glSubmissionTargetEnd(target); + const ClipVertex* start = vertex; - assert(offset < vertices->size); - ClipVertex* header = (ClipVertex*) aligned_vector_at(vertices, offset); - ClipVertex* vertex = header + 1; + int32_t triangle = -1; - uint32_t count = vertices->size - offset; + /* Go to the (potential) end of the first triangle */ + vertex++; - int32_t triangle = 0; + uint32_t vi1, vi2, vi3; - /* Start at 3 due to the header */ - for(i = 3; i < count; ++i, ++triangle) { - assert(offset + i < vertices->size); - - vertex = aligned_vector_at(vertices, offset + i); + while(vertex < end) { + vertex++; + triangle++; uint8_t even = (triangle % 2) == 0; ClipVertex* v1 = (even) ? vertex - 2 : vertex - 1; ClipVertex* v2 = (even) ? vertex - 1 : vertex - 2; ClipVertex* v3 = vertex; + /* Indexes into extras array */ + vi1 = v1 - start; + vi2 = v2 - start; + vi3 = v3 - start; + /* Skip ahead if we don't have a complete triangle yet */ if(v1->flags != VERTEX_CMD || v2->flags != VERTEX_CMD) { triangle = -1; @@ -249,10 +280,17 @@ void clipTriangleStrip2(AlignedVector* vertices, uint32_t offset, uint8_t fladeS case B101: case B011: case B110: + assert(CLIP_COUNT < MAX_CLIP_TRIANGLES); + /* Store the triangle for clipping */ TO_CLIP[CLIP_COUNT].vertex[0] = *v1; TO_CLIP[CLIP_COUNT].vertex[1] = *v2; TO_CLIP[CLIP_COUNT].vertex[2] = *v3; + + TO_CLIP[CLIP_COUNT].extra[0] = *(VertexExtra*) aligned_vector_at(target->extras, vi1); + TO_CLIP[CLIP_COUNT].extra[1] = *(VertexExtra*) aligned_vector_at(target->extras, vi2); + TO_CLIP[CLIP_COUNT].extra[2] = *(VertexExtra*) aligned_vector_at(target->extras, vi3); + TO_CLIP[CLIP_COUNT].visible = visible; ++CLIP_COUNT; @@ -287,6 +325,15 @@ void clipTriangleStrip2(AlignedVector* vertices, uint32_t offset, uint8_t fladeS triangle = -1; } else { + /* FIXME: ? + * This situation doesn't actually seem possible, we always clip from one end + * of the triangle strip to the other, so we're never going to hit the plane in the + * middle of the strip (with previous/next unhandled tris). + * + * Uncomment if this actually happens */ + assert(0 && "Not Implemented (see comment)"); + + /* ClipVertex* v4 = vertex + 1; TO_CLIP[CLIP_COUNT].vertex[0] = *v3; @@ -298,26 +345,24 @@ void clipTriangleStrip2(AlignedVector* vertices, uint32_t offset, uint8_t fladeS TO_CLIP[CLIP_COUNT].visible = visible; ++CLIP_COUNT; - /* Restart strip */ + // Restart strip triangle = -1; - /* Mark the second vertex as the end of the strip */ + // Mark the second vertex as the end of the strip (vertex - 1)->flags = VERTEX_CMD_EOL; if(v4->flags == VERTEX_CMD_EOL) { markDead(vertex); markDead(v4); } else { - /* Swap the next vertices to start a new strip */ + // Swap the next vertices to start a new strip ClipVertex tmp = *vertex; *vertex = *v4; *v4 = tmp; vertex->flags = VERTEX_CMD; v4->flags = VERTEX_CMD; - } - - i += 1; + } */ } break; default: @@ -326,7 +371,8 @@ void clipTriangleStrip2(AlignedVector* vertices, uint32_t offset, uint8_t fladeS } /* Now, clip all the triangles and append them to the output */ + GLushort i; for(i = 0; i < CLIP_COUNT; ++i) { - clipTriangle(TO_CLIP[i].vertex, TO_CLIP[i].visible, vertices, fladeShade); + _glClipTriangle(&TO_CLIP[i], TO_CLIP[i].visible, target, fladeShade); } } diff --git a/GL/clip.h b/GL/clip.h deleted file mode 100644 index ce20ee6..0000000 --- a/GL/clip.h +++ /dev/null @@ -1,49 +0,0 @@ -#ifndef CLIP_H -#define CLIP_H - -#include - -#include "../containers/aligned_vector.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum { - CLIP_RESULT_ALL_IN_FRONT, - CLIP_RESULT_ALL_BEHIND, - CLIP_RESULT_ALL_ON_PLANE, - CLIP_RESULT_FRONT_TO_BACK, - CLIP_RESULT_BACK_TO_FRONT -} ClipResult; - - -#define A8IDX 3 -#define R8IDX 2 -#define G8IDX 1 -#define B8IDX 0 - - -typedef struct { - /* Same 32 byte layout as pvr_vertex_t */ - uint32_t flags; - float xyz[3]; - float uv[2]; - uint8_t bgra[4]; - uint32_t oargb; - - /* Important, we have 24 bytes here. That means when submitting to the SQs we need to - * increment the pointer by 6 */ - float nxyz[3]; /* Normal */ - float w; - float st[2]; -} ClipVertex; - -void clipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t); -void clipTriangleStrip2(AlignedVector* vertices, uint32_t offset, uint8_t fladeShade); - -#ifdef __cplusplus -} -#endif - -#endif // CLIP_H diff --git a/GL/draw.c b/GL/draw.c index e8e78cf..f6b2b3a 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -81,7 +81,21 @@ static void _readVertexData3f3f(const float* input, GLuint count, GLubyte stride } } -static void _readVertexData3us3f(const GLushort* input, GLuint count, GLubyte stride, float* output) { +/* VE == VertexExtra */ +static void _readVertexData3f3fVE(const float* input, GLuint count, GLubyte stride, float* output) { + const float* end = (float*) (((GLubyte*) input) + (count * stride)); + + while(input < end) { + output[0] = input[0]; + output[1] = input[1]; + output[2] = input[2]; + + input = (float*) (((GLubyte*) input) + stride); + output = (float*) (((GLubyte*) output) + sizeof(VertexExtra)); + } +} + +static void _readVertexData3us3f(const GLushort* input, GLuint count, GLubyte stride, GLfloat* output) { const GLushort* end = (GLushort*) ((GLubyte*) input) + (count * stride); while(input < end) { @@ -94,7 +108,20 @@ static void _readVertexData3us3f(const GLushort* input, GLuint count, GLubyte st } } -static void _readVertexData3ui3f(const GLuint* input, GLuint count, GLubyte stride, float* output) { +static void _readVertexData3us3fVE(const GLushort* input, GLuint count, GLubyte stride, GLfloat* output) { + const GLushort* end = (GLushort*) ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[0] = input[0]; + output[1] = input[1]; + output[2] = input[2]; + + input = (GLushort*) (((GLubyte*) input) + stride); + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); + } +} + +static void _readVertexData3ui3f(const GLuint* input, GLuint count, GLubyte stride, GLfloat* output) { const GLuint* end = (GLuint*) ((GLubyte*) input) + (count * stride); while(input < end) { @@ -107,6 +134,19 @@ static void _readVertexData3ui3f(const GLuint* input, GLuint count, GLubyte stri } } +static void _readVertexData3ui3fVE(const GLuint* input, GLuint count, GLubyte stride, GLfloat* output) { + const GLuint* end = (GLuint*) ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[0] = input[0]; + output[1] = input[1]; + output[2] = input[2]; + + input = (GLuint*) (((GLubyte*) input) + stride); + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); + } +} + static void _readVertexData3ub3f(const GLubyte* input, GLuint count, GLubyte stride, float* output) { const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; const GLubyte* end = ((GLubyte*) input) + (count * stride); @@ -121,6 +161,20 @@ static void _readVertexData3ub3f(const GLubyte* input, GLuint count, GLubyte str } } +static void _readVertexData3ub3fVE(const GLubyte* input, GLuint count, GLubyte stride, GLfloat* output) { + const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; + const GLubyte* end = ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE; + output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE; + output[2] = input[2] * ONE_OVER_TWO_FIVE_FIVE; + + input += stride; + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); + } +} + static void _readVertexData2f2f(const float* input, GLuint count, GLubyte stride, float* output) { const float* end = (float*) ((GLubyte*) input) + (count * stride); @@ -133,6 +187,18 @@ static void _readVertexData2f2f(const float* input, GLuint count, GLubyte stride } } +static void _readVertexData2f2fVE(const float* input, GLuint count, GLubyte stride, GLfloat* output) { + const float* end = (float*) ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[0] = input[0]; + output[1] = input[1]; + + input = (float*) (((GLubyte*) input) + stride); + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); + } +} + static void _readVertexData2f3f(const float* input, GLuint count, GLubyte stride, float* output) { const float* end = (float*) ((GLubyte*) input) + (count * stride); @@ -185,6 +251,18 @@ static void _readVertexData2us2f(const GLushort* input, GLuint count, GLubyte st } } +static void _readVertexData2us2fVE(const GLushort* input, GLuint count, GLubyte stride, GLfloat* output) { + const GLushort* end = (GLushort*) ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[0] = input[0]; + output[1] = input[1]; + + input = (GLushort*) (((GLubyte*) input) + stride); + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); + } +} + static void _readVertexData2ui2f(const GLuint* input, GLuint count, GLubyte stride, float* output) { const GLuint* end = (GLuint*) ((GLubyte*) input) + (count * stride); @@ -197,6 +275,18 @@ static void _readVertexData2ui2f(const GLuint* input, GLuint count, GLubyte stri } } +static void _readVertexData2ui2fVE(const GLuint* input, GLuint count, GLubyte stride, GLfloat* output) { + const GLuint* end = (GLuint*) ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[0] = input[0]; + output[1] = input[1]; + + input = (GLuint*) (((GLubyte*) input) + stride); + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); + } +} + static void _readVertexData2ub2f(const GLubyte* input, GLuint count, GLubyte stride, float* output) { const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; const GLubyte* end = (GLubyte*) ((GLubyte*) input) + (count * stride); @@ -210,6 +300,19 @@ static void _readVertexData2ub2f(const GLubyte* input, GLuint count, GLubyte str } } +static void _readVertexData2ub2fVE(const GLubyte* input, GLuint count, GLubyte stride, GLfloat* output) { + const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; + const GLubyte* end = (GLubyte*) ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE; + output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE; + + input = (((GLubyte*) input) + stride); + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); + } +} + static void _readVertexData2ui3f(const GLuint* input, GLuint count, GLubyte stride, float* output) { const GLuint* end = (GLuint*) ((GLubyte*) input) + (count * stride); @@ -279,13 +382,12 @@ static void _readVertexData3ubARGB(const GLubyte* input, GLuint count, GLubyte s } } -static void _fillWithNegZ(GLuint count, GLfloat* output) { - const GLfloat* end = (GLfloat*) ((GLubyte*) output) + (sizeof(ClipVertex) * count); +static void _fillWithNegZVE(GLuint count, GLfloat* output) { + const GLfloat* end = output + (count * 3); while(output < end) { output[0] = output[1] = 0.0f; output[2] = -1.0f; - - output += sizeof(ClipVertex); + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); } } @@ -303,10 +405,18 @@ static void _fillWhiteARGB(GLuint count, GLubyte* output) { } static void _fillZero2f(GLuint count, GLfloat* output) { - const GLfloat* end = output + (sizeof(ClipVertex) * count); + const GLfloat* end = (GLfloat*) ((GLubyte*) output) + (count * sizeof(ClipVertex)); while(output < end) { output[0] = output[1] = 0.0f; - output += sizeof(ClipVertex); + output = (GLfloat*) (((GLubyte*) output) + sizeof(ClipVertex)); + } +} + +static void _fillZero2fVE(GLuint count, GLfloat* output) { + const GLfloat* end = output + (2 * count); + while(output < end) { + output[0] = output[1] = 0.0f; + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); } } @@ -433,6 +543,18 @@ do { \ *b = temp; \ } while(0) +PVRHeader* _glSubmissionTargetHeader(SubmissionTarget* target) { + return aligned_vector_at(&target->output->vector, target->header_offset); +} + +ClipVertex* _glSubmissionTargetStart(SubmissionTarget* target) { + return aligned_vector_at(&target->output->vector, target->start_offset); +} + +ClipVertex* _glSubmissionTargetEnd(SubmissionTarget* target) { + return _glSubmissionTargetStart(target) + target->count; +} + static inline void genTriangles(ClipVertex* output, GLuint count) { const ClipVertex* end = output + count; ClipVertex* it = output + 2; @@ -571,9 +693,10 @@ static inline void _readUVData(const GLuint first, const GLuint count, ClipVerte } } -static inline void _readSTData(const GLuint first, const GLuint count, ClipVertex* output) { +static inline void _readSTData(const GLuint first, const GLuint count, SubmissionTarget* target) { if((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) != ST_ENABLED_FLAG) { - _fillZero2f(count, output->st); + VertexExtra* extra = aligned_vector_at(target->extras, 0); + _fillZero2fVE(count, extra->st); return; } @@ -581,21 +704,22 @@ static inline void _readSTData(const GLuint first, const GLuint count, ClipVerte const void* stptr = ((GLubyte*) ST_POINTER.ptr + (first * ststride)); if(ST_POINTER.size == 2) { + VertexExtra* extra = aligned_vector_at(target->extras, 0); switch(ST_POINTER.type) { case GL_FLOAT: - _readVertexData2f2f(stptr, count, ststride, output[0].st); + _readVertexData2f2fVE(stptr, count, ststride, extra->st); break; case GL_BYTE: case GL_UNSIGNED_BYTE: - _readVertexData2ub2f(stptr, count, ststride, output[0].st); + _readVertexData2ub2fVE(stptr, count, ststride, extra->st); break; case GL_SHORT: case GL_UNSIGNED_SHORT: - _readVertexData2us2f(stptr, count, ststride, output[0].st); + _readVertexData2us2fVE(stptr, count, ststride, extra->st); break; case GL_INT: case GL_UNSIGNED_INT: - _readVertexData2ui2f(stptr, count, ststride, output[0].st); + _readVertexData2ui2fVE(stptr, count, ststride, extra->st); break; default: assert(0 && "Not Implemented"); @@ -605,9 +729,10 @@ static inline void _readSTData(const GLuint first, const GLuint count, ClipVerte } } -static inline void _readNormalData(const GLuint first, const GLuint count, ClipVertex* output) { +static inline void _readNormalData(const GLuint first, const GLuint count, SubmissionTarget* target) { if((ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) != NORMAL_ENABLED_FLAG) { - _fillWithNegZ(count, output->nxyz); + VertexExtra* extra = aligned_vector_at(target->extras, 0); + _fillWithNegZVE(count, extra->nxyz); return; } @@ -615,21 +740,22 @@ static inline void _readNormalData(const GLuint first, const GLuint count, ClipV const void* nptr = ((GLubyte*) NORMAL_POINTER.ptr + (first * nstride)); if(NORMAL_POINTER.size == 3) { + VertexExtra* extra = aligned_vector_at(target->extras, 0); switch(NORMAL_POINTER.type) { case GL_FLOAT: - _readVertexData3f3f(nptr, count, nstride, output[0].nxyz); + _readVertexData3f3fVE(nptr, count, nstride, extra->nxyz); break; case GL_BYTE: case GL_UNSIGNED_BYTE: - _readVertexData3ub3f(nptr, count, nstride, output[0].nxyz); + _readVertexData3ub3fVE(nptr, count, nstride, extra->nxyz); break; case GL_SHORT: case GL_UNSIGNED_SHORT: - _readVertexData3us3f(nptr, count, nstride, output[0].nxyz); + _readVertexData3us3fVE(nptr, count, nstride, extra->nxyz); break; case GL_INT: case GL_UNSIGNED_INT: - _readVertexData3ui3f(nptr, count, nstride, output[0].nxyz); + _readVertexData3ui3fVE(nptr, count, nstride, extra->nxyz); break; default: assert(0 && "Not Implemented"); @@ -694,7 +820,7 @@ static inline void _readDiffuseData(const GLuint first, const GLuint count, Clip } } -static void generate(ClipVertex* output, const GLenum mode, const GLsizei first, const GLuint count, +static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei first, const GLuint count, const GLubyte* indices, const GLenum type, const GLboolean doTexture, const GLboolean doMultitexture, const GLboolean doLighting) { /* Read from the client buffers and generate an array of ClipVertices */ @@ -703,14 +829,15 @@ static void generate(ClipVertex* output, const GLenum mode, const GLsizei first, const ClipVertex* end; if(!indices) { - _readPositionData(first, count, output); - _readDiffuseData(first, count, output); - if(doTexture) _readUVData(first, count, output); - if(doLighting) _readNormalData(first, count, output); - if(doTexture && doMultitexture) _readSTData(first, count, output); + _readPositionData(first, count, _glSubmissionTargetStart(target)); + _readDiffuseData(first, count, _glSubmissionTargetStart(target)); + if(doTexture) _readUVData(first, count, _glSubmissionTargetStart(target)); + if(doLighting) _readNormalData(first, count, target); + if(doTexture && doMultitexture) _readSTData(first, count, target); + + it = _glSubmissionTargetStart(target); + end = _glSubmissionTargetEnd(target); - it = output; - end = output + count; while(it < end) { (it++)->flags = PVR_CMD_VERTEX; } @@ -718,25 +845,26 @@ static void generate(ClipVertex* output, const GLenum mode, const GLsizei first, // Drawing arrays switch(mode) { case GL_TRIANGLES: - genTriangles(output, count); + genTriangles(_glSubmissionTargetStart(target), count); break; case GL_QUADS: - genQuads(output, count); + genQuads(_glSubmissionTargetStart(target), count); break; case GL_POLYGON: case GL_TRIANGLE_FAN: - genTriangleFan(output, count); + genTriangleFan(_glSubmissionTargetStart(target), count); break; case GL_TRIANGLE_STRIP: - genTriangleStrip(output, count); + genTriangleStrip(_glSubmissionTargetStart(target), count); break; default: assert(0 && "Not Implemented"); } } else { const IndexParseFunc indexFunc = _calcParseIndexFunc(type); - it = output; - end = output + count; + it = _glSubmissionTargetStart(target); + end = _glSubmissionTargetEnd(target); + GLuint j; const GLubyte* idx = indices; while(it < end) { @@ -744,31 +872,33 @@ static void generate(ClipVertex* output, const GLenum mode, const GLsizei first, _readPositionData(j, 1, it); _readDiffuseData(j, 1, it); if(doTexture) _readUVData(j, 1, it); - if(doLighting) _readNormalData(j, 1, it); - if(doTexture && doMultitexture) _readSTData(j, 1, it); + //FIXME: Need to think about how we can share this */ + //if(doLighting) _readNormalData(j, 1, it); + //if(doTexture && doMultitexture) _readSTData(j, 1, it); ++it; idx += istride; } - it = output; + it = _glSubmissionTargetStart(target); while(it < end) { (it++)->flags = PVR_CMD_VERTEX; } + it = _glSubmissionTargetStart(target); // Drawing arrays switch(mode) { case GL_TRIANGLES: - genTriangles(output, count); + genTriangles(it, count); break; case GL_QUADS: - genQuads(output, count); + genQuads(it, count); break; case GL_POLYGON: case GL_TRIANGLE_FAN: - genTriangleFan(output, count); + genTriangleFan(it, count); break; case GL_TRIANGLE_STRIP: - genTriangleStrip(output, count); + genTriangleStrip(it, count); break; default: assert(0 && "Not Implemented"); @@ -776,15 +906,14 @@ static void generate(ClipVertex* output, const GLenum mode, const GLsizei first, } } -static void transform(ClipVertex* output, const GLuint count) { +static void transform(SubmissionTarget* target) { /* Perform modelview transform, storing W */ - - ClipVertex* vertex = output; + ClipVertex* vertex = _glSubmissionTargetStart(target); + const ClipVertex* end = _glSubmissionTargetEnd(target); _glApplyRenderMatrix(); /* Apply the Render Matrix Stack */ - GLsizei i = count; - while(i--) { + while(vertex < end) { register float __x __asm__("fr12") = (vertex->xyz[0]); register float __y __asm__("fr13") = (vertex->xyz[1]); register float __z __asm__("fr14") = (vertex->xyz[2]); @@ -801,17 +930,16 @@ static void transform(ClipVertex* output, const GLuint count) { vertex->xyz[1] = __y; vertex->xyz[2] = __z; vertex->w = __w; - ++vertex; } } -static GLsizei clip(AlignedVector* polylist, uint32_t offset, const GLuint count) { +static void clip(SubmissionTarget* target) { /* Perform clipping, generating new vertices as necessary */ - clipTriangleStrip2(polylist, offset, _glGetShadeModel() == GL_FLAT); + _glClipTriangleStrip(target, _glGetShadeModel() == GL_FLAT); - /* List size, minus the original offset (which includes the header), minus the header */ - return polylist->size - offset - 1; + /* Reset the count now that we may have added vertices */ + target->count = target->output->vector.size - target->start_offset; } static void mat_transform3(const float* xyz, const float* xyzOut, const uint32_t count, const uint32_t inStride, const uint32_t outStride) { @@ -846,7 +974,7 @@ static void mat_transform_normal3(const float* xyz, const float* xyzOut, const u } } -static void light(ClipVertex* output, const GLuint count) { +static void light(SubmissionTarget* target) { if(!_glIsLightingEnabled()) { return; } @@ -863,22 +991,23 @@ static void light(ClipVertex* output, const GLuint count) { aligned_vector_init(eye_space_data, sizeof(EyeSpaceData)); } - aligned_vector_resize(eye_space_data, count); + aligned_vector_resize(eye_space_data, target->count); /* Perform lighting calculations and manipulate the colour */ - ClipVertex* vertex = output; + ClipVertex* vertex = _glSubmissionTargetStart(target); + VertexExtra* extra = aligned_vector_at(target->extras, 0); EyeSpaceData* eye_space = (EyeSpaceData*) eye_space_data->data; _glMatrixLoadModelView(); - mat_transform3(vertex->xyz, eye_space->xyz, count, sizeof(ClipVertex), sizeof(EyeSpaceData)); + mat_transform3(vertex->xyz, eye_space->xyz, target->count, sizeof(ClipVertex), sizeof(EyeSpaceData)); _glMatrixLoadNormal(); - mat_transform_normal3(vertex->nxyz, eye_space->n, count, sizeof(ClipVertex), sizeof(EyeSpaceData)); + mat_transform_normal3(extra->nxyz, eye_space->n, target->count, sizeof(VertexExtra), sizeof(EyeSpaceData)); GLsizei i; EyeSpaceData* ES = aligned_vector_at(eye_space_data, 0); - for(i = 0; i < count; ++i, ++vertex, ++ES) { + for(i = 0; i < target->count; ++i, ++vertex, ++ES) { /* We ignore diffuse colour when lighting is enabled. If GL_COLOR_MATERIAL is enabled * then the lighting calculation should possibly take it into account */ @@ -903,12 +1032,12 @@ static void light(ClipVertex* output, const GLuint count) { } } -static void divide(ClipVertex* output, const GLuint count) { +static void divide(SubmissionTarget* target) { /* Perform perspective divide on each vertex */ - ClipVertex* vertex = output; + ClipVertex* vertex = _glSubmissionTargetStart(target); + const ClipVertex* end = _glSubmissionTargetEnd(target); - GLsizei i = count; - while(i--) { + while(vertex < end) { vertex->xyz[2] = 1.0f / vertex->w; vertex->xyz[0] *= vertex->xyz[2]; vertex->xyz[1] *= vertex->xyz[2]; @@ -926,12 +1055,17 @@ static void push(PVRHeader* header, ClipVertex* output, const GLuint count, Poly pvr_poly_compile(&header->hdr, &cxt); /* Post-process the vertex list */ + /* + * This is currently unnecessary. aligned_vector memsets the allocated objects + * to zero, and we don't touch oargb, also, we don't *enable* oargb yet in the + * pvr header so it should be ignored anyway. If this ever becomes a problem, + * uncomment this. ClipVertex* vout = output; - - GLuint i = count; - while(i--) { + const ClipVertex* end = output + count; + while(vout < end) { vout->oargb = 0; } + */ } #define DEBUG_CLIPPING 0 @@ -942,6 +1076,21 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type return; } + static SubmissionTarget* target = NULL; + static AlignedVector extras; + + /* Initialization of the target and extras */ + if(!target) { + target = (SubmissionTarget*) malloc(sizeof(SubmissionTarget)); + target->extras = NULL; + target->count = 0; + target->output = NULL; + target->header_offset = target->start_offset = 0; + + aligned_vector_init(&extras, sizeof(VertexExtra)); + target->extras = &extras; + } + GLboolean doMultitexture, doTexture, doLighting; GLint activeTexture; glGetIntegerv(GL_ACTIVE_TEXTURE_ARB, &activeTexture); @@ -958,39 +1107,32 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type profiler_push(__func__); + target->output = _glActivePolyList(); + target->count = (mode == GL_POLYGON || mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count; + target->header_offset = target->output->vector.size; + target->start_offset = target->header_offset + 1; - PolyList* activeList = _glActivePolyList(); + /* Make sure we have enough room for all the "extra" data */ + aligned_vector_resize(&extras, target->count); - /* Make room in the list buffer */ - GLsizei spaceNeeded = (mode == GL_POLYGON || mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count; - ClipVertex* start = aligned_vector_extend(&activeList->vector, spaceNeeded + 1); - - /* Store a pointer to the header for later */ - PVRHeader* header = (PVRHeader*) start++; - - /* We store an offset to the first ClipVertex because clipping may generate more - * vertices, which may cause a realloc and thus invalidate start and header - * we use this startOffset to reset those pointers after clipping */ - uint32_t startOffset = start - (ClipVertex*) activeList->vector.data; + /* Make room for the vertices and header */ + aligned_vector_extend(&target->output->vector, target->count + 1); profiler_checkpoint("allocate"); - generate(start, mode, first, count, (GLubyte*) indices, type, doTexture, doMultitexture, doLighting); + generate(target, mode, first, count, (GLubyte*) indices, type, doTexture, doMultitexture, doLighting); profiler_checkpoint("generate"); - light(start, spaceNeeded); + light(target); profiler_checkpoint("light"); - transform(start, spaceNeeded); + transform(target); profiler_checkpoint("transform"); if(_glIsClippingEnabled()) { - - uint32_t offset = ((start - 1) - (ClipVertex*) activeList->vector.data); - #if DEBUG_CLIPPING uint32_t i = 0; fprintf(stderr, "=========\n"); @@ -1005,11 +1147,7 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type } #endif - spaceNeeded = clip(&activeList->vector, offset, spaceNeeded); - - /* Clipping may have realloc'd so reset the start pointer */ - start = ((ClipVertex*) activeList->vector.data) + startOffset; - header = (PVRHeader*) (start - 1); /* Update the header pointer */ + clip(target); #if DEBUG_CLIPPING fprintf(stderr, "--------\n"); @@ -1027,11 +1165,11 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type profiler_checkpoint("clip"); - divide(start, spaceNeeded); + divide(target); profiler_checkpoint("divide"); - push(header, start, spaceNeeded, _glActivePolyList(), 0); + push(_glSubmissionTargetHeader(target), _glSubmissionTargetStart(target), target->count, _glActivePolyList(), 0); profiler_checkpoint("push"); /* @@ -1042,36 +1180,37 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type - We want to set the uv coordinates to the passed st ones */ + TextureObject* texture1 = _glGetTexture1(); + + /* Multitexture implicitly disabled */ + if(!texture1 || ((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) != ST_ENABLED_FLAG)) { + doMultitexture = GL_FALSE; + } + if(!doMultitexture) { /* Multitexture actively disabled */ profiler_pop(); return; } - TextureObject* texture1 = _glGetTexture1(); - - if(!texture1 || ((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) != ST_ENABLED_FLAG)) { - /* Multitexture implicitly disabled */ - profiler_pop(); - return; - } - /* Push back a copy of the list to the transparent poly list, including the header - (hence the - 1) + (hence the + 1) */ ClipVertex* vertex = aligned_vector_push_back( - &_glTransparentPolyList()->vector, start - 1, spaceNeeded + 1 + &_glTransparentPolyList()->vector, (ClipVertex*) _glSubmissionTargetHeader(target), target->count + 1 ); PVRHeader* mtHeader = (PVRHeader*) vertex++; ClipVertex* mtStart = vertex; - /* Copy ST coordinates to UV ones */ - GLsizei i = spaceNeeded; - while(i--) { - vertex->uv[0] = vertex->st[0]; - vertex->uv[1] = vertex->st[1]; + /* Replace the UV coordinates with the ST ones */ + const VertexExtra* end = aligned_vector_back(target->extras) + 1; + VertexExtra* ve = aligned_vector_at(target->extras, 0); + while(ve < end) { + vertex->uv[0] = ve->st[0]; + vertex->uv[1] = ve->st[1]; ++vertex; + ++ve; } /* Store state, as we're about to mess around with it */ @@ -1088,7 +1227,7 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); /* Send the buffer again to the transparent list */ - push(mtHeader, mtStart, spaceNeeded, _glTransparentPolyList(), 1); + push(mtHeader, mtStart, target->count, _glTransparentPolyList(), 1); /* Reset state */ glDepthFunc(depthFunc); diff --git a/GL/flush.c b/GL/flush.c index 74a1f04..46c0756 100644 --- a/GL/flush.c +++ b/GL/flush.c @@ -31,7 +31,6 @@ static void pvr_list_submit(void *src, int n) { d[7] = *(s++); __asm__("pref @%0" : : "r"(d)); d += 8; - s += CLIP_VERTEX_INT_PADDING; } /* Wait for both store queues to complete */ diff --git a/GL/private.h b/GL/private.h index bdb6038..87b6d5c 100644 --- a/GL/private.h +++ b/GL/private.h @@ -1,10 +1,11 @@ #ifndef PRIVATE_H #define PRIVATE_H +#include + #include "../include/gl.h" #include "../containers/aligned_vector.h" #include "../containers/named_array.h" -#include "./clip.h" #define TRACE_ENABLED 0 #define TRACE() if(TRACE_ENABLED) {fprintf(stderr, "%s\n", __func__);} @@ -17,11 +18,8 @@ #define MAX_TEXTURE_SIZE 1024 -#define CLIP_VERTEX_INT_PADDING 6 - typedef struct { pvr_poly_hdr_t hdr; - unsigned int padding[CLIP_VERTEX_INT_PADDING]; } PVRHeader; typedef struct { @@ -31,9 +29,6 @@ typedef struct { sy, /* Start y */ ex, /* End x */ ey; /* End y */ - - /* Padding to match clip vertex */ - unsigned int padding[CLIP_VERTEX_INT_PADDING]; } PVRTileClipCommand; /* Tile Clip command for the pvr */ typedef struct { @@ -97,6 +92,62 @@ typedef struct { GLboolean is_directional; } LightSource; +typedef struct { + /* Same 32 byte layout as pvr_vertex_t */ + uint32_t flags; + float xyz[3]; + float uv[2]; + uint8_t bgra[4]; + + /* In the pvr_vertex_t structure, this next 4 bytes is oargb + * but we're not using that for now, so having W here makes the code + * simpler */ + float w; +} ClipVertex; + +/* ClipVertex doesn't have room for these, so we need to parse them + * out separately. Potentially 'w' will be housed here if we support oargb */ +typedef struct { + float nxyz[3]; + float st[2]; +} VertexExtra; + +/* Generating PVR vertices from the user-submitted data gets complicated, particularly + * when a realloc could invalidate pointers. This structure holds all the information + * we need on the target vertex array to allow passing around to the various stages (e.g. generate/clip etc.) + */ +typedef struct { + PolyList* output; + uint32_t header_offset; // The offset of the header in the output list + uint32_t start_offset; // The offset into the output list + uint32_t count; // The number of vertices in this output + + /* Pointer to count * VertexExtra; */ + AlignedVector* extras; +} SubmissionTarget; + +PVRHeader* _glSubmissionTargetHeader(SubmissionTarget* target); +ClipVertex* _glSubmissionTargetStart(SubmissionTarget* target); +ClipVertex* _glSubmissionTargetEnd(SubmissionTarget* target); + +typedef enum { + CLIP_RESULT_ALL_IN_FRONT, + CLIP_RESULT_ALL_BEHIND, + CLIP_RESULT_ALL_ON_PLANE, + CLIP_RESULT_FRONT_TO_BACK, + CLIP_RESULT_BACK_TO_FRONT +} ClipResult; + + +#define A8IDX 3 +#define R8IDX 2 +#define G8IDX 1 +#define B8IDX 0 + +struct SubmissionTarget; + +void _glClipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t); +void _glClipTriangleStrip(SubmissionTarget* target, uint8_t fladeShade); PolyList *_glActivePolyList(); PolyList *_glTransparentPolyList(); From bd0ef4cba3d9b4c48b425ea3bfcef7d7b261a2a2 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Sun, 24 Mar 2019 14:09:52 +0000 Subject: [PATCH 02/33] Demo multitexturing in zclip, default to GL_MODULATE on multitexture --- GL/draw.c | 4 ++- samples/zclip/main.c | 56 ++++++++++++++++++++++------- samples/zclip/romdisk/lightmap.tga | Bin 0 -> 4140 bytes 3 files changed, 46 insertions(+), 14 deletions(-) create mode 100644 samples/zclip/romdisk/lightmap.tga diff --git a/GL/draw.c b/GL/draw.c index f6b2b3a..5e9a231 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -1224,7 +1224,9 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type glDepthFunc(GL_EQUAL); glEnable(GL_BLEND); - glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + + /* This is modulation, we need to switch depending on the texture env mode! */ + glBlendFunc(GL_DST_COLOR, GL_ZERO); /* Send the buffer again to the transparent list */ push(mtHeader, mtStart, target->count, _glTransparentPolyList(), 1); diff --git a/samples/zclip/main.c b/samples/zclip/main.c index b99f3a0..cbb88fd 100644 --- a/samples/zclip/main.c +++ b/samples/zclip/main.c @@ -3,6 +3,7 @@ #include #include "gl.h" +#include "glext.h" #include "glu.h" #include "glkos.h" @@ -23,6 +24,7 @@ typedef struct // Create A Structure TextureImage textures[3]; TextureImage road; +TextureImage lightmap; GLboolean LoadTGA(TextureImage *texture, const char *filename) // Loads A TGA File Into Memory @@ -123,7 +125,11 @@ void LoadGLTextures() { if(!LoadTGA(&road, "/rd/floor.tga")) { fprintf(stderr, "Error loading road texture"); } -}; + + if(!LoadTGA(&lightmap, "/rd/lightmap.tga")) { + fprintf(stderr, "Error loading lightmap texture"); + } +} /* A general OpenGL initialization function. Sets all of the initial parameters. */ @@ -170,40 +176,64 @@ void RenderTower(counter) { float v = 1.0f * (counter + 1); + glActiveTexture(GL_TEXTURE0); + glEnable(GL_TEXTURE_2D); glBindTexture(GL_TEXTURE_2D, textures[counter].texID); + + glActiveTexture(GL_TEXTURE1); + glEnable(GL_TEXTURE_2D); + glBindTexture(GL_TEXTURE_2D, lightmap.texID); + glBegin(GL_QUADS); - glTexCoord2f(0, 0); + glMultiTexCoord2f(GL_TEXTURE0, 0, 0); + glMultiTexCoord2f(GL_TEXTURE1, 0, 0); glVertex3f(-width, 0,-width); - glTexCoord2f(1, 0); + glMultiTexCoord2f(GL_TEXTURE0, 1, 0); + glMultiTexCoord2f(GL_TEXTURE1, 1, 0); glVertex3f(-width, 0, width); - glTexCoord2f(1, v); + glMultiTexCoord2f(GL_TEXTURE0, 1, v); + glMultiTexCoord2f(GL_TEXTURE1, 1, 1); glVertex3f(-width, height, width); - glTexCoord2f(0, v); + glMultiTexCoord2f(GL_TEXTURE0, 0, v); + glMultiTexCoord2f(GL_TEXTURE1, 0, 1); glVertex3f(-width, height,-width); - glTexCoord2f(0, 0); + glMultiTexCoord2f(GL_TEXTURE0, 0, 0); + glMultiTexCoord2f(GL_TEXTURE1, 0, 0); glVertex3f(-width, 0, width); - glTexCoord2f(1, 0); + glMultiTexCoord2f(GL_TEXTURE0, 1, 0); + glMultiTexCoord2f(GL_TEXTURE1, 1, 0); glVertex3f( width, 0, width); - glTexCoord2f(1, v); + glMultiTexCoord2f(GL_TEXTURE0, 1, v); + glMultiTexCoord2f(GL_TEXTURE1, 1, 1); glVertex3f( width, height, width); - glTexCoord2f(0, v); + glMultiTexCoord2f(GL_TEXTURE0, 0, v); + glMultiTexCoord2f(GL_TEXTURE1, 0, 1); glVertex3f(-width, height, width); - glTexCoord2f(0, 0); + glMultiTexCoord2f(GL_TEXTURE0, 0, 0); + glMultiTexCoord2f(GL_TEXTURE1, 0, 0); glVertex3f(width, 0,width); - glTexCoord2f(1, 0); + glMultiTexCoord2f(GL_TEXTURE0, 1, 0); + glMultiTexCoord2f(GL_TEXTURE1, 1, 0); glVertex3f(width, 0,-width); - glTexCoord2f(1, v); + glMultiTexCoord2f(GL_TEXTURE0, 1, v); + glMultiTexCoord2f(GL_TEXTURE1, 1, 1); glVertex3f(width, height,-width); - glTexCoord2f(0, v); + glMultiTexCoord2f(GL_TEXTURE0, 0, v); + glMultiTexCoord2f(GL_TEXTURE1, 0, 1); glVertex3f(width, height, width); glEnd(); } void RenderFloor() { + glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, road.texID); + + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, 0); + glBegin(GL_QUADS); glTexCoord2f(0.0f, 0.0f); glVertex3f(-100, 0, 0); diff --git a/samples/zclip/romdisk/lightmap.tga b/samples/zclip/romdisk/lightmap.tga new file mode 100644 index 0000000000000000000000000000000000000000..af277b64d85c23382866252d12a1a831af4ca9bf GIT binary patch literal 4140 zcmeH~e?*;o9LLYOjZ%!VC`Hq;DC^o)7R`xWb=Q;*?Uu#KqLdp=(N*qfigB_ii>BOB zicKj>v8&wC6kW?5WzjS@O3|DcrP#dh`}6re&(m|-zxThpCok{M^XvP4zn=OhkLNp& zeO&hNC&uIT!0YwG>+wQok4KYFnQOeR_vJZxM(@it>s(XTFy~(DJxPzVroFbLSx@fi zJNA3$*U?VyJvj*Rj4$_nN^m(04Z{uOSB0RLWdDnKa zJ?wSPu3NtH9b|3GS?}Aky{GkEBKDPUVs&$Wchg7KmhWRwM_JR^$MVy4WpD93dse&h zJT^8CadAX57SYksh#}E2F*;wkZ~=xWL~*wJn7;|`q5X`T?P$KG)lcp#W`EPu^pUQ~ zNy$h`PC}AoQW6p`CE`+IA`&eU5)zOQACGuuJoDnkixI@d#cF@!VP?m{a?$=#QI-cC z5`F=VQ~e%bRT@abd%}nNcZ`WmX;PlN@^-nl@w+QlBIjH>FzkV`iqC{kPt5} z2@(Bsb8?WIlZza3?b$ef>l1Nk=y^G1F?uIF7xUS6Jq^fnGT$jK#^huvRN z`)6OZGFy9RWoAJLl$9Y62%wBvQc{A_l2VkGGE12}FD@>Qps=tAg+)auDk@|a;zw71 z+ogd075exK{Om3L^BwZ&aNTkU2CEUQ4x*Y_RaF(q-6~X6R+0)-R0x$QFE2OTF&QWe zIQ`XwF5*!l4s?;Z_|`4la(PH^b)ZXQW1~YuLp|#28=%R#p}roqb+xFgtwpU;hkG?O zsJU0eyoVaj!C;X6tJ$9^F5wt4jE5p24;I?>tLg|4nH zbP~yj9UV#sQ~2${13Yjc-EG=9xBf~8L0qiV?$X~vdW%ET{U-E3d4m3ap&xyHz33Hs z`$#Vy_w=CWF_BarBh=lEP^cT-%xO-CsBk(p@}8 z>9Ml1f|b7rb9rfrEMs}uu(-I0*JSav@LKcFSFiBO#r(no=3Nf%{3O z&hGxZJ3H9f*;Tf;x8Y`M>jOS)ZDH%fmPwv(Zf+{?t(4xvnZp{^*d$hlgPt92}63R?2*Uz5PAx@9i=7vB%uq-GSr4d3*bxuN~N5 zTolfmo9`VDxQ@@CKf}%G$tg}xPjPZ`qIrCLjN>E2(a{l(IJ^7HtPbq14s`i=uD>|! zv-`eyF#qer^#6h{>hc9=4&rix6C&NkL-O14?F)SS0{`DH!2f~t__@5IsPgum+kvv| MyFcA7E&AT`FMLVBBLDyZ literal 0 HcmV?d00001 From 291e82d18a9a5d2f69db5c7f29b14f4fab9f100f Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Mon, 25 Mar 2019 09:44:59 +0000 Subject: [PATCH 03/33] Rename ClipVertex -> Vertex --- GL/clip.c | 34 ++++++++--------- GL/draw.c | 102 +++++++++++++++++++++++++-------------------------- GL/flush.c | 6 +-- GL/private.h | 8 ++-- 4 files changed, 75 insertions(+), 75 deletions(-) diff --git a/GL/clip.c b/GL/clip.c index a0c61bf..5e1ffea 100644 --- a/GL/clip.c +++ b/GL/clip.c @@ -24,8 +24,8 @@ void _glEnableClipping(unsigned char v) { ZCLIP_ENABLED = v; } -void _glClipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t) __attribute__((optimize("fast-math"))); -void _glClipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t) { +void _glClipLineToNearZ(const Vertex* v1, const Vertex* v2, Vertex* vout, float* t) __attribute__((optimize("fast-math"))); +void _glClipLineToNearZ(const Vertex* v1, const Vertex* v2, Vertex* vout, float* t) { const float NEAR_PLANE = 0.2; // FIXME: this needs to be read from the projection matrix.. somehow *t = (NEAR_PLANE - v1->w) / (v2->w - v1->w); @@ -75,7 +75,7 @@ const uint32_t VERTEX_CMD_EOL = 0xf0000000; const uint32_t VERTEX_CMD = 0xe0000000; typedef struct { - ClipVertex vertex[3]; + Vertex vertex[3]; VertexExtra extra[3]; uint8_t visible; } Triangle; @@ -85,10 +85,10 @@ void _glClipTriangle(const Triangle* triangle, const uint8_t visible, Submission uint8_t i, c = 0; uint8_t lastVisible = 255; - ClipVertex* last = NULL; + Vertex* last = NULL; VertexExtra* veLast = NULL; - const ClipVertex* vertices = triangle->vertex; + const Vertex* vertices = triangle->vertex; const VertexExtra* extras = triangle->extra; /* Used when flat shading is enabled */ @@ -97,7 +97,7 @@ void _glClipTriangle(const Triangle* triangle, const uint8_t visible, Submission for(i = 0; i < 4; ++i) { uint8_t thisIndex = (i == 3) ? 0 : i; - ClipVertex next; + Vertex next; VertexExtra veNext; next.flags = VERTEX_CMD; @@ -107,8 +107,8 @@ void _glClipTriangle(const Triangle* triangle, const uint8_t visible, Submission uint8_t lastIndex = (i == 3) ? 2 : thisIndex - 1; if(lastVisible < 255 && lastVisible != thisVisible) { - const ClipVertex* v1 = &vertices[lastIndex]; - const ClipVertex* v2 = &vertices[thisIndex]; + const Vertex* v1 = &vertices[lastIndex]; + const Vertex* v2 = &vertices[thisIndex]; const VertexExtra* ve1 = &extras[lastIndex]; const VertexExtra* ve2 = &extras[thisIndex]; @@ -153,7 +153,7 @@ void _glClipTriangle(const Triangle* triangle, const uint8_t visible, Submission if(last) { if(c == 4) { /* Convert to two triangles */ - ClipVertex newVerts[3]; + Vertex newVerts[3]; newVerts[0] = *(last - 3); newVerts[1] = *(last - 1); newVerts[2] = *(last); @@ -180,7 +180,7 @@ void _glClipTriangle(const Triangle* triangle, const uint8_t visible, Submission } } -static inline void markDead(ClipVertex* vert) { +static inline void markDead(Vertex* vert) { vert->flags = VERTEX_CMD_EOL; } @@ -201,9 +201,9 @@ void _glClipTriangleStrip(SubmissionTarget* target, uint8_t fladeShade) { CLIP_COUNT = 0; - ClipVertex* vertex = _glSubmissionTargetStart(target); - const ClipVertex* end = _glSubmissionTargetEnd(target); - const ClipVertex* start = vertex; + Vertex* vertex = _glSubmissionTargetStart(target); + const Vertex* end = _glSubmissionTargetEnd(target); + const Vertex* start = vertex; int32_t triangle = -1; @@ -217,9 +217,9 @@ void _glClipTriangleStrip(SubmissionTarget* target, uint8_t fladeShade) { triangle++; uint8_t even = (triangle % 2) == 0; - ClipVertex* v1 = (even) ? vertex - 2 : vertex - 1; - ClipVertex* v2 = (even) ? vertex - 1 : vertex - 2; - ClipVertex* v3 = vertex; + Vertex* v1 = (even) ? vertex - 2 : vertex - 1; + Vertex* v2 = (even) ? vertex - 1 : vertex - 2; + Vertex* v3 = vertex; /* Indexes into extras array */ vi1 = v1 - start; @@ -265,7 +265,7 @@ void _glClipTriangleStrip(SubmissionTarget* target, uint8_t fladeShade) { markDead(v3); } else { markDead(v1); - ClipVertex tmp = *v2; + Vertex tmp = *v2; *v2 = *v3; *v3 = tmp; diff --git a/GL/draw.c b/GL/draw.c index 5e9a231..0fa098b 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -65,7 +65,7 @@ static inline GLuint byte_size(GLenum type) { typedef void (*FloatParseFunc)(GLfloat* out, const GLubyte* in); typedef void (*ByteParseFunc)(GLubyte* out, const GLubyte* in); -typedef void (*PolyBuildFunc)(ClipVertex* first, ClipVertex* previous, ClipVertex* vertex, ClipVertex* next, const GLsizei i); +typedef void (*PolyBuildFunc)(Vertex* first, Vertex* previous, Vertex* vertex, Vertex* next, const GLsizei i); static void _readVertexData3f3f(const float* input, GLuint count, GLubyte stride, float* output) { @@ -77,7 +77,7 @@ static void _readVertexData3f3f(const float* input, GLuint count, GLubyte stride output[2] = input[2]; input = (float*) (((GLubyte*) input) + stride); - output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (float*) (((GLubyte*) output) + sizeof(Vertex)); } } @@ -104,7 +104,7 @@ static void _readVertexData3us3f(const GLushort* input, GLuint count, GLubyte st output[2] = input[2]; input = (GLushort*) (((GLubyte*) input) + stride); - output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (float*) (((GLubyte*) output) + sizeof(Vertex)); } } @@ -130,7 +130,7 @@ static void _readVertexData3ui3f(const GLuint* input, GLuint count, GLubyte stri output[2] = input[2]; input = (GLuint*) (((GLubyte*) input) + stride); - output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (float*) (((GLubyte*) output) + sizeof(Vertex)); } } @@ -157,7 +157,7 @@ static void _readVertexData3ub3f(const GLubyte* input, GLuint count, GLubyte str output[2] = input[2] * ONE_OVER_TWO_FIVE_FIVE; input += stride; - output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (float*) (((GLubyte*) output) + sizeof(Vertex)); } } @@ -183,7 +183,7 @@ static void _readVertexData2f2f(const float* input, GLuint count, GLubyte stride output[1] = input[1]; input = (float*) (((GLubyte*) input) + stride); - output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (float*) (((GLubyte*) output) + sizeof(Vertex)); } } @@ -208,7 +208,7 @@ static void _readVertexData2f3f(const float* input, GLuint count, GLubyte stride output[2] = 0.0f; input = (float*) (((GLubyte*) input) + stride); - output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (float*) (((GLubyte*) output) + sizeof(Vertex)); } } @@ -222,7 +222,7 @@ static void _readVertexData2ub3f(const GLubyte* input, GLuint count, GLubyte str output[2] = 0.0f; input += stride; - output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (float*) (((GLubyte*) output) + sizeof(Vertex)); } } @@ -235,7 +235,7 @@ static void _readVertexData2us3f(const GLushort* input, GLuint count, GLubyte st output[2] = 0.0f; input = (GLushort*) (((GLubyte*) input) + stride); - output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (float*) (((GLubyte*) output) + sizeof(Vertex)); } } @@ -247,7 +247,7 @@ static void _readVertexData2us2f(const GLushort* input, GLuint count, GLubyte st output[1] = input[1]; input = (GLushort*) (((GLubyte*) input) + stride); - output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (float*) (((GLubyte*) output) + sizeof(Vertex)); } } @@ -271,7 +271,7 @@ static void _readVertexData2ui2f(const GLuint* input, GLuint count, GLubyte stri output[1] = input[1]; input = (GLuint*) (((GLubyte*) input) + stride); - output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (float*) (((GLubyte*) output) + sizeof(Vertex)); } } @@ -296,7 +296,7 @@ static void _readVertexData2ub2f(const GLubyte* input, GLuint count, GLubyte str output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE; input = (((GLubyte*) input) + stride); - output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (float*) (((GLubyte*) output) + sizeof(Vertex)); } } @@ -322,7 +322,7 @@ static void _readVertexData2ui3f(const GLuint* input, GLuint count, GLubyte stri output[2] = 0.0f; input = (GLuint*) (((GLubyte*) input) + stride); - output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (float*) (((GLubyte*) output) + sizeof(Vertex)); } } @@ -336,7 +336,7 @@ static void _readVertexData4ubARGB(const GLubyte* input, GLuint count, GLubyte s output[A8IDX] = input[3]; input = (GLubyte*) (((GLubyte*) input) + stride); - output = (GLubyte*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (GLubyte*) (((GLubyte*) output) + sizeof(Vertex)); } } @@ -350,7 +350,7 @@ static void _readVertexData4fARGB(const float* input, GLuint count, GLubyte stri output[A8IDX] = (GLubyte) (input[3] * 255.0f); input = (float*) (((GLubyte*) input) + stride); - output = (GLubyte*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (GLubyte*) (((GLubyte*) output) + sizeof(Vertex)); } } @@ -364,7 +364,7 @@ static void _readVertexData3fARGB(const float* input, GLuint count, GLubyte stri output[A8IDX] = 1.0f; input = (float*) (((GLubyte*) input) + stride); - output = (GLubyte*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (GLubyte*) (((GLubyte*) output) + sizeof(Vertex)); } } @@ -378,7 +378,7 @@ static void _readVertexData3ubARGB(const GLubyte* input, GLuint count, GLubyte s output[A8IDX] = 1.0f; input = (((GLubyte*) input) + stride); - output = (GLubyte*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (GLubyte*) (((GLubyte*) output) + sizeof(Vertex)); } } @@ -392,7 +392,7 @@ static void _fillWithNegZVE(GLuint count, GLfloat* output) { } static void _fillWhiteARGB(GLuint count, GLubyte* output) { - const GLubyte* end = output + (sizeof(ClipVertex) * count); + const GLubyte* end = output + (sizeof(Vertex) * count); while(output < end) { output[R8IDX] = 255; @@ -400,15 +400,15 @@ static void _fillWhiteARGB(GLuint count, GLubyte* output) { output[B8IDX] = 255; output[A8IDX] = 255; - output += sizeof(ClipVertex); + output += sizeof(Vertex); } } static void _fillZero2f(GLuint count, GLfloat* output) { - const GLfloat* end = (GLfloat*) ((GLubyte*) output) + (count * sizeof(ClipVertex)); + const GLfloat* end = (GLfloat*) ((GLubyte*) output) + (count * sizeof(Vertex)); while(output < end) { output[0] = output[1] = 0.0f; - output = (GLfloat*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (GLfloat*) (((GLubyte*) output) + sizeof(Vertex)); } } @@ -538,7 +538,7 @@ static inline void transformNormalToEyeSpace(GLfloat* normal) { #define swapVertex(a, b) \ do { \ - ClipVertex temp = *a; \ + Vertex temp = *a; \ *a = *b; \ *b = temp; \ } while(0) @@ -547,28 +547,28 @@ PVRHeader* _glSubmissionTargetHeader(SubmissionTarget* target) { return aligned_vector_at(&target->output->vector, target->header_offset); } -ClipVertex* _glSubmissionTargetStart(SubmissionTarget* target) { +Vertex* _glSubmissionTargetStart(SubmissionTarget* target) { return aligned_vector_at(&target->output->vector, target->start_offset); } -ClipVertex* _glSubmissionTargetEnd(SubmissionTarget* target) { +Vertex* _glSubmissionTargetEnd(SubmissionTarget* target) { return _glSubmissionTargetStart(target) + target->count; } -static inline void genTriangles(ClipVertex* output, GLuint count) { - const ClipVertex* end = output + count; - ClipVertex* it = output + 2; +static inline void genTriangles(Vertex* output, GLuint count) { + const Vertex* end = output + count; + Vertex* it = output + 2; while(it < end) { it->flags = PVR_CMD_VERTEX_EOL; it += 3; } } -static inline void genQuads(ClipVertex* output, GLuint count) { - ClipVertex* previous; - ClipVertex* this = output + 3; +static inline void genQuads(Vertex* output, GLuint count) { + Vertex* previous; + Vertex* this = output + 3; - const ClipVertex* end = output + count; + const Vertex* end = output + count; while(this < end) { previous = this - 1; @@ -578,15 +578,15 @@ static inline void genQuads(ClipVertex* output, GLuint count) { } } -static void genTriangleStrip(ClipVertex* output, GLuint count) { +static void genTriangleStrip(Vertex* output, GLuint count) { output[count - 1].flags = PVR_CMD_VERTEX_EOL; } #define MAX_POLYGON_SIZE 32 -static void genTriangleFan(ClipVertex* output, GLuint count) { +static void genTriangleFan(Vertex* output, GLuint count) { assert(count < MAX_POLYGON_SIZE); - static ClipVertex buffer[MAX_POLYGON_SIZE]; + static Vertex buffer[MAX_POLYGON_SIZE]; if(count <= 3){ swapVertex(&output[1], &output[2]); @@ -594,13 +594,13 @@ static void genTriangleFan(ClipVertex* output, GLuint count) { return; } - memcpy(buffer, output, sizeof(ClipVertex) * count); + memcpy(buffer, output, sizeof(Vertex) * count); // First 3 vertices are in the right place, just end early output[2].flags = PVR_CMD_VERTEX_EOL; GLsizei i = 3, target = 3; - ClipVertex* first = &output[0]; + Vertex* first = &output[0]; for(; i < count; ++i) { output[target++] = *first; @@ -610,7 +610,7 @@ static void genTriangleFan(ClipVertex* output, GLuint count) { } } -static inline void _readPositionData(const GLuint first, const GLuint count, ClipVertex* output) { +static inline void _readPositionData(const GLuint first, const GLuint count, Vertex* output) { const GLubyte vstride = (VERTEX_POINTER.stride) ? VERTEX_POINTER.stride : VERTEX_POINTER.size * byte_size(VERTEX_POINTER.type); const void* vptr = ((GLubyte*) VERTEX_POINTER.ptr + (first * vstride)); @@ -659,7 +659,7 @@ static inline void _readPositionData(const GLuint first, const GLuint count, Cli } } -static inline void _readUVData(const GLuint first, const GLuint count, ClipVertex* output) { +static inline void _readUVData(const GLuint first, const GLuint count, Vertex* output) { if((ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) != UV_ENABLED_FLAG) { _fillZero2f(count, output->uv); return; @@ -765,7 +765,7 @@ static inline void _readNormalData(const GLuint first, const GLuint count, Submi } } -static inline void _readDiffuseData(const GLuint first, const GLuint count, ClipVertex* output) { +static inline void _readDiffuseData(const GLuint first, const GLuint count, Vertex* output) { if((ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) != DIFFUSE_ENABLED_FLAG) { /* Just fill the whole thing white if the attribute is disabled */ _fillWhiteARGB(count, output[0].bgra); @@ -825,8 +825,8 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei /* Read from the client buffers and generate an array of ClipVertices */ const GLsizei istride = byte_size(type); - ClipVertex* it; - const ClipVertex* end; + Vertex* it; + const Vertex* end; if(!indices) { _readPositionData(first, count, _glSubmissionTargetStart(target)); @@ -908,8 +908,8 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei static void transform(SubmissionTarget* target) { /* Perform modelview transform, storing W */ - ClipVertex* vertex = _glSubmissionTargetStart(target); - const ClipVertex* end = _glSubmissionTargetEnd(target); + Vertex* vertex = _glSubmissionTargetStart(target); + const Vertex* end = _glSubmissionTargetEnd(target); _glApplyRenderMatrix(); /* Apply the Render Matrix Stack */ @@ -994,12 +994,12 @@ static void light(SubmissionTarget* target) { aligned_vector_resize(eye_space_data, target->count); /* Perform lighting calculations and manipulate the colour */ - ClipVertex* vertex = _glSubmissionTargetStart(target); + Vertex* vertex = _glSubmissionTargetStart(target); VertexExtra* extra = aligned_vector_at(target->extras, 0); EyeSpaceData* eye_space = (EyeSpaceData*) eye_space_data->data; _glMatrixLoadModelView(); - mat_transform3(vertex->xyz, eye_space->xyz, target->count, sizeof(ClipVertex), sizeof(EyeSpaceData)); + mat_transform3(vertex->xyz, eye_space->xyz, target->count, sizeof(Vertex), sizeof(EyeSpaceData)); _glMatrixLoadNormal(); mat_transform_normal3(extra->nxyz, eye_space->n, target->count, sizeof(VertexExtra), sizeof(EyeSpaceData)); @@ -1034,8 +1034,8 @@ static void light(SubmissionTarget* target) { static void divide(SubmissionTarget* target) { /* Perform perspective divide on each vertex */ - ClipVertex* vertex = _glSubmissionTargetStart(target); - const ClipVertex* end = _glSubmissionTargetEnd(target); + Vertex* vertex = _glSubmissionTargetStart(target); + const Vertex* end = _glSubmissionTargetEnd(target); while(vertex < end) { vertex->xyz[2] = 1.0f / vertex->w; @@ -1045,7 +1045,7 @@ static void divide(SubmissionTarget* target) { } } -static void push(PVRHeader* header, ClipVertex* output, const GLuint count, PolyList* activePolyList, GLshort textureUnit) { +static void push(PVRHeader* header, Vertex* output, const GLuint count, PolyList* activePolyList, GLshort textureUnit) { // Compile the header pvr_poly_cxt_t cxt = *_glGetPVRContext(); cxt.list_type = activePolyList->list_type; @@ -1196,12 +1196,12 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type /* Push back a copy of the list to the transparent poly list, including the header (hence the + 1) */ - ClipVertex* vertex = aligned_vector_push_back( - &_glTransparentPolyList()->vector, (ClipVertex*) _glSubmissionTargetHeader(target), target->count + 1 + Vertex* vertex = aligned_vector_push_back( + &_glTransparentPolyList()->vector, (Vertex*) _glSubmissionTargetHeader(target), target->count + 1 ); PVRHeader* mtHeader = (PVRHeader*) vertex++; - ClipVertex* mtStart = vertex; + Vertex* mtStart = vertex; /* Replace the UV coordinates with the ST ones */ const VertexExtra* end = aligned_vector_back(target->extras) + 1; diff --git a/GL/flush.c b/GL/flush.c index 46c0756..6c68fe7 100644 --- a/GL/flush.c +++ b/GL/flush.c @@ -104,9 +104,9 @@ void APIENTRY glKosInitEx(GLdcConfig* config) { PT_LIST.list_type = PVR_LIST_PT_POLY; TR_LIST.list_type = PVR_LIST_TR_POLY; - aligned_vector_init(&OP_LIST.vector, sizeof(ClipVertex)); - aligned_vector_init(&PT_LIST.vector, sizeof(ClipVertex)); - aligned_vector_init(&TR_LIST.vector, sizeof(ClipVertex)); + aligned_vector_init(&OP_LIST.vector, sizeof(Vertex)); + aligned_vector_init(&PT_LIST.vector, sizeof(Vertex)); + aligned_vector_init(&TR_LIST.vector, sizeof(Vertex)); aligned_vector_reserve(&OP_LIST.vector, config->initial_op_capacity); aligned_vector_reserve(&PT_LIST.vector, config->initial_pt_capacity); diff --git a/GL/private.h b/GL/private.h index 87b6d5c..bdae752 100644 --- a/GL/private.h +++ b/GL/private.h @@ -103,7 +103,7 @@ typedef struct { * but we're not using that for now, so having W here makes the code * simpler */ float w; -} ClipVertex; +} Vertex; /* ClipVertex doesn't have room for these, so we need to parse them * out separately. Potentially 'w' will be housed here if we support oargb */ @@ -127,8 +127,8 @@ typedef struct { } SubmissionTarget; PVRHeader* _glSubmissionTargetHeader(SubmissionTarget* target); -ClipVertex* _glSubmissionTargetStart(SubmissionTarget* target); -ClipVertex* _glSubmissionTargetEnd(SubmissionTarget* target); +Vertex* _glSubmissionTargetStart(SubmissionTarget* target); +Vertex* _glSubmissionTargetEnd(SubmissionTarget* target); typedef enum { CLIP_RESULT_ALL_IN_FRONT, @@ -146,7 +146,7 @@ typedef enum { struct SubmissionTarget; -void _glClipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t); +void _glClipLineToNearZ(const Vertex* v1, const Vertex* v2, Vertex* vout, float* t); void _glClipTriangleStrip(SubmissionTarget* target, uint8_t fladeShade); PolyList *_glActivePolyList(); From 671881eafd0832dff0b586ca723406b277f8423e Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Mon, 25 Mar 2019 09:48:58 +0000 Subject: [PATCH 04/33] Optimise GL_POLYGON rendering --- GL/draw.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/GL/draw.c b/GL/draw.c index 0fa098b..262caad 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -850,7 +850,6 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei case GL_QUADS: genQuads(_glSubmissionTargetStart(target), count); break; - case GL_POLYGON: case GL_TRIANGLE_FAN: genTriangleFan(_glSubmissionTargetStart(target), count); break; @@ -893,7 +892,6 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei case GL_QUADS: genQuads(it, count); break; - case GL_POLYGON: case GL_TRIANGLE_FAN: genTriangleFan(it, count); break; @@ -1107,8 +1105,25 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type profiler_push(__func__); + /* Polygons are treated as triangle fans, the only time this would be a + * problem is if we supported glPolygonMode(..., GL_LINE) but we don't. + * We optimise the triangle and quad cases. + */ + if(mode == GL_POLYGON) { + if(count == 3) { + mode = GL_TRIANGLES; + } else if(count == 4) { + mode = GL_QUADS; + } else { + mode = GL_TRIANGLE_FAN; + } + } + + // We don't handle this any further, so just make sure we never pass it down */ + assert(mode != GL_POLYGON); + target->output = _glActivePolyList(); - target->count = (mode == GL_POLYGON || mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count; + target->count = (mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count; target->header_offset = target->output->vector.size; target->start_offset = target->header_offset + 1; From 0e71588e6c9788cb23c7d9f2f430e9484bb5fa22 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Mon, 25 Mar 2019 10:33:05 +0000 Subject: [PATCH 05/33] Try to optimise the quad generation --- GL/draw.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/GL/draw.c b/GL/draw.c index 262caad..da852ef 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -536,13 +536,27 @@ static inline void transformNormalToEyeSpace(GLfloat* normal) { mat_trans_normal3(normal[0], normal[1], normal[2]); } + +/* FIXME: SH4 has a swap.w instruction, we should leverage it here! */ +#define _XOR_SWAP32(x, y) \ + *((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); \ + *((uint32_t*) &y) = *((uint32_t*) &x) ^ *((uint32_t*) &y); \ + *((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); + + #define swapVertex(a, b) \ do { \ - Vertex temp = *a; \ - *a = *b; \ - *b = temp; \ + _XOR_SWAP32(a->flags, b->flags); \ + _XOR_SWAP32(a->xyz[0], b->xyz[0]); \ + _XOR_SWAP32(a->xyz[1], b->xyz[1]); \ + _XOR_SWAP32(a->xyz[2], b->xyz[2]); \ + _XOR_SWAP32(a->uv[0], b->uv[0]); \ + _XOR_SWAP32(a->uv[1], b->uv[1]); \ + _XOR_SWAP32(a->bgra, b->bgra); \ + _XOR_SWAP32(a->w, b->w); \ } while(0) + PVRHeader* _glSubmissionTargetHeader(SubmissionTarget* target) { return aligned_vector_at(&target->output->vector, target->header_offset); } @@ -565,16 +579,16 @@ static inline void genTriangles(Vertex* output, GLuint count) { } static inline void genQuads(Vertex* output, GLuint count) { - Vertex* previous; - Vertex* this = output + 3; - + Vertex* this = output + 2; + Vertex* next = this + 1; const Vertex* end = output + count; while(this < end) { - previous = this - 1; - swapVertex(previous, this); - this->flags = PVR_CMD_VERTEX_EOL; + swapVertex(this, next); + next->flags = PVR_CMD_VERTEX_EOL; + this += 4; + next += 4; } } @@ -589,7 +603,6 @@ static void genTriangleFan(Vertex* output, GLuint count) { static Vertex buffer[MAX_POLYGON_SIZE]; if(count <= 3){ - swapVertex(&output[1], &output[2]); output[2].flags = PVR_CMD_VERTEX_EOL; return; } From 8fd90bd0401467417057464a686f64d08a507e3b Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Mon, 25 Mar 2019 12:38:34 +0000 Subject: [PATCH 06/33] Switch back to normal swapping --- GL/draw.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/GL/draw.c b/GL/draw.c index da852ef..b580854 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -538,22 +538,29 @@ static inline void transformNormalToEyeSpace(GLfloat* normal) { /* FIXME: SH4 has a swap.w instruction, we should leverage it here! */ -#define _XOR_SWAP32(x, y) \ +#define _SWAP32(x, y) \ +do { \ + uint32_t t = *((uint32_t*) &x); \ + *((uint32_t*) &x) = *((uint32_t*) &y); \ + *((uint32_t*) &y) = t; \ +} while(0) + +/* *((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); \ *((uint32_t*) &y) = *((uint32_t*) &x) ^ *((uint32_t*) &y); \ - *((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); + *((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); */ #define swapVertex(a, b) \ do { \ - _XOR_SWAP32(a->flags, b->flags); \ - _XOR_SWAP32(a->xyz[0], b->xyz[0]); \ - _XOR_SWAP32(a->xyz[1], b->xyz[1]); \ - _XOR_SWAP32(a->xyz[2], b->xyz[2]); \ - _XOR_SWAP32(a->uv[0], b->uv[0]); \ - _XOR_SWAP32(a->uv[1], b->uv[1]); \ - _XOR_SWAP32(a->bgra, b->bgra); \ - _XOR_SWAP32(a->w, b->w); \ + _SWAP32(a->flags, b->flags); \ + _SWAP32(a->xyz[0], b->xyz[0]); \ + _SWAP32(a->xyz[1], b->xyz[1]); \ + _SWAP32(a->xyz[2], b->xyz[2]); \ + _SWAP32(a->uv[0], b->uv[0]); \ + _SWAP32(a->uv[1], b->uv[1]); \ + _SWAP32(a->bgra, b->bgra); \ + _SWAP32(a->w, b->w); \ } while(0) From b771a5339b76f9a5c85ee4d05c2957710e84739e Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Mon, 25 Mar 2019 12:56:48 +0000 Subject: [PATCH 07/33] Send colours as bytes in immediate mode --- GL/immediate.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/GL/immediate.c b/GL/immediate.c index 3bbc9d1..24c7ab4 100644 --- a/GL/immediate.c +++ b/GL/immediate.c @@ -23,14 +23,14 @@ static AlignedVector NORMALS; static GLfloat NORMAL[3] = {0.0f, 0.0f, 1.0f}; -static GLfloat COLOR[4] = {1.0f, 1.0f, 1.0f, 1.0f}; +static GLubyte COLOR[4] = {255, 255, 255, 255}; static GLfloat UV_COORD[2] = {0.0f, 0.0f}; static GLfloat ST_COORD[2] = {0.0f, 0.0f}; void _glInitImmediateMode(GLuint initial_size) { aligned_vector_init(&VERTICES, sizeof(GLfloat)); - aligned_vector_init(&COLOURS, sizeof(GLfloat)); + aligned_vector_init(&COLOURS, sizeof(GLubyte)); aligned_vector_init(&UV_COORDS, sizeof(GLfloat)); aligned_vector_init(&ST_COORDS, sizeof(GLfloat)); aligned_vector_init(&NORMALS, sizeof(GLfloat)); @@ -65,21 +65,19 @@ void APIENTRY glBegin(GLenum mode) { } void APIENTRY glColor4f(GLfloat r, GLfloat g, GLfloat b, GLfloat a) { + COLOR[0] = (GLubyte)(r * 255); + COLOR[1] = (GLubyte)(g * 255); + COLOR[2] = (GLubyte)(b * 255); + COLOR[3] = (GLubyte)(a * 255); +} + +void APIENTRY glColor4ub(GLubyte r, GLubyte g, GLubyte b, GLubyte a) { COLOR[0] = r; COLOR[1] = g; COLOR[2] = b; COLOR[3] = a; } -void APIENTRY glColor4ub(GLubyte r, GLubyte g, GLubyte b, GLubyte a) { - glColor4f( - ((GLfloat) r) / 255.0f, - ((GLfloat) g) / 255.0f, - ((GLfloat) b) / 255.0f, - ((GLfloat) a) / 255.0f - ); -} - void APIENTRY glColor4fv(const GLfloat* v) { glColor4f(v[0], v[1], v[2], v[3]); } @@ -90,7 +88,10 @@ void APIENTRY glColor3f(GLfloat r, GLfloat g, GLfloat b) { } void APIENTRY glColor3ub(GLubyte red, GLubyte green, GLubyte blue) { - glColor3f((float) red / 255, (float) green / 255, (float) blue / 255); + COLOR[0] = red; + COLOR[1] = green; + COLOR[2] = blue; + COLOR[3] = 255; } void APIENTRY glColor3fv(const GLfloat* v) { @@ -185,7 +186,7 @@ void APIENTRY glEnd() { glEnableClientState(GL_NORMAL_ARRAY); glVertexPointer(3, GL_FLOAT, 0, VERTICES.data); - glColorPointer(4, GL_FLOAT, 0, COLOURS.data); + glColorPointer(4, GL_UNSIGNED_BYTE, 0, COLOURS.data); glNormalPointer(GL_FLOAT, 0, NORMALS.data); GLint activeTexture; From 70feea6c6f71557d4456d0100636fde65df5853d Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Mon, 25 Mar 2019 16:05:36 +0000 Subject: [PATCH 08/33] Minor optimisations --- GL/immediate.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/GL/immediate.c b/GL/immediate.c index 24c7ab4..ccd8a5e 100644 --- a/GL/immediate.c +++ b/GL/immediate.c @@ -79,12 +79,17 @@ void APIENTRY glColor4ub(GLubyte r, GLubyte g, GLubyte b, GLubyte a) { } void APIENTRY glColor4fv(const GLfloat* v) { - glColor4f(v[0], v[1], v[2], v[3]); + COLOR[0] = (GLubyte)(v[0] * 255); + COLOR[1] = (GLubyte)(v[1] * 255); + COLOR[2] = (GLubyte)(v[2] * 255); + COLOR[3] = (GLubyte)(v[3] * 255); } void APIENTRY glColor3f(GLfloat r, GLfloat g, GLfloat b) { - static float a = 1.0f; - glColor4f(r, g, b, a); + COLOR[0] = (GLubyte)(r * 255); + COLOR[1] = (GLubyte)(g * 255); + COLOR[2] = (GLubyte)(b * 255); + COLOR[3] = 255; } void APIENTRY glColor3ub(GLubyte red, GLubyte green, GLubyte blue) { @@ -95,7 +100,10 @@ void APIENTRY glColor3ub(GLubyte red, GLubyte green, GLubyte blue) { } void APIENTRY glColor3fv(const GLfloat* v) { - glColor3f(v[0], v[1], v[2]); + COLOR[0] = (GLubyte)(v[0] * 255); + COLOR[1] = (GLubyte)(v[1] * 255); + COLOR[2] = (GLubyte)(v[2] * 255); + COLOR[3] = 255; } void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) { From 4355d0f224af3124dbff0aad478640d48d40e288 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Mon, 25 Mar 2019 16:06:08 +0000 Subject: [PATCH 09/33] Add additional profiler checkpoints --- GL/draw.c | 14 +++++++++++++- GL/immediate.c | 13 +++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/GL/draw.c b/GL/draw.c index b580854..3170e01 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -849,11 +849,18 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei const Vertex* end; if(!indices) { - _readPositionData(first, count, _glSubmissionTargetStart(target)); + profiler_push(__func__); + + _readPositionData(first, count, _glSubmissionTargetStart(target)); + profiler_checkpoint("positions"); + _readDiffuseData(first, count, _glSubmissionTargetStart(target)); + profiler_checkpoint("diffuse"); + if(doTexture) _readUVData(first, count, _glSubmissionTargetStart(target)); if(doLighting) _readNormalData(first, count, target); if(doTexture && doMultitexture) _readSTData(first, count, target); + profiler_checkpoint("others"); it = _glSubmissionTargetStart(target); end = _glSubmissionTargetEnd(target); @@ -862,6 +869,8 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei (it++)->flags = PVR_CMD_VERTEX; } + profiler_checkpoint("flags"); + // Drawing arrays switch(mode) { case GL_TRIANGLES: @@ -879,6 +888,9 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei default: assert(0 && "Not Implemented"); } + + profiler_checkpoint("quads"); + profiler_pop(); } else { const IndexParseFunc indexFunc = _calcParseIndexFunc(type); it = _glSubmissionTargetStart(target); diff --git a/GL/immediate.c b/GL/immediate.c index ccd8a5e..1ea573a 100644 --- a/GL/immediate.c +++ b/GL/immediate.c @@ -9,6 +9,7 @@ #include "../include/gl.h" #include "../include/glext.h" +#include "profiler.h" #include "private.h" @@ -174,6 +175,8 @@ void APIENTRY glNormal3fv(const GLfloat* v) { } void APIENTRY glEnd() { + profiler_push(__func__); + IMMEDIATE_MODE_ACTIVE = GL_FALSE; GLboolean vertexArrayEnabled, colorArrayEnabled, normalArrayEnabled; @@ -189,6 +192,8 @@ void APIENTRY glEnd() { AttribPointer uvptr = *_glGetUVAttribPointer(); AttribPointer stptr = *_glGetSTAttribPointer(); + profiler_checkpoint("prep"); + glEnableClientState(GL_VERTEX_ARRAY); glEnableClientState(GL_COLOR_ARRAY); glEnableClientState(GL_NORMAL_ARRAY); @@ -210,14 +215,20 @@ void APIENTRY glEnd() { glEnableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer(2, GL_FLOAT, 0, ST_COORDS.data); + profiler_checkpoint("client_state"); + glDrawArrays(ACTIVE_POLYGON_MODE, 0, VERTICES.size / 3); + profiler_checkpoint("draw_arrays"); + aligned_vector_clear(&VERTICES); aligned_vector_clear(&COLOURS); aligned_vector_clear(&UV_COORDS); aligned_vector_clear(&ST_COORDS); aligned_vector_clear(&NORMALS); + profiler_checkpoint("clear"); + *_glGetVertexAttribPointer() = vptr; *_glGetDiffuseAttribPointer() = dptr; *_glGetNormalAttribPointer() = nptr; @@ -248,6 +259,8 @@ void APIENTRY glEnd() { glClientActiveTextureARB((GLuint) activeTexture); + profiler_checkpoint("restore"); + profiler_pop(); } void APIENTRY glRectf(GLfloat x1, GLfloat y1, GLfloat x2, GLfloat y2) { From f712f00602f8cc52b54ee4dba0d01859b1f67b79 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Mon, 25 Mar 2019 16:06:41 +0000 Subject: [PATCH 10/33] Don't do anything if there are no vertices submitted --- GL/draw.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/GL/draw.c b/GL/draw.c index 3170e01..1def91d 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -1106,6 +1106,11 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type return; } + /* No vertices? Do nothing */ + if(!count) { + return; + } + static SubmissionTarget* target = NULL; static AlignedVector extras; From ba04f62f6df5f14e7a07646e1b6e5621100c6fab Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Mon, 25 Mar 2019 16:07:12 +0000 Subject: [PATCH 11/33] Minor optimisation --- GL/clip.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/GL/clip.c b/GL/clip.c index 5e1ffea..5b0a120 100644 --- a/GL/clip.c +++ b/GL/clip.c @@ -221,17 +221,17 @@ void _glClipTriangleStrip(SubmissionTarget* target, uint8_t fladeShade) { Vertex* v2 = (even) ? vertex - 1 : vertex - 2; Vertex* v3 = vertex; - /* Indexes into extras array */ - vi1 = v1 - start; - vi2 = v2 - start; - vi3 = v3 - start; - /* Skip ahead if we don't have a complete triangle yet */ if(v1->flags != VERTEX_CMD || v2->flags != VERTEX_CMD) { triangle = -1; continue; } + /* Indexes into extras array */ + vi1 = v1 - start; + vi2 = v2 - start; + vi3 = v3 - start; + uint8_t visible = ((v1->w > 0) ? 4 : 0) | ((v2->w > 0) ? 2 : 0) | ((v3->w > 0) ? 1 : 0); switch(visible) { From 493592ea30fb5f5d3b12afa9c5abad762070c84c Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Mon, 25 Mar 2019 16:07:27 +0000 Subject: [PATCH 12/33] Tweaks --- GL/draw.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/GL/draw.c b/GL/draw.c index 1def91d..05058ed 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -1232,14 +1232,16 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type - We want to set the uv coordinates to the passed st ones */ + if(!doMultitexture) { + /* Multitexture actively disabled */ + profiler_pop(); + return; + } + TextureObject* texture1 = _glGetTexture1(); /* Multitexture implicitly disabled */ if(!texture1 || ((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) != ST_ENABLED_FLAG)) { - doMultitexture = GL_FALSE; - } - - if(!doMultitexture) { /* Multitexture actively disabled */ profiler_pop(); return; @@ -1256,8 +1258,10 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type Vertex* mtStart = vertex; /* Replace the UV coordinates with the ST ones */ - const VertexExtra* end = aligned_vector_back(target->extras) + 1; + VertexExtra* ve = aligned_vector_at(target->extras, 0); + const VertexExtra* end = ve + target->count; + while(ve < end) { vertex->uv[0] = ve->st[0]; vertex->uv[1] = ve->st[1]; From 7e47b21fb591bf9e11feffdacfbeb18efa1a6aa4 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Mon, 25 Mar 2019 16:07:54 +0000 Subject: [PATCH 13/33] Move back and forth in the zclip sample --- samples/zclip/main.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/samples/zclip/main.c b/samples/zclip/main.c index cbb88fd..95e6e91 100644 --- a/samples/zclip/main.c +++ b/samples/zclip/main.c @@ -249,8 +249,28 @@ void RenderFloor() { /* The main drawing function. */ void DrawGLScene() { + static float z = 0.0f; + static char increasing = 1; + + if(increasing) { + z += 10.0f; + } else { + z -= 10.0f; + } + + if(z > 10.0f) { + increasing = !increasing; + z = 10.0f; + } + + if(z < 0.0f) { + increasing = !increasing; + z = 0.0f; + } + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // Clear The Screen And The Depth Buffer glLoadIdentity(); // Reset The View + glTranslatef(0, 0, z); GLubyte i = 0; From e0613faced1fda3538d6f4e44699e34c160b8fa9 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Mon, 25 Mar 2019 18:51:36 +0000 Subject: [PATCH 14/33] Tweak zclip demo --- samples/zclip/main.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/samples/zclip/main.c b/samples/zclip/main.c index 95e6e91..8da2498 100644 --- a/samples/zclip/main.c +++ b/samples/zclip/main.c @@ -252,15 +252,17 @@ void DrawGLScene() static float z = 0.0f; static char increasing = 1; + const float max = 50.0f; + if(increasing) { - z += 10.0f; + z += 1.0f; } else { - z -= 10.0f; + z -= 1.0f; } - if(z > 10.0f) { + if(z > max) { increasing = !increasing; - z = 10.0f; + z = max; } if(z < 0.0f) { From bdeb9cd45ad5ddb6a49ae0e8e96fb69a8f3ae974 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Mon, 25 Mar 2019 19:04:50 +0000 Subject: [PATCH 15/33] Restore clipping case that apparently can happen after all --- GL/clip.c | 43 +++++++++++++++++++++++-------------------- GL/draw.c | 28 ---------------------------- GL/private.h | 26 ++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 48 deletions(-) diff --git a/GL/clip.c b/GL/clip.c index 5b0a120..bc0684d 100644 --- a/GL/clip.c +++ b/GL/clip.c @@ -287,9 +287,13 @@ void _glClipTriangleStrip(SubmissionTarget* target, uint8_t fladeShade) { TO_CLIP[CLIP_COUNT].vertex[1] = *v2; TO_CLIP[CLIP_COUNT].vertex[2] = *v3; - TO_CLIP[CLIP_COUNT].extra[0] = *(VertexExtra*) aligned_vector_at(target->extras, vi1); - TO_CLIP[CLIP_COUNT].extra[1] = *(VertexExtra*) aligned_vector_at(target->extras, vi2); - TO_CLIP[CLIP_COUNT].extra[2] = *(VertexExtra*) aligned_vector_at(target->extras, vi3); + VertexExtra* ve1 = (VertexExtra*) aligned_vector_at(target->extras, vi1); + VertexExtra* ve2 = (VertexExtra*) aligned_vector_at(target->extras, vi2); + VertexExtra* ve3 = (VertexExtra*) aligned_vector_at(target->extras, vi3); + + TO_CLIP[CLIP_COUNT].extra[0] = *ve1; + TO_CLIP[CLIP_COUNT].extra[1] = *ve2; + TO_CLIP[CLIP_COUNT].extra[2] = *ve3; TO_CLIP[CLIP_COUNT].visible = visible; ++CLIP_COUNT; @@ -325,21 +329,18 @@ void _glClipTriangleStrip(SubmissionTarget* target, uint8_t fladeShade) { triangle = -1; } else { - /* FIXME: ? - * This situation doesn't actually seem possible, we always clip from one end - * of the triangle strip to the other, so we're never going to hit the plane in the - * middle of the strip (with previous/next unhandled tris). - * - * Uncomment if this actually happens */ - assert(0 && "Not Implemented (see comment)"); - - /* - ClipVertex* v4 = vertex + 1; + Vertex* v4 = v3 + 1; + uint32_t vi4 = v4 - start; TO_CLIP[CLIP_COUNT].vertex[0] = *v3; TO_CLIP[CLIP_COUNT].vertex[1] = *v2; TO_CLIP[CLIP_COUNT].vertex[2] = *v4; + VertexExtra* ve4 = (VertexExtra*) aligned_vector_at(target->extras, vi4); + TO_CLIP[CLIP_COUNT].extra[0] = *(VertexExtra*) aligned_vector_at(target->extras, vi3); + TO_CLIP[CLIP_COUNT].extra[1] = *(VertexExtra*) aligned_vector_at(target->extras, vi2); + TO_CLIP[CLIP_COUNT].extra[2] = *ve4; + visible = ((v3->w > 0) ? 4 : 0) | ((v2->w > 0) ? 2 : 0) | ((v4->w > 0) ? 1 : 0); TO_CLIP[CLIP_COUNT].visible = visible; @@ -352,17 +353,19 @@ void _glClipTriangleStrip(SubmissionTarget* target, uint8_t fladeShade) { (vertex - 1)->flags = VERTEX_CMD_EOL; if(v4->flags == VERTEX_CMD_EOL) { - markDead(vertex); + markDead(v3); markDead(v4); } else { // Swap the next vertices to start a new strip - ClipVertex tmp = *vertex; - *vertex = *v4; - *v4 = tmp; - - vertex->flags = VERTEX_CMD; + swapVertex(v3, v4); + v3->flags = VERTEX_CMD; v4->flags = VERTEX_CMD; - } */ + + /* Swap the extra data too */ + VertexExtra t = *ve4; + *ve3 = *ve4; + *ve4 = t; + } } break; default: diff --git a/GL/draw.c b/GL/draw.c index 05058ed..f1fe8a9 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -536,34 +536,6 @@ static inline void transformNormalToEyeSpace(GLfloat* normal) { mat_trans_normal3(normal[0], normal[1], normal[2]); } - -/* FIXME: SH4 has a swap.w instruction, we should leverage it here! */ -#define _SWAP32(x, y) \ -do { \ - uint32_t t = *((uint32_t*) &x); \ - *((uint32_t*) &x) = *((uint32_t*) &y); \ - *((uint32_t*) &y) = t; \ -} while(0) - -/* - *((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); \ - *((uint32_t*) &y) = *((uint32_t*) &x) ^ *((uint32_t*) &y); \ - *((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); */ - - -#define swapVertex(a, b) \ -do { \ - _SWAP32(a->flags, b->flags); \ - _SWAP32(a->xyz[0], b->xyz[0]); \ - _SWAP32(a->xyz[1], b->xyz[1]); \ - _SWAP32(a->xyz[2], b->xyz[2]); \ - _SWAP32(a->uv[0], b->uv[0]); \ - _SWAP32(a->uv[1], b->uv[1]); \ - _SWAP32(a->bgra, b->bgra); \ - _SWAP32(a->w, b->w); \ -} while(0) - - PVRHeader* _glSubmissionTargetHeader(SubmissionTarget* target) { return aligned_vector_at(&target->output->vector, target->header_offset); } diff --git a/GL/private.h b/GL/private.h index bdae752..53528be 100644 --- a/GL/private.h +++ b/GL/private.h @@ -105,6 +105,32 @@ typedef struct { float w; } Vertex; +/* FIXME: SH4 has a swap.w instruction, we should leverage it here! */ +#define _SWAP32(x, y) \ +do { \ + uint32_t t = *((uint32_t*) &x); \ + *((uint32_t*) &x) = *((uint32_t*) &y); \ + *((uint32_t*) &y) = t; \ +} while(0) + +/* + *((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); \ + *((uint32_t*) &y) = *((uint32_t*) &x) ^ *((uint32_t*) &y); \ + *((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); */ + + +#define swapVertex(a, b) \ +do { \ + _SWAP32(a->flags, b->flags); \ + _SWAP32(a->xyz[0], b->xyz[0]); \ + _SWAP32(a->xyz[1], b->xyz[1]); \ + _SWAP32(a->xyz[2], b->xyz[2]); \ + _SWAP32(a->uv[0], b->uv[0]); \ + _SWAP32(a->uv[1], b->uv[1]); \ + _SWAP32(a->bgra, b->bgra); \ + _SWAP32(a->w, b->w); \ +} while(0) + /* ClipVertex doesn't have room for these, so we need to parse them * out separately. Potentially 'w' will be housed here if we support oargb */ typedef struct { From 9e12e15a13b6399c18bdecf2e6835284ccee7572 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Mon, 25 Mar 2019 19:30:45 +0000 Subject: [PATCH 16/33] Make use of swapVertex --- GL/clip.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/GL/clip.c b/GL/clip.c index bc0684d..c140c01 100644 --- a/GL/clip.c +++ b/GL/clip.c @@ -265,10 +265,7 @@ void _glClipTriangleStrip(SubmissionTarget* target, uint8_t fladeShade) { markDead(v3); } else { markDead(v1); - Vertex tmp = *v2; - *v2 = *v3; - *v3 = tmp; - + swapVertex(v2, v3); triangle = -1; v2->flags = VERTEX_CMD; v3->flags = VERTEX_CMD; From d44a24d339e2c7efa02006ef28fe1cb81ba9cfd5 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Mon, 25 Mar 2019 19:43:03 +0000 Subject: [PATCH 17/33] Add assertions --- GL/draw.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/GL/draw.c b/GL/draw.c index f1fe8a9..9d6278b 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -537,10 +537,12 @@ static inline void transformNormalToEyeSpace(GLfloat* normal) { } PVRHeader* _glSubmissionTargetHeader(SubmissionTarget* target) { + assert(target->header_offset < target->output->vector.size); return aligned_vector_at(&target->output->vector, target->header_offset); } Vertex* _glSubmissionTargetStart(SubmissionTarget* target) { + assert(target->start_offset < target->output->vector.size); return aligned_vector_at(&target->output->vector, target->start_offset); } @@ -686,6 +688,8 @@ static inline void _readUVData(const GLuint first, const GLuint count, Vertex* o } static inline void _readSTData(const GLuint first, const GLuint count, SubmissionTarget* target) { + assert(target->extras->size == count); + if((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) != ST_ENABLED_FLAG) { VertexExtra* extra = aligned_vector_at(target->extras, 0); _fillZero2fVE(count, extra->st); @@ -722,6 +726,8 @@ static inline void _readSTData(const GLuint first, const GLuint count, Submissio } static inline void _readNormalData(const GLuint first, const GLuint count, SubmissionTarget* target) { + assert(target->extras->size == count); + if((ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) != NORMAL_ENABLED_FLAG) { VertexExtra* extra = aligned_vector_at(target->extras, 0); _fillWithNegZVE(count, extra->nxyz); @@ -1173,6 +1179,8 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type clip(target); + assert(extras.size == target->count); + #if DEBUG_CLIPPING fprintf(stderr, "--------\n"); for(i = offset; i < activeList->vector.size; ++i) { From 434f3165267db9e5a3e2a2d926315bf1f0348437 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Tue, 26 Mar 2019 09:09:07 +0000 Subject: [PATCH 18/33] Switch pointer comparisons for counters --- GL/draw.c | 137 +++++++++++++++++------------------------------------- 1 file changed, 42 insertions(+), 95 deletions(-) diff --git a/GL/draw.c b/GL/draw.c index 9d6278b..0227f2c 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -20,6 +20,12 @@ static AttribPointer DIFFUSE_POINTER; static GLuint ENABLED_VERTEX_ATTRIBUTES = 0; static GLubyte ACTIVE_CLIENT_TEXTURE = 0; + +#define ITERATE(count) \ + GLushort i = count; \ + while(i--) + + void _glInitAttributePointers() { TRACE(); @@ -69,9 +75,7 @@ typedef void (*PolyBuildFunc)(Vertex* first, Vertex* previous, Vertex* vertex, V static void _readVertexData3f3f(const float* input, GLuint count, GLubyte stride, float* output) { - const float* end = (float*) (((GLubyte*) input) + (count * stride)); - - while(input < end) { + ITERATE(count) { output[0] = input[0]; output[1] = input[1]; output[2] = input[2]; @@ -83,9 +87,7 @@ static void _readVertexData3f3f(const float* input, GLuint count, GLubyte stride /* VE == VertexExtra */ static void _readVertexData3f3fVE(const float* input, GLuint count, GLubyte stride, float* output) { - const float* end = (float*) (((GLubyte*) input) + (count * stride)); - - while(input < end) { + ITERATE(count) { output[0] = input[0]; output[1] = input[1]; output[2] = input[2]; @@ -96,9 +98,7 @@ static void _readVertexData3f3fVE(const float* input, GLuint count, GLubyte stri } static void _readVertexData3us3f(const GLushort* input, GLuint count, GLubyte stride, GLfloat* output) { - const GLushort* end = (GLushort*) ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[0] = input[0]; output[1] = input[1]; output[2] = input[2]; @@ -109,9 +109,7 @@ static void _readVertexData3us3f(const GLushort* input, GLuint count, GLubyte st } static void _readVertexData3us3fVE(const GLushort* input, GLuint count, GLubyte stride, GLfloat* output) { - const GLushort* end = (GLushort*) ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[0] = input[0]; output[1] = input[1]; output[2] = input[2]; @@ -122,9 +120,7 @@ static void _readVertexData3us3fVE(const GLushort* input, GLuint count, GLubyte } static void _readVertexData3ui3f(const GLuint* input, GLuint count, GLubyte stride, GLfloat* output) { - const GLuint* end = (GLuint*) ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[0] = input[0]; output[1] = input[1]; output[2] = input[2]; @@ -135,9 +131,7 @@ static void _readVertexData3ui3f(const GLuint* input, GLuint count, GLubyte stri } static void _readVertexData3ui3fVE(const GLuint* input, GLuint count, GLubyte stride, GLfloat* output) { - const GLuint* end = (GLuint*) ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[0] = input[0]; output[1] = input[1]; output[2] = input[2]; @@ -149,9 +143,7 @@ static void _readVertexData3ui3fVE(const GLuint* input, GLuint count, GLubyte st static void _readVertexData3ub3f(const GLubyte* input, GLuint count, GLubyte stride, float* output) { const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; - const GLubyte* end = ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE; output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE; output[2] = input[2] * ONE_OVER_TWO_FIVE_FIVE; @@ -163,9 +155,7 @@ static void _readVertexData3ub3f(const GLubyte* input, GLuint count, GLubyte str static void _readVertexData3ub3fVE(const GLubyte* input, GLuint count, GLubyte stride, GLfloat* output) { const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; - const GLubyte* end = ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE; output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE; output[2] = input[2] * ONE_OVER_TWO_FIVE_FIVE; @@ -176,9 +166,7 @@ static void _readVertexData3ub3fVE(const GLubyte* input, GLuint count, GLubyte s } static void _readVertexData2f2f(const float* input, GLuint count, GLubyte stride, float* output) { - const float* end = (float*) ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[0] = input[0]; output[1] = input[1]; @@ -188,9 +176,7 @@ static void _readVertexData2f2f(const float* input, GLuint count, GLubyte stride } static void _readVertexData2f2fVE(const float* input, GLuint count, GLubyte stride, GLfloat* output) { - const float* end = (float*) ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[0] = input[0]; output[1] = input[1]; @@ -200,9 +186,7 @@ static void _readVertexData2f2fVE(const float* input, GLuint count, GLubyte stri } static void _readVertexData2f3f(const float* input, GLuint count, GLubyte stride, float* output) { - const float* end = (float*) ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[0] = input[0]; output[1] = input[1]; output[2] = 0.0f; @@ -214,9 +198,7 @@ static void _readVertexData2f3f(const float* input, GLuint count, GLubyte stride static void _readVertexData2ub3f(const GLubyte* input, GLuint count, GLubyte stride, float* output) { const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; - const GLubyte* end = ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE; output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE; output[2] = 0.0f; @@ -227,9 +209,7 @@ static void _readVertexData2ub3f(const GLubyte* input, GLuint count, GLubyte str } static void _readVertexData2us3f(const GLushort* input, GLuint count, GLubyte stride, float* output) { - const GLushort* end = (GLushort*) ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[0] = input[0]; output[1] = input[1]; output[2] = 0.0f; @@ -240,9 +220,7 @@ static void _readVertexData2us3f(const GLushort* input, GLuint count, GLubyte st } static void _readVertexData2us2f(const GLushort* input, GLuint count, GLubyte stride, float* output) { - const GLushort* end = (GLushort*) ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[0] = input[0]; output[1] = input[1]; @@ -252,9 +230,7 @@ static void _readVertexData2us2f(const GLushort* input, GLuint count, GLubyte st } static void _readVertexData2us2fVE(const GLushort* input, GLuint count, GLubyte stride, GLfloat* output) { - const GLushort* end = (GLushort*) ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[0] = input[0]; output[1] = input[1]; @@ -264,9 +240,7 @@ static void _readVertexData2us2fVE(const GLushort* input, GLuint count, GLubyte } static void _readVertexData2ui2f(const GLuint* input, GLuint count, GLubyte stride, float* output) { - const GLuint* end = (GLuint*) ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[0] = input[0]; output[1] = input[1]; @@ -276,9 +250,7 @@ static void _readVertexData2ui2f(const GLuint* input, GLuint count, GLubyte stri } static void _readVertexData2ui2fVE(const GLuint* input, GLuint count, GLubyte stride, GLfloat* output) { - const GLuint* end = (GLuint*) ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[0] = input[0]; output[1] = input[1]; @@ -289,9 +261,7 @@ static void _readVertexData2ui2fVE(const GLuint* input, GLuint count, GLubyte st static void _readVertexData2ub2f(const GLubyte* input, GLuint count, GLubyte stride, float* output) { const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; - const GLubyte* end = (GLubyte*) ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE; output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE; @@ -302,9 +272,7 @@ static void _readVertexData2ub2f(const GLubyte* input, GLuint count, GLubyte str static void _readVertexData2ub2fVE(const GLubyte* input, GLuint count, GLubyte stride, GLfloat* output) { const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; - const GLubyte* end = (GLubyte*) ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE; output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE; @@ -314,9 +282,7 @@ static void _readVertexData2ub2fVE(const GLubyte* input, GLuint count, GLubyte s } static void _readVertexData2ui3f(const GLuint* input, GLuint count, GLubyte stride, float* output) { - const GLuint* end = (GLuint*) ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[0] = input[0]; output[1] = input[1]; output[2] = 0.0f; @@ -327,9 +293,7 @@ static void _readVertexData2ui3f(const GLuint* input, GLuint count, GLubyte stri } static void _readVertexData4ubARGB(const GLubyte* input, GLuint count, GLubyte stride, GLubyte* output) { - const GLubyte* end = ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[R8IDX] = input[0]; output[G8IDX] = input[1]; output[B8IDX] = input[2]; @@ -341,9 +305,7 @@ static void _readVertexData4ubARGB(const GLubyte* input, GLuint count, GLubyte s } static void _readVertexData4fARGB(const float* input, GLuint count, GLubyte stride, GLubyte* output) { - const float* end = (float*) ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[R8IDX] = (GLubyte) (input[0] * 255.0f); output[G8IDX] = (GLubyte) (input[1] * 255.0f); output[B8IDX] = (GLubyte) (input[2] * 255.0f); @@ -355,9 +317,7 @@ static void _readVertexData4fARGB(const float* input, GLuint count, GLubyte stri } static void _readVertexData3fARGB(const float* input, GLuint count, GLubyte stride, GLubyte* output) { - const float* end = (float*) ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[R8IDX] = (GLubyte) (input[0] * 255.0f); output[G8IDX] = (GLubyte) (input[1] * 255.0f); output[B8IDX] = (GLubyte) (input[2] * 255.0f); @@ -369,9 +329,7 @@ static void _readVertexData3fARGB(const float* input, GLuint count, GLubyte stri } static void _readVertexData3ubARGB(const GLubyte* input, GLuint count, GLubyte stride, GLubyte* output) { - const GLubyte* end = ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[R8IDX] = input[0]; output[G8IDX] = input[1]; output[B8IDX] = input[2]; @@ -383,8 +341,7 @@ static void _readVertexData3ubARGB(const GLubyte* input, GLuint count, GLubyte s } static void _fillWithNegZVE(GLuint count, GLfloat* output) { - const GLfloat* end = output + (count * 3); - while(output < end) { + ITERATE(count) { output[0] = output[1] = 0.0f; output[2] = -1.0f; output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); @@ -392,9 +349,7 @@ static void _fillWithNegZVE(GLuint count, GLfloat* output) { } static void _fillWhiteARGB(GLuint count, GLubyte* output) { - const GLubyte* end = output + (sizeof(Vertex) * count); - - while(output < end) { + ITERATE(count) { output[R8IDX] = 255; output[G8IDX] = 255; output[B8IDX] = 255; @@ -405,16 +360,14 @@ static void _fillWhiteARGB(GLuint count, GLubyte* output) { } static void _fillZero2f(GLuint count, GLfloat* output) { - const GLfloat* end = (GLfloat*) ((GLubyte*) output) + (count * sizeof(Vertex)); - while(output < end) { + ITERATE(count) { output[0] = output[1] = 0.0f; output = (GLfloat*) (((GLubyte*) output) + sizeof(Vertex)); } } static void _fillZero2fVE(GLuint count, GLfloat* output) { - const GLfloat* end = output + (2 * count); - while(output < end) { + ITERATE(count) { output[0] = output[1] = 0.0f; output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); } @@ -551,9 +504,8 @@ Vertex* _glSubmissionTargetEnd(SubmissionTarget* target) { } static inline void genTriangles(Vertex* output, GLuint count) { - const Vertex* end = output + count; Vertex* it = output + 2; - while(it < end) { + ITERATE(count) { it->flags = PVR_CMD_VERTEX_EOL; it += 3; } @@ -562,9 +514,8 @@ static inline void genTriangles(Vertex* output, GLuint count) { static inline void genQuads(Vertex* output, GLuint count) { Vertex* this = output + 2; Vertex* next = this + 1; - const Vertex* end = output + count; - while(this < end) { + ITERATE(count) { swapVertex(this, next); next->flags = PVR_CMD_VERTEX_EOL; @@ -841,9 +792,8 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei profiler_checkpoint("others"); it = _glSubmissionTargetStart(target); - end = _glSubmissionTargetEnd(target); - while(it < end) { + ITERATE(target->count) { (it++)->flags = PVR_CMD_VERTEX; } @@ -872,11 +822,10 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei } else { const IndexParseFunc indexFunc = _calcParseIndexFunc(type); it = _glSubmissionTargetStart(target); - end = _glSubmissionTargetEnd(target); GLuint j; const GLubyte* idx = indices; - while(it < end) { + ITERATE(count) { j = indexFunc(idx); _readPositionData(j, 1, it); _readDiffuseData(j, 1, it); @@ -889,6 +838,7 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei } it = _glSubmissionTargetStart(target); + end = _glSubmissionTargetEnd(target); while(it < end) { (it++)->flags = PVR_CMD_VERTEX; } @@ -917,11 +867,10 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei static void transform(SubmissionTarget* target) { /* Perform modelview transform, storing W */ Vertex* vertex = _glSubmissionTargetStart(target); - const Vertex* end = _glSubmissionTargetEnd(target); _glApplyRenderMatrix(); /* Apply the Render Matrix Stack */ - while(vertex < end) { + ITERATE(target->count) { register float __x __asm__("fr12") = (vertex->xyz[0]); register float __y __asm__("fr13") = (vertex->xyz[1]); register float __z __asm__("fr14") = (vertex->xyz[2]); @@ -953,9 +902,8 @@ static void clip(SubmissionTarget* target) { static void mat_transform3(const float* xyz, const float* xyzOut, const uint32_t count, const uint32_t inStride, const uint32_t outStride) { uint8_t* dataIn = (uint8_t*) xyz; uint8_t* dataOut = (uint8_t*) xyzOut; - uint32_t i = count; - while(i--) { + ITERATE(count) { float* in = (float*) dataIn; float* out = (float*) dataOut; @@ -969,9 +917,8 @@ static void mat_transform3(const float* xyz, const float* xyzOut, const uint32_t static void mat_transform_normal3(const float* xyz, const float* xyzOut, const uint32_t count, const uint32_t inStride, const uint32_t outStride) { uint8_t* dataIn = (uint8_t*) xyz; uint8_t* dataOut = (uint8_t*) xyzOut; - uint32_t i = count; - while(i--) { + ITERATE(count) { float* in = (float*) dataIn; float* out = (float*) dataOut; From 30f8564298910eda18f0aefc454c355f4d82be56 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Wed, 27 Mar 2019 09:39:06 +0000 Subject: [PATCH 19/33] Fix glDrawElements --- GL/draw.c | 42 ++++++++++++++++++++--------------------- samples/nehe02de/main.c | 2 +- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/GL/draw.c b/GL/draw.c index 0227f2c..319f095 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -638,11 +638,8 @@ static inline void _readUVData(const GLuint first, const GLuint count, Vertex* o } } -static inline void _readSTData(const GLuint first, const GLuint count, SubmissionTarget* target) { - assert(target->extras->size == count); - +static inline void _readSTData(const GLuint first, const GLuint count, VertexExtra* extra) { if((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) != ST_ENABLED_FLAG) { - VertexExtra* extra = aligned_vector_at(target->extras, 0); _fillZero2fVE(count, extra->st); return; } @@ -651,7 +648,6 @@ static inline void _readSTData(const GLuint first, const GLuint count, Submissio const void* stptr = ((GLubyte*) ST_POINTER.ptr + (first * ststride)); if(ST_POINTER.size == 2) { - VertexExtra* extra = aligned_vector_at(target->extras, 0); switch(ST_POINTER.type) { case GL_FLOAT: _readVertexData2f2fVE(stptr, count, ststride, extra->st); @@ -676,11 +672,8 @@ static inline void _readSTData(const GLuint first, const GLuint count, Submissio } } -static inline void _readNormalData(const GLuint first, const GLuint count, SubmissionTarget* target) { - assert(target->extras->size == count); - +static inline void _readNormalData(const GLuint first, const GLuint count, VertexExtra* extra) { if((ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) != NORMAL_ENABLED_FLAG) { - VertexExtra* extra = aligned_vector_at(target->extras, 0); _fillWithNegZVE(count, extra->nxyz); return; } @@ -689,7 +682,6 @@ static inline void _readNormalData(const GLuint first, const GLuint count, Submi const void* nptr = ((GLubyte*) NORMAL_POINTER.ptr + (first * nstride)); if(NORMAL_POINTER.size == 3) { - VertexExtra* extra = aligned_vector_at(target->extras, 0); switch(NORMAL_POINTER.type) { case GL_FLOAT: _readVertexData3f3fVE(nptr, count, nstride, extra->nxyz); @@ -787,8 +779,11 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei profiler_checkpoint("diffuse"); if(doTexture) _readUVData(first, count, _glSubmissionTargetStart(target)); - if(doLighting) _readNormalData(first, count, target); - if(doTexture && doMultitexture) _readSTData(first, count, target); + + VertexExtra* ve = aligned_vector_at(target->extras, 0); + + if(doLighting) _readNormalData(first, count, ve); + if(doTexture && doMultitexture) _readSTData(first, count, ve); profiler_checkpoint("others"); it = _glSubmissionTargetStart(target); @@ -821,19 +816,24 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei profiler_pop(); } else { const IndexParseFunc indexFunc = _calcParseIndexFunc(type); - it = _glSubmissionTargetStart(target); - GLuint j; const GLubyte* idx = indices; + + Vertex* vertices = _glSubmissionTargetStart(target); + VertexExtra* extras = aligned_vector_at(target->extras, 0); + ITERATE(count) { j = indexFunc(idx); - _readPositionData(j, 1, it); - _readDiffuseData(j, 1, it); - if(doTexture) _readUVData(j, 1, it); - //FIXME: Need to think about how we can share this */ - //if(doLighting) _readNormalData(j, 1, it); - //if(doTexture && doMultitexture) _readSTData(j, 1, it); - ++it; + + _readPositionData(j, 1, vertices); + _readDiffuseData(j, 1, vertices); + if(doTexture) _readUVData(j, 1, vertices); + if(doLighting) _readNormalData(j, 1, extras); + if(doTexture && doMultitexture) _readSTData(j, 1, extras); + + ++vertices; + ++extras; + idx += istride; } diff --git a/samples/nehe02de/main.c b/samples/nehe02de/main.c index 9855e54..eba7fca 100644 --- a/samples/nehe02de/main.c +++ b/samples/nehe02de/main.c @@ -7,7 +7,7 @@ void InitGL(int Width, int Height) // We call this right after our OpenG { glClearColor(0.0f, 0.0f, 0.0f, 0.0f); // This Will Clear The Background Color To Black glClearDepth(1.0); // Enables Clearing Of The Depth Buffer - glDepthFunc(GL_LESS); // The Type Of Depth Test To Do + glDepthFunc(GL_LEQUAL); // The Type Of Depth Test To Do glEnable(GL_DEPTH_TEST); // Enables Depth Testing glShadeModel(GL_SMOOTH); // Enables Smooth Color Shading From cd04784c335df046dd2f8f22faf846595eb8b250 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Thu, 28 Mar 2019 13:05:13 +0000 Subject: [PATCH 20/33] Fix memory corruption --- GL/draw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/GL/draw.c b/GL/draw.c index 319f095..c008fb1 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -505,7 +505,7 @@ Vertex* _glSubmissionTargetEnd(SubmissionTarget* target) { static inline void genTriangles(Vertex* output, GLuint count) { Vertex* it = output + 2; - ITERATE(count) { + ITERATE(count / 3) { it->flags = PVR_CMD_VERTEX_EOL; it += 3; } @@ -513,9 +513,9 @@ static inline void genTriangles(Vertex* output, GLuint count) { static inline void genQuads(Vertex* output, GLuint count) { Vertex* this = output + 2; - Vertex* next = this + 1; + Vertex* next = output + 3; - ITERATE(count) { + ITERATE(count / 4) { swapVertex(this, next); next->flags = PVR_CMD_VERTEX_EOL; From 89824444060bbf1b57c2c0f1a832179bc9ee5897 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Thu, 28 Mar 2019 13:07:53 +0000 Subject: [PATCH 21/33] Consistently allow > 16384 verts --- GL/draw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GL/draw.c b/GL/draw.c index c008fb1..c1d821b 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -22,7 +22,7 @@ static GLubyte ACTIVE_CLIENT_TEXTURE = 0; #define ITERATE(count) \ - GLushort i = count; \ + GLuint i = count; \ while(i--) @@ -1238,7 +1238,7 @@ void APIENTRY glDrawArrays(GLenum mode, GLint first, GLsizei count) { return; } - submitVertices(mode, first, count, GL_UNSIGNED_SHORT, NULL); + submitVertices(mode, first, count, GL_UNSIGNED_INT, NULL); } void APIENTRY glEnableClientState(GLenum cap) { From 093b1af056590d5a6f0fbab458ba37579a8c9076 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Thu, 28 Mar 2019 13:08:23 +0000 Subject: [PATCH 22/33] Add some assertions to aligned_vector --- containers/aligned_vector.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/containers/aligned_vector.c b/containers/aligned_vector.c index 8fda257..55ddc7e 100644 --- a/containers/aligned_vector.c +++ b/containers/aligned_vector.c @@ -2,6 +2,7 @@ #include #include #include +#include #if defined(__APPLE__) || defined(__WIN32__) /* Linux + Kos define this, OSX does not, so just use malloc there */ @@ -33,6 +34,10 @@ static inline unsigned int round_to_chunk_size(unsigned int val) { void aligned_vector_reserve(AlignedVector* vector, unsigned int element_count) { + if(element_count == 0) { + return; + } + if(element_count <= vector->capacity) { return; } @@ -44,7 +49,9 @@ void aligned_vector_reserve(AlignedVector* vector, unsigned int element_count) { unsigned int new_byte_size = element_count * vector->element_size; unsigned char* original_data = vector->data; + vector->data = (unsigned char*) memalign(0x20, new_byte_size); + assert(vector->data); if(original_data) { memcpy(vector->data, original_data, original_byte_size); @@ -56,10 +63,14 @@ void aligned_vector_reserve(AlignedVector* vector, unsigned int element_count) { void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count) { /* Resize enough room */ + assert(count); + assert(vector->element_size); unsigned int initial_size = vector->size; aligned_vector_resize(vector, vector->size + count); + assert(vector->size == initial_size + count); + unsigned char* dest = vector->data + (vector->element_size * initial_size); /* Copy the objects in */ From 3cd15de332b8646733dce5fc86d116a95555fbc3 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Thu, 28 Mar 2019 13:09:04 +0000 Subject: [PATCH 23/33] Add additional traces --- GL/draw.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/GL/draw.c b/GL/draw.c index c1d821b..66fb981 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -764,6 +764,7 @@ static inline void _readDiffuseData(const GLuint first, const GLuint count, Vert static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei first, const GLuint count, const GLubyte* indices, const GLenum type, const GLboolean doTexture, const GLboolean doMultitexture, const GLboolean doLighting) { /* Read from the client buffers and generate an array of ClipVertices */ + TRACE(); const GLsizei istride = byte_size(type); Vertex* it; @@ -865,6 +866,8 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei } static void transform(SubmissionTarget* target) { + TRACE(); + /* Perform modelview transform, storing W */ Vertex* vertex = _glSubmissionTargetStart(target); @@ -892,6 +895,8 @@ static void transform(SubmissionTarget* target) { } static void clip(SubmissionTarget* target) { + TRACE(); + /* Perform clipping, generating new vertices as necessary */ _glClipTriangleStrip(target, _glGetShadeModel() == GL_FLAT); @@ -988,6 +993,8 @@ static void light(SubmissionTarget* target) { } static void divide(SubmissionTarget* target) { + TRACE(); + /* Perform perspective divide on each vertex */ Vertex* vertex = _glSubmissionTargetStart(target); const Vertex* end = _glSubmissionTargetEnd(target); @@ -1001,6 +1008,8 @@ static void divide(SubmissionTarget* target) { } static void push(PVRHeader* header, Vertex* output, const GLuint count, PolyList* activePolyList, GLshort textureUnit) { + TRACE(); + // Compile the header pvr_poly_cxt_t cxt = *_glGetPVRContext(); cxt.list_type = activePolyList->list_type; @@ -1026,6 +1035,8 @@ static void push(PVRHeader* header, Vertex* output, const GLuint count, PolyList #define DEBUG_CLIPPING 0 static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type, const GLvoid* indices) { + TRACE(); + /* Do nothing if vertices aren't enabled */ if(!(ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG)) { return; From e15a47b6fb9c365b8c4ae42e6274a09a54968d3d Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Thu, 28 Mar 2019 13:09:36 +0000 Subject: [PATCH 24/33] Use the list stored on the SubmissionTarget --- GL/draw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GL/draw.c b/GL/draw.c index 66fb981..0c3c0a7 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -1159,7 +1159,7 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type profiler_checkpoint("divide"); - push(_glSubmissionTargetHeader(target), _glSubmissionTargetStart(target), target->count, _glActivePolyList(), 0); + push(_glSubmissionTargetHeader(target), _glSubmissionTargetStart(target), target->count, target->output, 0); profiler_checkpoint("push"); /* From 41ee67cbf1fbbc2d93ca4586e1a02b667f5bfdbb Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Thu, 28 Mar 2019 13:10:53 +0000 Subject: [PATCH 25/33] Code cleanup --- GL/draw.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/GL/draw.c b/GL/draw.c index 0c3c0a7..881bf91 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -767,19 +767,19 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei TRACE(); const GLsizei istride = byte_size(type); - Vertex* it; - const Vertex* end; if(!indices) { profiler_push(__func__); - _readPositionData(first, count, _glSubmissionTargetStart(target)); + Vertex* start = _glSubmissionTargetStart(target); + + _readPositionData(first, count, start); profiler_checkpoint("positions"); - _readDiffuseData(first, count, _glSubmissionTargetStart(target)); + _readDiffuseData(first, count, start); profiler_checkpoint("diffuse"); - if(doTexture) _readUVData(first, count, _glSubmissionTargetStart(target)); + if(doTexture) _readUVData(first, count, start); VertexExtra* ve = aligned_vector_at(target->extras, 0); @@ -787,10 +787,11 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei if(doTexture && doMultitexture) _readSTData(first, count, ve); profiler_checkpoint("others"); - it = _glSubmissionTargetStart(target); + Vertex* it = _glSubmissionTargetStart(target); - ITERATE(target->count) { - (it++)->flags = PVR_CMD_VERTEX; + ITERATE(count) { + it->flags = PVR_CMD_VERTEX; + ++it; } profiler_checkpoint("flags"); @@ -798,13 +799,13 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei // Drawing arrays switch(mode) { case GL_TRIANGLES: - genTriangles(_glSubmissionTargetStart(target), count); + genTriangles(start, count); break; case GL_QUADS: - genQuads(_glSubmissionTargetStart(target), count); + genQuads(start, count); break; case GL_TRIANGLE_FAN: - genTriangleFan(_glSubmissionTargetStart(target), count); + genTriangleFan(start, count); break; case GL_TRIANGLE_STRIP: genTriangleStrip(_glSubmissionTargetStart(target), count); @@ -838,8 +839,8 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei idx += istride; } - it = _glSubmissionTargetStart(target); - end = _glSubmissionTargetEnd(target); + Vertex* it = _glSubmissionTargetStart(target); + const Vertex* end = _glSubmissionTargetEnd(target); while(it < end) { (it++)->flags = PVR_CMD_VERTEX; } @@ -997,9 +998,8 @@ static void divide(SubmissionTarget* target) { /* Perform perspective divide on each vertex */ Vertex* vertex = _glSubmissionTargetStart(target); - const Vertex* end = _glSubmissionTargetEnd(target); - while(vertex < end) { + ITERATE(target->count) { vertex->xyz[2] = 1.0f / vertex->w; vertex->xyz[0] *= vertex->xyz[2]; vertex->xyz[1] *= vertex->xyz[2]; @@ -1100,6 +1100,8 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type target->header_offset = target->output->vector.size; target->start_offset = target->header_offset + 1; + assert(target->count); + /* Make sure we have enough room for all the "extra" data */ aligned_vector_resize(&extras, target->count); @@ -1192,15 +1194,14 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type &_glTransparentPolyList()->vector, (Vertex*) _glSubmissionTargetHeader(target), target->count + 1 ); + assert(vertex); + PVRHeader* mtHeader = (PVRHeader*) vertex++; Vertex* mtStart = vertex; /* Replace the UV coordinates with the ST ones */ - VertexExtra* ve = aligned_vector_at(target->extras, 0); - const VertexExtra* end = ve + target->count; - - while(ve < end) { + ITERATE(target->count) { vertex->uv[0] = ve->st[0]; vertex->uv[1] = ve->st[1]; ++vertex; From ccc9b59d0355688c9162846cc72902a7e214b402 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Thu, 28 Mar 2019 13:12:00 +0000 Subject: [PATCH 26/33] Turn up to 11 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8f0e0b1..76a1114 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ OBJS += GL/matrix.o GL/fog.o GL/error.o GL/clip.o containers/stack.o containers/ SUBDIRS = -KOS_CFLAGS += -ffast-math -O3 -Iinclude +KOS_CFLAGS += -ffast-math -Ofast -Iinclude link: $(KOS_AR) rcs $(TARGET) $(OBJS) From 2e33dc0718c0d0c179dfd08f29785bb8e6028139 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Fri, 29 Mar 2019 07:51:09 +0000 Subject: [PATCH 27/33] Add missing profiler_pop(). Fixes #36 --- GL/draw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GL/draw.c b/GL/draw.c index 881bf91..2f52b6a 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -1231,6 +1231,8 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type glBlendFunc(blendSrc, blendDst); (blendEnabled) ? glEnable(GL_BLEND) : glDisable(GL_BLEND); (depthEnabled) ? glEnable(GL_DEPTH_TEST) : glDisable(GL_DEPTH_TEST); + + profiler_pop(); } void APIENTRY glDrawElements(GLenum mode, GLsizei count, GLenum type, const GLvoid* indices) { From 1e18fbc5277c92f9c118d4a200194b1a42acbc66 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Fri, 29 Mar 2019 07:54:50 +0000 Subject: [PATCH 28/33] Fix twiddling code. Fixes #37 --- GL/texture.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/GL/texture.c b/GL/texture.c index 36e1acf..1ea25c8 100644 --- a/GL/texture.c +++ b/GL/texture.c @@ -940,15 +940,17 @@ void APIENTRY glTexImage2D(GLenum target, GLint level, GLint internalFormat, if(needsTwiddling) { assert(type == GL_UNSIGNED_BYTE); // Anything else needs this loop adjusting - GLuint x, y, min, min2, mask; + GLuint x, y, min, mask; + + GLubyte *pixels = (GLubyte*) data; + GLushort *vtex = (GLushort*) targetData; min = MIN(w, h); - min2 = min * min; mask = min - 1; - for(y = 0; y < h; y++) { + for(y = 0; y < h; y += 2) { for(x = 0; x < w; x++) { - targetData[TWIDOUT(x & mask, y & mask) + (x / min + y / min) * min2] = ((GLubyte*) data)[y * w + x]; + vtex[TWIDOUT((y & mask) / 2, x & mask) + (x / min + y / min)*min * min / 2] = pixels[y * w + x] | (pixels[(y + 1) * w + x] << 8); } } } else { From a75ced3fef843a4bcf483df2339745fbfb1e435e Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Fri, 29 Mar 2019 08:47:55 +0000 Subject: [PATCH 29/33] Speed up immediate mode submission --- GL/draw.c | 4 +- GL/immediate.c | 134 ++++++++++++++++++++++++++----------------------- GL/private.h | 2 +- GL/profiler.c | 2 +- GL/state.c | 2 +- 5 files changed, 76 insertions(+), 68 deletions(-) diff --git a/GL/draw.c b/GL/draw.c index 2f52b6a..8811480 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -389,8 +389,8 @@ static void _readVertexData4uiARGB(const GLuint* input, GLuint count, GLubyte st assert(0 && "Not Implemented"); } -GLuint _glGetEnabledAttributes() { - return ENABLED_VERTEX_ATTRIBUTES; +GLuint* _glGetEnabledAttributes() { + return &ENABLED_VERTEX_ATTRIBUTES; } AttribPointer* _glGetVertexAttribPointer() { diff --git a/GL/immediate.c b/GL/immediate.c index 1ea573a..77b1919 100644 --- a/GL/immediate.c +++ b/GL/immediate.c @@ -29,6 +29,12 @@ static GLfloat UV_COORD[2] = {0.0f, 0.0f}; static GLfloat ST_COORD[2] = {0.0f, 0.0f}; +static AttribPointer VERTEX_ATTRIB; +static AttribPointer DIFFUSE_ATTRIB; +static AttribPointer UV_ATTRIB; +static AttribPointer ST_ATTRIB; +static AttribPointer NORMAL_ATTRIB; + void _glInitImmediateMode(GLuint initial_size) { aligned_vector_init(&VERTICES, sizeof(GLfloat)); aligned_vector_init(&COLOURS, sizeof(GLubyte)); @@ -41,6 +47,31 @@ void _glInitImmediateMode(GLuint initial_size) { aligned_vector_reserve(&UV_COORDS, initial_size); aligned_vector_reserve(&ST_COORDS, initial_size); aligned_vector_reserve(&NORMALS, initial_size); + + VERTEX_ATTRIB.ptr = VERTICES.data; + VERTEX_ATTRIB.size = 3; + VERTEX_ATTRIB.type = GL_FLOAT; + VERTEX_ATTRIB.stride = 0; + + DIFFUSE_ATTRIB.ptr = COLOURS.data; + DIFFUSE_ATTRIB.size = 4; + DIFFUSE_ATTRIB.type = GL_UNSIGNED_BYTE; + DIFFUSE_ATTRIB.stride = 0; + + UV_ATTRIB.ptr = UV_COORDS.data; + UV_ATTRIB.stride = 0; + UV_ATTRIB.type = GL_FLOAT; + UV_ATTRIB.size = 2; + + ST_ATTRIB.ptr = ST_COORDS.data; + ST_ATTRIB.stride = 0; + ST_ATTRIB.type = GL_FLOAT; + ST_ATTRIB.size = 2; + + NORMAL_ATTRIB.ptr = NORMALS.data; + NORMAL_ATTRIB.stride = 0; + NORMAL_ATTRIB.type = GL_FLOAT; + NORMAL_ATTRIB.size = 3; } GLubyte _glCheckImmediateModeInactive(const char* func) { @@ -179,85 +210,62 @@ void APIENTRY glEnd() { IMMEDIATE_MODE_ACTIVE = GL_FALSE; - GLboolean vertexArrayEnabled, colorArrayEnabled, normalArrayEnabled; - GLboolean texArray0Enabled, texArray1Enabled; + /* Resizing could have invalidated these pointers */ + VERTEX_ATTRIB.ptr = VERTICES.data; + DIFFUSE_ATTRIB.ptr = COLOURS.data; + UV_ATTRIB.ptr = UV_COORDS.data; + ST_ATTRIB.ptr = ST_COORDS.data; + NORMAL_ATTRIB.ptr = NORMALS.data; - glGetBooleanv(GL_VERTEX_ARRAY, &vertexArrayEnabled); - glGetBooleanv(GL_COLOR_ARRAY, &colorArrayEnabled); - glGetBooleanv(GL_NORMAL_ARRAY, &normalArrayEnabled); + GLuint* attrs = _glGetEnabledAttributes(); - AttribPointer vptr = *_glGetVertexAttribPointer(); - AttribPointer dptr = *_glGetDiffuseAttribPointer(); - AttribPointer nptr = *_glGetNormalAttribPointer(); - AttribPointer uvptr = *_glGetUVAttribPointer(); - AttribPointer stptr = *_glGetSTAttribPointer(); + AttribPointer* vattr = _glGetVertexAttribPointer(); + AttribPointer* dattr = _glGetDiffuseAttribPointer(); + AttribPointer* nattr = _glGetNormalAttribPointer(); + AttribPointer* uattr = _glGetUVAttribPointer(); + AttribPointer* sattr = _glGetSTAttribPointer(); - profiler_checkpoint("prep"); + /* Stash existing values */ + AttribPointer vptr = *vattr; + AttribPointer dptr = *dattr; + AttribPointer nptr = *nattr; + AttribPointer uvptr = *uattr; + AttribPointer stptr = *sattr; - glEnableClientState(GL_VERTEX_ARRAY); - glEnableClientState(GL_COLOR_ARRAY); - glEnableClientState(GL_NORMAL_ARRAY); + GLuint prevAttrs = *attrs; - glVertexPointer(3, GL_FLOAT, 0, VERTICES.data); - glColorPointer(4, GL_UNSIGNED_BYTE, 0, COLOURS.data); - glNormalPointer(GL_FLOAT, 0, NORMALS.data); + /* Switch to our immediate mode arrays */ + *vattr = VERTEX_ATTRIB; + *dattr = DIFFUSE_ATTRIB; + *nattr = NORMAL_ATTRIB; + *uattr = UV_ATTRIB; + *sattr = ST_ATTRIB; - GLint activeTexture; - glGetIntegerv(GL_CLIENT_ACTIVE_TEXTURE, &activeTexture); - - glClientActiveTextureARB(GL_TEXTURE0); - glGetBooleanv(GL_TEXTURE_COORD_ARRAY, &texArray0Enabled); - glEnableClientState(GL_TEXTURE_COORD_ARRAY); - glTexCoordPointer(2, GL_FLOAT, 0, UV_COORDS.data); - - glClientActiveTextureARB(GL_TEXTURE1); - glGetBooleanv(GL_TEXTURE_COORD_ARRAY, &texArray1Enabled); - glEnableClientState(GL_TEXTURE_COORD_ARRAY); - glTexCoordPointer(2, GL_FLOAT, 0, ST_COORDS.data); - - profiler_checkpoint("client_state"); + *attrs = ~0; // Enable everything glDrawArrays(ACTIVE_POLYGON_MODE, 0, VERTICES.size / 3); - profiler_checkpoint("draw_arrays"); + /* Restore everything */ + *vattr = vptr; + *dattr = dptr; + *nattr = nptr; + *uattr = uvptr; + *sattr = stptr; + *attrs = prevAttrs; + + /* Clear arrays for next polys */ aligned_vector_clear(&VERTICES); aligned_vector_clear(&COLOURS); aligned_vector_clear(&UV_COORDS); aligned_vector_clear(&ST_COORDS); aligned_vector_clear(&NORMALS); - profiler_checkpoint("clear"); - - *_glGetVertexAttribPointer() = vptr; - *_glGetDiffuseAttribPointer() = dptr; - *_glGetNormalAttribPointer() = nptr; - *_glGetUVAttribPointer() = uvptr; - *_glGetSTAttribPointer() = stptr; - - if(!vertexArrayEnabled) { - glDisableClientState(GL_VERTEX_ARRAY); - } - - if(!colorArrayEnabled) { - glDisableClientState(GL_COLOR_ARRAY); - } - - if(!normalArrayEnabled) { - glDisableClientState(GL_NORMAL_ARRAY); - } - - if(!texArray0Enabled) { - glClientActiveTextureARB(GL_TEXTURE0); - glDisableClientState(GL_TEXTURE_COORD_ARRAY); - } - - if(!texArray1Enabled) { - glClientActiveTextureARB(GL_TEXTURE1); - glDisableClientState(GL_TEXTURE_COORD_ARRAY); - } - - glClientActiveTextureARB((GLuint) activeTexture); + *vattr = vptr; + *dattr = dptr; + *nattr = nptr; + *uattr = uvptr; + *sattr = stptr; profiler_checkpoint("restore"); profiler_pop(); diff --git a/GL/private.h b/GL/private.h index 53528be..45286f8 100644 --- a/GL/private.h +++ b/GL/private.h @@ -210,7 +210,7 @@ typedef struct { GLboolean _glCheckValidEnum(GLint param, GLint* values, const char* func); -GLuint _glGetEnabledAttributes(); +GLuint* _glGetEnabledAttributes(); AttribPointer* _glGetVertexAttribPointer(); AttribPointer* _glGetDiffuseAttribPointer(); AttribPointer* _glGetNormalAttribPointer(); diff --git a/GL/profiler.c b/GL/profiler.c index 968bc9f..479daac 100644 --- a/GL/profiler.c +++ b/GL/profiler.c @@ -24,7 +24,7 @@ typedef struct { static RootProfiler* root = NULL; -static char PROFILER_ENABLED = 0; +static char PROFILER_ENABLED = 1; void profiler_enable() { PROFILER_ENABLED = 1; diff --git a/GL/state.c b/GL/state.c index 8cc8c5f..95edb11 100644 --- a/GL/state.c +++ b/GL/state.c @@ -576,7 +576,7 @@ static GLenum COMPRESSED_FORMATS [] = { static GLint NUM_COMPRESSED_FORMATS = sizeof(COMPRESSED_FORMATS) / sizeof(GLenum); void APIENTRY glGetBooleanv(GLenum pname, GLboolean* params) { - GLuint enabledAttrs = _glGetEnabledAttributes(); + GLuint enabledAttrs = *_glGetEnabledAttributes(); GLuint activeClientTexture = _glGetActiveClientTexture(); switch(pname) { From 9fafe31a379c730d2b021d2b5f5f9bb5cfedad51 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Fri, 29 Mar 2019 08:48:01 +0000 Subject: [PATCH 30/33] Disable profiler --- GL/profiler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GL/profiler.c b/GL/profiler.c index 479daac..968bc9f 100644 --- a/GL/profiler.c +++ b/GL/profiler.c @@ -24,7 +24,7 @@ typedef struct { static RootProfiler* root = NULL; -static char PROFILER_ENABLED = 1; +static char PROFILER_ENABLED = 0; void profiler_enable() { PROFILER_ENABLED = 1; From a88a9029f53614ef6f174d4c9e4c15655d18e51a Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Fri, 29 Mar 2019 09:54:11 +0000 Subject: [PATCH 31/33] Add a polymark sample --- samples/Makefile | 1 + samples/polymark/Makefile | 29 +++++ samples/polymark/main.c | 181 +++++++++++++++++++++++++++ samples/polymark/romdisk/PLACEHOLDER | 0 4 files changed, 211 insertions(+) create mode 100644 samples/polymark/Makefile create mode 100644 samples/polymark/main.c create mode 100644 samples/polymark/romdisk/PLACEHOLDER diff --git a/samples/Makefile b/samples/Makefile index 1d588b5..177f547 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -25,3 +25,4 @@ all: $(KOS_MAKE) -C paletted all $(KOS_MAKE) -C paletted_pcx all $(KOS_MAKE) -C depth_funcs all + $(KOS_MAKE) -C polymark all diff --git a/samples/polymark/Makefile b/samples/polymark/Makefile new file mode 100644 index 0000000..9910cf4 --- /dev/null +++ b/samples/polymark/Makefile @@ -0,0 +1,29 @@ +TARGET = polymark.elf +OBJS = main.o + +all: rm-elf $(TARGET) + +include $(KOS_BASE)/Makefile.rules + +clean: + -rm -f $(TARGET) $(OBJS) romdisk.* + +rm-elf: + -rm -f $(TARGET) romdisk.* + +$(TARGET): $(OBJS) romdisk.o + $(KOS_CC) $(KOS_CFLAGS) $(KOS_LDFLAGS) -o $(TARGET) $(KOS_START) \ + $(OBJS) romdisk.o $(OBJEXTRA) -lm -lkosutils $(KOS_LIBS) + +romdisk.img: + $(KOS_GENROMFS) -f romdisk.img -d romdisk -v + +romdisk.o: romdisk.img + $(KOS_BASE)/utils/bin2o/bin2o romdisk.img romdisk romdisk.o + +run: $(TARGET) + $(KOS_LOADER) $(TARGET) + +dist: + rm -f $(OBJS) romdisk.o romdisk.img + $(KOS_STRIP) $(TARGET) diff --git a/samples/polymark/main.c b/samples/polymark/main.c new file mode 100644 index 0000000..2cad762 --- /dev/null +++ b/samples/polymark/main.c @@ -0,0 +1,181 @@ +/* + KallistiGL 2.0.0 + + quadmark.c + (c)2018 Luke Benstead + (c)2014 Josh Pearson + (c)2002 Dan Potter, Paul Boese +*/ + +#include + +#include + +#include +#include + +enum { PHASE_HALVE, PHASE_INCR, PHASE_DECR, PHASE_FINAL }; + +int polycnt; +int phase = PHASE_HALVE; +float avgfps = -1; + +void running_stats() { + pvr_stats_t stats; + pvr_get_stats(&stats); + + if(avgfps == -1) + avgfps = stats.frame_rate; + else + avgfps = (avgfps + stats.frame_rate) / 2.0f; +} + +void stats() { + pvr_stats_t stats; + + pvr_get_stats(&stats); + dbglog(DBG_DEBUG, "3D Stats: %d VBLs, frame rate ~%f fps\n", + stats.vbl_count, stats.frame_rate); +} + + +int check_start() { + maple_device_t *cont; + cont_state_t *state; + + cont = maple_enum_type(0, MAPLE_FUNC_CONTROLLER); + + if(cont) { + state = (cont_state_t *)maple_dev_status(cont); + + if(state) + return state->buttons & CONT_START; + } + + return 0; +} + +pvr_poly_hdr_t hdr; + +void setup() { + glKosInit(); + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + glOrtho(0, 640, 0, 480, -100, 100); + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); +} + +void do_frame() { + int x, y, z; + int size; + int i; + float col; + + + + for(i = 0; i < polycnt; i++) { + glBegin(GL_POLYGON); + x = rand() % 640; + y = rand() % 480; + z = rand() % 100 + 1; + size = rand() % 50 + 1; + col = (rand() % 255) * 0.00391f; + + glColor3f(col, col, col); + glVertex3f(x - size, y - size, z); + glVertex3f(x + size, y - size, z); + glVertex3f(x + size, y + size, z); + glVertex3f(x, y + size + (size / 2), z); + glVertex3f(x - size, y + size, z); + glEnd(); + } + + + + glKosSwapBuffers(); +} + +time_t start; +void switch_tests(int ppf) { + printf("Beginning new test: %d polys per frame (%d per second at 60fps)\n", + ppf * 2, ppf * 2 * 60); + avgfps = -1; + polycnt = ppf; +} + +void check_switch() { + time_t now; + + now = time(NULL); + + if(now >= (start + 5)) { + start = time(NULL); + printf(" Average Frame Rate: ~%f fps (%d pps)\n", avgfps, (int)(polycnt * avgfps * 2)); + + switch(phase) { + case PHASE_HALVE: + + if(avgfps < 55) { + switch_tests(polycnt / 1.2f); + } + else { + printf(" Entering PHASE_INCR\n"); + phase = PHASE_INCR; + } + + break; + + case PHASE_INCR: + + if(avgfps >= 55) { + switch_tests(polycnt + 15); + } + else { + printf(" Entering PHASE_DECR\n"); + phase = PHASE_DECR; + } + + break; + + case PHASE_DECR: + + if(avgfps < 55) { + switch_tests(polycnt - 30); + } + else { + printf(" Entering PHASE_FINAL\n"); + phase = PHASE_FINAL; + } + + break; + + case PHASE_FINAL: + break; + } + } +} + +int main(int argc, char **argv) { + setup(); + + /* Start off with something obscene */ + switch_tests(200000 / 60); + start = time(NULL); + + for(;;) { + if(check_start()) + break; + + printf(" \r"); + do_frame(); + running_stats(); + check_switch(); + } + + stats(); + + return 0; +} + + diff --git a/samples/polymark/romdisk/PLACEHOLDER b/samples/polymark/romdisk/PLACEHOLDER new file mode 100644 index 0000000..e69de29 From e876dcb14fd90da00585e36609e6650aa0eca8fe Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Fri, 29 Mar 2019 11:23:48 +0000 Subject: [PATCH 32/33] Massively optimise GL_POLYGON/GL_TRIANGLE_FAN --- GL/draw.c | 30 +++++++++--------------------- samples/polymark/main.c | 4 ---- 2 files changed, 9 insertions(+), 25 deletions(-) diff --git a/GL/draw.c b/GL/draw.c index 8811480..753ee11 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -528,30 +528,18 @@ static void genTriangleStrip(Vertex* output, GLuint count) { output[count - 1].flags = PVR_CMD_VERTEX_EOL; } -#define MAX_POLYGON_SIZE 32 - static void genTriangleFan(Vertex* output, GLuint count) { - assert(count < MAX_POLYGON_SIZE); - static Vertex buffer[MAX_POLYGON_SIZE]; + assert(count <= 255); - if(count <= 3){ - output[2].flags = PVR_CMD_VERTEX_EOL; - return; - } + Vertex* dst = output + (((count - 2) * 3) - 1); + Vertex* src = output + (count - 1); - memcpy(buffer, output, sizeof(Vertex) * count); - - // First 3 vertices are in the right place, just end early - output[2].flags = PVR_CMD_VERTEX_EOL; - - GLsizei i = 3, target = 3; - Vertex* first = &output[0]; - - for(; i < count; ++i) { - output[target++] = *first; - output[target++] = buffer[i - 1]; - output[target] = buffer[i]; - output[target++].flags = PVR_CMD_VERTEX_EOL; + GLubyte i = count - 2; + while(i--) { + *dst = *src--; + (*dst--).flags = PVR_CMD_VERTEX_EOL; + *dst-- = *src; + *dst-- = *output; } } diff --git a/samples/polymark/main.c b/samples/polymark/main.c index 2cad762..092fb9b 100644 --- a/samples/polymark/main.c +++ b/samples/polymark/main.c @@ -72,8 +72,6 @@ void do_frame() { int i; float col; - - for(i = 0; i < polycnt; i++) { glBegin(GL_POLYGON); x = rand() % 640; @@ -91,8 +89,6 @@ void do_frame() { glEnd(); } - - glKosSwapBuffers(); } From 26dd1c120557601593ae1af8b713e4baa3fa2f5a Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Sat, 30 Mar 2019 07:11:08 +0000 Subject: [PATCH 33/33] Tweak the polymark sample --- samples/polymark/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/samples/polymark/main.c b/samples/polymark/main.c index 092fb9b..2a671fc 100644 --- a/samples/polymark/main.c +++ b/samples/polymark/main.c @@ -64,6 +64,7 @@ void setup() { glOrtho(0, 640, 0, 480, -100, 100); glMatrixMode(GL_PROJECTION); glLoadIdentity(); + glEnable(GL_CULL_FACE); } void do_frame() { @@ -95,7 +96,7 @@ void do_frame() { time_t start; void switch_tests(int ppf) { printf("Beginning new test: %d polys per frame (%d per second at 60fps)\n", - ppf * 2, ppf * 2 * 60); + ppf * 3, ppf * 3 * 60); avgfps = -1; polycnt = ppf; }