From 5bf58ec1a89de2a29845b18bcb97d07cea4ad938 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Wed, 13 Mar 2019 11:24:35 +0000 Subject: [PATCH 1/6] Refactor array submission to be (hopefully) faster --- GL/draw.c | 636 ++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 447 insertions(+), 189 deletions(-) diff --git a/GL/draw.c b/GL/draw.c index 3b9cceb..90e807a 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -67,6 +67,234 @@ typedef void (*FloatParseFunc)(GLfloat* out, const GLubyte* in); typedef void (*ByteParseFunc)(GLubyte* out, const GLubyte* in); typedef void (*PolyBuildFunc)(ClipVertex* first, ClipVertex* previous, ClipVertex* vertex, ClipVertex* next, const GLsizei i); + +static void _readVertexData3f3f(const float* input, GLuint count, GLubyte stride, float* output) { + const float* end = (float*) (((GLubyte*) input) + (count * stride)); + + while(input < end) { + output[0] = input[0]; + output[1] = input[1]; + output[2] = input[2]; + + input = (float*) (((GLubyte*) input) + stride); + output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + } +} + +static void _readVertexData3us3f(const GLushort* input, GLuint count, GLubyte stride, float* output) { + const GLushort* end = (GLushort*) ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[0] = input[0]; + output[1] = input[1]; + output[2] = input[2]; + + input = (GLushort*) (((GLubyte*) input) + stride); + output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + } +} + +static void _readVertexData3ui3f(const GLuint* input, GLuint count, GLubyte stride, float* output) { + const GLuint* end = (GLuint*) ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[0] = input[0]; + output[1] = input[1]; + output[2] = input[2]; + + input = (GLuint*) (((GLubyte*) input) + stride); + output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + } +} + +static void _readVertexData3ub3f(const GLubyte* input, GLuint count, GLubyte stride, float* output) { 
+ const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; + const GLubyte* end = ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE; + output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE; + output[2] = input[2] * ONE_OVER_TWO_FIVE_FIVE; + + input += stride; + output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + } +} + +static void _readVertexData2f2f(const float* input, GLuint count, GLubyte stride, float* output) { + const float* end = (float*) ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[0] = input[0]; + output[1] = input[1]; + + input = (float*) (((GLubyte*) input) + stride); + output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + } +} + +static void _readVertexData2f3f(const float* input, GLuint count, GLubyte stride, float* output) { + const float* end = (float*) ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[0] = input[0]; + output[1] = input[1]; + output[2] = 0.0f; + + input = (float*) (((GLubyte*) input) + stride); + output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + } +} + +static void _readVertexData2ub3f(const GLubyte* input, GLuint count, GLubyte stride, float* output) { + const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; + const GLubyte* end = ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE; + output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE; + output[2] = 0.0f; + + input += stride; + output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + } +} + +static void _readVertexData2us3f(const GLushort* input, GLuint count, GLubyte stride, float* output) { + const GLushort* end = (GLushort*) ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[0] = input[0]; + output[1] = input[1]; + output[2] = 0.0f; + + input = (GLushort*) (((GLubyte*) input) + stride); + output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + } +} + +static 
void _readVertexData2us2f(const GLushort* input, GLuint count, GLubyte stride, float* output) { + const GLushort* end = (GLushort*) ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[0] = input[0]; + output[1] = input[1]; + + input = (GLushort*) (((GLubyte*) input) + stride); + output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + } +} + +static void _readVertexData2ui2f(const GLuint* input, GLuint count, GLubyte stride, float* output) { + const GLuint* end = (GLuint*) ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[0] = input[0]; + output[1] = input[1]; + + input = (GLuint*) (((GLubyte*) input) + stride); + output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + } +} + +static void _readVertexData2ub2f(const GLubyte* input, GLuint count, GLubyte stride, float* output) { + const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; + const GLubyte* end = (GLubyte*) ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE; + output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE; + + input = (((GLubyte*) input) + stride); + output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + } +} + +static void _readVertexData2ui3f(const GLuint* input, GLuint count, GLubyte stride, float* output) { + const GLuint* end = (GLuint*) ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[0] = input[0]; + output[1] = input[1]; + output[2] = 0.0f; + + input = (GLuint*) (((GLubyte*) input) + stride); + output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + } +} + +static void _readVertexData4ubARGB(const GLubyte* input, GLuint count, GLubyte stride, GLubyte* output) { + const GLubyte* end = ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[R8IDX] = input[0]; + output[G8IDX] = input[1]; + output[B8IDX] = input[2]; + output[A8IDX] = input[3]; + + input = (GLubyte*) (((GLubyte*) input) + stride); + output = (GLubyte*) (((GLubyte*) 
output) + sizeof(ClipVertex)); + } +} + +static void _readVertexData4fARGB(const float* input, GLuint count, GLubyte stride, GLubyte* output) { + const float* end = (float*) ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[R8IDX] = (GLubyte) (input[0] * 255.0f); + output[G8IDX] = (GLubyte) (input[1] * 255.0f); + output[B8IDX] = (GLubyte) (input[2] * 255.0f); + output[A8IDX] = (GLubyte) (input[3] * 255.0f); + + input = (float*) (((GLubyte*) input) + stride); + output = (GLubyte*) (((GLubyte*) output) + sizeof(ClipVertex)); + } +} + +static void _readVertexData3fARGB(const float* input, GLuint count, GLubyte stride, GLubyte* output) { + const float* end = (float*) ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[R8IDX] = (GLubyte) (input[0] * 255.0f); + output[G8IDX] = (GLubyte) (input[1] * 255.0f); + output[B8IDX] = (GLubyte) (input[2] * 255.0f); + output[A8IDX] = 1.0f; + + input = (float*) (((GLubyte*) input) + stride); + output = (GLubyte*) (((GLubyte*) output) + sizeof(ClipVertex)); + } +} + +static void _readVertexData3ubARGB(const GLubyte* input, GLuint count, GLubyte stride, GLubyte* output) { + const GLubyte* end = ((GLubyte*) input) + (count * stride); + + while(input < end) { + output[R8IDX] = input[0]; + output[G8IDX] = input[1]; + output[B8IDX] = input[2]; + output[A8IDX] = 1.0f; + + input = (((GLubyte*) input) + stride); + output = (GLubyte*) (((GLubyte*) output) + sizeof(ClipVertex)); + } +} + +static void _readVertexData3usARGB(const GLushort* input, GLuint count, GLubyte stride, GLubyte* output) { + assert(0 && "Not Implemented"); +} + +static void _readVertexData3uiARGB(const GLuint* input, GLuint count, GLubyte stride, GLubyte* output) { + assert(0 && "Not Implemented"); +} + +static void _readVertexData4usARGB(const GLushort* input, GLuint count, GLubyte stride, GLubyte* output) { + assert(0 && "Not Implemented"); +} + +static void _readVertexData4uiARGB(const GLuint* input, GLuint count, GLubyte 
stride, GLubyte* output) { + assert(0 && "Not Implemented"); +} + GLuint _glGetEnabledAttributes() { return ENABLED_VERTEX_ATTRIBUTES; } @@ -466,7 +694,7 @@ static inline void nullFloatParseFunc(GLfloat* out, const GLubyte* in) {} static inline void genElementsCommon( ClipVertex* output, const GLubyte* iptr, GLuint istride, GLenum type, - GLsizei count, + GLuint count, const GLubyte* vptr, GLuint vstride, const GLubyte* cptr, GLuint cstride, const GLubyte* uvptr, GLuint uvstride, @@ -506,7 +734,7 @@ static inline void genElementsCommon( static inline void genElementsTriangles( ClipVertex* output, - GLsizei count, + GLuint count, const GLubyte* iptr, GLuint istride, GLenum type, const GLubyte* vptr, GLuint vstride, const GLubyte* cptr, GLuint cstride, @@ -530,7 +758,7 @@ static inline void genElementsTriangles( static inline void genElementsQuads( ClipVertex* output, - GLsizei count, + GLuint count, const GLubyte* iptr, GLuint istride, GLenum type, const GLubyte* vptr, GLuint vstride, const GLubyte* cptr, GLuint cstride, @@ -555,7 +783,7 @@ static inline void genElementsQuads( static inline void genElementsTriangleFan( ClipVertex* output, - GLsizei count, + GLuint count, const GLubyte* iptr, GLuint istride, GLenum type, const GLubyte* vptr, GLuint vstride, const GLubyte* cptr, GLuint cstride, @@ -595,7 +823,7 @@ static inline void genElementsTriangleFan( static inline void genElementsTriangleStrip( ClipVertex* output, - GLsizei count, + GLuint count, const GLubyte* iptr, GLuint istride, GLenum type, const GLubyte* vptr, GLuint vstride, const GLubyte* cptr, GLuint cstride, @@ -614,95 +842,7 @@ static inline void genElementsTriangleStrip( output[count - 1].flags = PVR_CMD_VERTEX_EOL; } -static inline void genArraysCommon( - ClipVertex* output, - GLsizei count, - const GLubyte* vptr, GLuint vstride, - const GLubyte* cptr, GLuint cstride, - const GLubyte* uvptr, GLuint uvstride, - const GLubyte* stptr, GLuint ststride, - const GLubyte* nptr, GLuint nstride, - GLboolean 
doTexture, GLboolean doMultitexture, GLboolean doLighting -) { - const FloatParseFunc vertexFunc = _calcVertexParseFunc(); - const ByteParseFunc diffuseFunc = _calcDiffuseParseFunc(); - const FloatParseFunc uvFunc = _calcUVParseFunc(); - const FloatParseFunc stFunc = _calcSTParseFunc(); - const FloatParseFunc normalFunc = _calcNormalParseFunc(); - - assert(vertexFunc); - assert(diffuseFunc); - assert(uvFunc); - assert(stFunc); - assert(normalFunc); - - GLsizei i = count; - - ClipVertex* vertex = output; - - while(i--) { - vertex->flags = PVR_CMD_VERTEX; - vertexFunc(vertex->xyz, vptr); - vptr += vstride; - vertex++; - } - - i = count; - vertex = output; - while(i--) { - diffuseFunc(vertex->bgra, cptr); - cptr += cstride; - vertex++; - } - - if(doTexture) { - i = count; - vertex = output; - while(i--) { - uvFunc(vertex->uv, uvptr); - uvptr += uvstride; - vertex++; - } - } - - if(doMultitexture) { - i = count; - vertex = output; - while(i--) { - stFunc(vertex->st, stptr); - stptr += ststride; - ++vertex; - } - } - - if(doLighting) { - i = count; - vertex = output; - while(i--) { - normalFunc(vertex->nxyz, nptr); - nptr += nstride; - ++vertex; - } - } -} - - -static inline void genArraysTriangles( - ClipVertex* output, - GLsizei count, - const GLubyte* vptr, GLuint vstride, - const GLubyte* cptr, GLuint cstride, - const GLubyte* uvptr, GLuint uvstride, - const GLubyte* stptr, GLuint ststride, - const GLubyte* nptr, GLuint nstride, - GLboolean doTexture, GLboolean doMultitexture, GLboolean doLighting) { - - genArraysCommon( - output, count, - vptr, vstride, cptr, cstride, uvptr, uvstride, stptr, ststride, nptr, nstride, - doTexture, doMultitexture, doLighting - ); - +static inline void genArraysTriangles(ClipVertex* output, GLuint count) { GLsizei i = count; ClipVertex* vertex = output; for(i = 2; i < count; i += 3) { @@ -710,22 +850,7 @@ static inline void genArraysTriangles( } } -static void genArraysQuads( - ClipVertex* output, - GLsizei count, - const GLubyte* 
vptr, GLuint vstride, - const GLubyte* cptr, GLuint cstride, - const GLubyte* uvptr, GLuint uvstride, - const GLubyte* stptr, GLuint ststride, - const GLubyte* nptr, GLuint nstride, - GLboolean doTexture, GLboolean doMultitexture, GLboolean doLighting) { - - genArraysCommon( - output, count, - vptr, vstride, cptr, cstride, uvptr, uvstride, stptr, ststride, nptr, nstride, - doTexture, doMultitexture, doLighting - ); - +static void genArraysQuads(ClipVertex* output, GLuint count) { GLsizei i = 3; for(; i < count; i += 4) { @@ -737,46 +862,16 @@ static void genArraysQuads( } } -static void genArraysTriangleStrip( - ClipVertex* output, - GLsizei count, - const GLubyte* vptr, GLuint vstride, - const GLubyte* cptr, GLuint cstride, - const GLubyte* uvptr, GLuint uvstride, - const GLubyte* stptr, GLuint ststride, - const GLubyte* nptr, GLuint nstride, - GLboolean doTexture, GLboolean doMultitexture, GLboolean doLighting) { - - genArraysCommon( - output, count, - vptr, vstride, cptr, cstride, uvptr, uvstride, stptr, ststride, nptr, nstride, - doTexture, doMultitexture, doLighting - ); - +static void genArraysTriangleStrip(ClipVertex* output, GLuint count) { output[count - 1].flags = PVR_CMD_VERTEX_EOL; } #define MAX_POLYGON_SIZE 32 -static void genArraysTriangleFan( - ClipVertex* output, - GLsizei count, - const GLubyte* vptr, GLuint vstride, - const GLubyte* cptr, GLuint cstride, - const GLubyte* uvptr, GLuint uvstride, - const GLubyte* stptr, GLuint ststride, - const GLubyte* nptr, GLuint nstride, - GLboolean doTexture, GLboolean doMultitexture, GLboolean doLighting) { - +static void genArraysTriangleFan(ClipVertex* output, GLuint count) { assert(count < MAX_POLYGON_SIZE); static ClipVertex buffer[MAX_POLYGON_SIZE]; - genArraysCommon( - output, count, - vptr, vstride, cptr, cstride, uvptr, uvstride, stptr, ststride, nptr, nstride, - doTexture, doMultitexture, doLighting - ); - if(count <= 3){ swapVertex(&output[1], &output[2]); output[2].flags = PVR_CMD_VERTEX_EOL; @@ 
-799,7 +894,192 @@ static void genArraysTriangleFan( } } -static void generate(ClipVertex* output, const GLenum mode, const GLsizei first, const GLsizei count, +static inline void _readPositionData(const GLuint first, const GLuint count, ClipVertex* output) { + const GLubyte vstride = (VERTEX_POINTER.stride) ? VERTEX_POINTER.stride : VERTEX_POINTER.size * byte_size(VERTEX_POINTER.type); + const void* vptr = ((GLubyte*) VERTEX_POINTER.ptr + (first * vstride)); + + if(VERTEX_POINTER.size == 3) { + switch(VERTEX_POINTER.type) { + case GL_FLOAT: + _readVertexData3f3f(vptr, count, vstride, output[0].xyz); + break; + case GL_BYTE: + case GL_UNSIGNED_BYTE: + _readVertexData3ub3f(vptr, count, vstride, output[0].xyz); + break; + case GL_SHORT: + case GL_UNSIGNED_SHORT: + _readVertexData3us3f(vptr, count, vstride, output[0].xyz); + break; + case GL_INT: + case GL_UNSIGNED_INT: + _readVertexData3ui3f(vptr, count, vstride, output[0].xyz); + break; + default: + assert(0 && "Not Implemented"); + } + } else if(VERTEX_POINTER.size == 2) { + switch(VERTEX_POINTER.type) { + case GL_FLOAT: + _readVertexData2f3f(vptr, count, vstride, output[0].xyz); + break; + case GL_BYTE: + case GL_UNSIGNED_BYTE: + _readVertexData2ub3f(vptr, count, vstride, output[0].xyz); + break; + case GL_SHORT: + case GL_UNSIGNED_SHORT: + _readVertexData2us3f(vptr, count, vstride, output[0].xyz); + break; + case GL_INT: + case GL_UNSIGNED_INT: + _readVertexData2ui3f(vptr, count, vstride, output[0].xyz); + break; + default: + assert(0 && "Not Implemented"); + } + } else { + assert(0 && "Not Implemented"); + } +} + +static inline void _readUVData(const GLuint first, const GLuint count, ClipVertex* output) { + const GLubyte uvstride = (UV_POINTER.stride) ? 
UV_POINTER.stride : UV_POINTER.size * byte_size(UV_POINTER.type); + const void* uvptr = ((GLubyte*) UV_POINTER.ptr + (first * uvstride)); + + if(UV_POINTER.size == 2) { + switch(UV_POINTER.type) { + case GL_FLOAT: + _readVertexData2f2f(uvptr, count, uvstride, output[0].uv); + break; + case GL_BYTE: + case GL_UNSIGNED_BYTE: + _readVertexData2ub2f(uvptr, count, uvstride, output[0].uv); + break; + case GL_SHORT: + case GL_UNSIGNED_SHORT: + _readVertexData2us2f(uvptr, count, uvstride, output[0].uv); + break; + case GL_INT: + case GL_UNSIGNED_INT: + _readVertexData2ui2f(uvptr, count, uvstride, output[0].uv); + break; + default: + assert(0 && "Not Implemented"); + } + } else { + assert(0 && "Not Implemented"); + } +} + +static inline void _readSTData(const GLuint first, const GLuint count, ClipVertex* output) { + const GLubyte ststride = (ST_POINTER.stride) ? ST_POINTER.stride : ST_POINTER.size * byte_size(ST_POINTER.type); + const void* stptr = ((GLubyte*) ST_POINTER.ptr + (first * ststride)); + + if(ST_POINTER.size == 2) { + switch(ST_POINTER.type) { + case GL_FLOAT: + _readVertexData2f2f(stptr, count, ststride, output[0].st); + break; + case GL_BYTE: + case GL_UNSIGNED_BYTE: + _readVertexData2ub2f(stptr, count, ststride, output[0].st); + break; + case GL_SHORT: + case GL_UNSIGNED_SHORT: + _readVertexData2us2f(stptr, count, ststride, output[0].st); + break; + case GL_INT: + case GL_UNSIGNED_INT: + _readVertexData2ui2f(stptr, count, ststride, output[0].st); + break; + default: + assert(0 && "Not Implemented"); + } + } else { + assert(0 && "Not Implemented"); + } +} + +static inline void _readNormalData(const GLuint first, const GLuint count, ClipVertex* output) { + const GLuint nstride = (NORMAL_POINTER.stride) ? 
NORMAL_POINTER.stride : NORMAL_POINTER.size * byte_size(NORMAL_POINTER.type); + const void* nptr = ((GLubyte*) NORMAL_POINTER.ptr + (first * nstride)); + + if(NORMAL_POINTER.size == 3) { + switch(NORMAL_POINTER.type) { + case GL_FLOAT: + _readVertexData3f3f(nptr, count, nstride, output[0].nxyz); + break; + case GL_BYTE: + case GL_UNSIGNED_BYTE: + _readVertexData3ub3f(nptr, count, nstride, output[0].nxyz); + break; + case GL_SHORT: + case GL_UNSIGNED_SHORT: + _readVertexData3us3f(nptr, count, nstride, output[0].nxyz); + break; + case GL_INT: + case GL_UNSIGNED_INT: + _readVertexData3ui3f(nptr, count, nstride, output[0].nxyz); + break; + default: + assert(0 && "Not Implemented"); + } + } else { + assert(0 && "Not Implemented"); + } +} + +static inline void _readDiffuseData(const GLuint first, const GLuint count, ClipVertex* output) { + const GLubyte cstride = (DIFFUSE_POINTER.stride) ? DIFFUSE_POINTER.stride : DIFFUSE_POINTER.size * byte_size(DIFFUSE_POINTER.type); + const void* cptr = ((GLubyte*) DIFFUSE_POINTER.ptr + (first * cstride)); + + if(DIFFUSE_POINTER.size == 3) { + switch(DIFFUSE_POINTER.type) { + case GL_FLOAT: + _readVertexData3fARGB(cptr, count, cstride, output[0].bgra); + break; + case GL_BYTE: + case GL_UNSIGNED_BYTE: + _readVertexData3ubARGB(cptr, count, cstride, output[0].bgra); + break; + case GL_SHORT: + case GL_UNSIGNED_SHORT: + _readVertexData3usARGB(cptr, count, cstride, output[0].bgra); + break; + case GL_INT: + case GL_UNSIGNED_INT: + _readVertexData3uiARGB(cptr, count, cstride, output[0].bgra); + break; + default: + assert(0 && "Not Implemented"); + } + } else if(DIFFUSE_POINTER.size == 4) { + switch(DIFFUSE_POINTER.type) { + case GL_FLOAT: + _readVertexData4fARGB(cptr, count, cstride, output[0].bgra); + break; + case GL_BYTE: + case GL_UNSIGNED_BYTE: + _readVertexData4ubARGB(cptr, count, cstride, output[0].bgra); + break; + case GL_SHORT: + case GL_UNSIGNED_SHORT: + _readVertexData4usARGB(cptr, count, cstride, output[0].bgra); + break; + 
case GL_INT: + case GL_UNSIGNED_INT: + _readVertexData4uiARGB(cptr, count, cstride, output[0].bgra); + break; + default: + assert(0 && "Not Implemented"); + } + } else { + assert(0 && "Not Implemented"); + } +} + +static void generate(ClipVertex* output, const GLenum mode, const GLsizei first, const GLuint count, const GLubyte* indices, const GLenum type, const GLboolean doTexture, const GLboolean doMultitexture, const GLboolean doLighting) { /* Read from the client buffers and generate an array of ClipVertices */ @@ -817,57 +1097,35 @@ static void generate(ClipVertex* output, const GLenum mode, const GLsizei first, const GLsizei istride = byte_size(type); if(!indices) { + _readPositionData(first, count, output); + _readDiffuseData(first, count, output); + if(doTexture) _readUVData(first, count, output); + if(doLighting) _readNormalData(first, count, output); + if(doTexture && doMultitexture) _readSTData(first, count, output); + + ClipVertex* it = output; + const ClipVertex* end = output + count; + while(it < end) { + (it++)->flags = PVR_CMD_VERTEX; + } + // Drawing arrays switch(mode) { case GL_TRIANGLES: - genArraysTriangles( - output, - count, - vptr, vstride, - cptr, cstride, - uvptr, uvstride, - stptr, ststride, - nptr, nstride, - doTexture, doMultitexture, doLighting - ); + genArraysTriangles(output, count); break; case GL_QUADS: - genArraysQuads( - output, - count, - vptr, vstride, - cptr, cstride, - uvptr, uvstride, - stptr, ststride, - nptr, nstride, - doTexture, doMultitexture, doLighting - ); + genArraysQuads(output, count); break; case GL_POLYGON: case GL_TRIANGLE_FAN: - genArraysTriangleFan( - output, - count, - vptr, vstride, - cptr, cstride, - uvptr, uvstride, - stptr, ststride, - nptr, nstride, - doTexture, doMultitexture, doLighting - ); + genArraysTriangleFan(output, count); break; case GL_TRIANGLE_STRIP: + genArraysTriangleStrip(output, count); + break; default: - genArraysTriangleStrip( - output, - count, - vptr, vstride, - cptr, cstride, - 
uvptr, uvstride, - stptr, ststride, - nptr, nstride, - doTexture, doMultitexture, doLighting - ); + assert(0 && "Not Implemented"); } } else if(mode == GL_TRIANGLES) { genElementsTriangles( @@ -920,7 +1178,7 @@ static void generate(ClipVertex* output, const GLenum mode, const GLsizei first, } } -static void transform(ClipVertex* output, const GLsizei count) { +static void transform(ClipVertex* output, const GLuint count) { /* Perform modelview transform, storing W */ ClipVertex* vertex = output; @@ -950,7 +1208,7 @@ static void transform(ClipVertex* output, const GLsizei count) { } } -static GLsizei clip(AlignedVector* polylist, uint32_t offset, const GLsizei count) { +static GLsizei clip(AlignedVector* polylist, uint32_t offset, const GLuint count) { /* Perform clipping, generating new vertices as necessary */ clipTriangleStrip2(polylist, offset, _glGetShadeModel() == GL_FLAT); @@ -990,7 +1248,7 @@ static void mat_transform_normal3(const float* xyz, const float* xyzOut, const u } } -static void light(ClipVertex* output, const GLsizei count) { +static void light(ClipVertex* output, const GLuint count) { if(!_glIsLightingEnabled()) { return; } @@ -1047,7 +1305,7 @@ static void light(ClipVertex* output, const GLsizei count) { } } -static void divide(ClipVertex* output, const GLsizei count) { +static void divide(ClipVertex* output, const GLuint count) { /* Perform perspective divide on each vertex */ ClipVertex* vertex = output; @@ -1060,7 +1318,7 @@ static void divide(ClipVertex* output, const GLsizei count) { } } -static void push(PVRHeader* header, ClipVertex* output, const GLsizei count, PolyList* activePolyList, GLshort textureUnit) { +static void push(PVRHeader* header, ClipVertex* output, const GLuint count, PolyList* activePolyList, GLshort textureUnit) { // Compile the header pvr_poly_cxt_t cxt = *_glGetPVRContext(); cxt.list_type = activePolyList->list_type; @@ -1080,7 +1338,7 @@ static void push(PVRHeader* header, ClipVertex* output, const GLsizei count, 
Pol #define DEBUG_CLIPPING 0 -static void submitVertices(GLenum mode, GLsizei first, GLsizei count, GLenum type, const GLvoid* indices) { +static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type, const GLvoid* indices) { /* Do nothing if vertices aren't enabled */ if(!(ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG)) { return; From 8b206f012ef9a2a30ea6edd5a5e821334e3ecbb6 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Wed, 13 Mar 2019 12:05:06 +0000 Subject: [PATCH 2/6] Make sure we correctly initialize the used banks --- GL/texture.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/GL/texture.c b/GL/texture.c index a3c852f..e19a3ba 100644 --- a/GL/texture.c +++ b/GL/texture.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "config.h" #include "../include/glext.h" @@ -36,11 +37,8 @@ static TexturePalette* _initTexturePalette() { TexturePalette* palette = (TexturePalette*) malloc(sizeof(TexturePalette)); assert(palette); - palette->data = NULL; - palette->format = 0; - palette->width = 0; + memset(palette, 0x0, sizeof(TexturePalette)); palette->bank = -1; - palette->size = 0; return palette; } @@ -222,6 +220,8 @@ GLubyte _glInitTextures() { SHARED_PALETTES[2] = _initTexturePalette(); SHARED_PALETTES[3] = _initTexturePalette(); + memset((void*) BANKS_USED, 0x0, sizeof(BANKS_USED)); + memset((void*) SUBBANKS_USED, 0x0, sizeof(SUBBANKS_USED)); return 1; } From 724ff628aa7c67685f8b4b76e406b5f8aa906b07 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Wed, 13 Mar 2019 15:14:09 +0000 Subject: [PATCH 3/6] Fix some signedness issues --- GL/lighting.c | 2 +- GL/private.h | 2 +- GL/state.c | 18 +++++++++--------- GL/texture.c | 16 ++++++++-------- include/gl.h | 6 +++--- 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/GL/lighting.c b/GL/lighting.c index 04d2a52..00c4e0a 100644 --- a/GL/lighting.c +++ b/GL/lighting.c @@ -230,7 +230,7 @@ void APIENTRY glColorMaterial(GLenum face, GLenum mode) { 
return; } - GLenum validModes[] = {GL_AMBIENT, GL_DIFFUSE, GL_AMBIENT_AND_DIFFUSE, GL_EMISSION, GL_SPECULAR, 0}; + GLint validModes[] = {GL_AMBIENT, GL_DIFFUSE, GL_AMBIENT_AND_DIFFUSE, GL_EMISSION, GL_SPECULAR, 0}; if(_glCheckValidEnum(mode, validModes, __func__) != 0) { return; diff --git a/GL/private.h b/GL/private.h index 29392a1..ebdb9b4 100644 --- a/GL/private.h +++ b/GL/private.h @@ -131,7 +131,7 @@ typedef struct { GLint size; } AttribPointer; -GLboolean _glCheckValidEnum(GLint param, GLenum* values, const char* func); +GLboolean _glCheckValidEnum(GLint param, GLint* values, const char* func); GLuint _glGetEnabledAttributes(); AttribPointer* _glGetVertexAttribPointer(); diff --git a/GL/state.c b/GL/state.c index 9a43f2d..8cc8c5f 100644 --- a/GL/state.c +++ b/GL/state.c @@ -110,7 +110,7 @@ static int _calcPVRBlendFactor(GLenum factor) { case GL_ONE: return PVR_BLEND_ONE; default: - fprintf(stderr, "Invalid blend mode: %d\n", factor); + fprintf(stderr, "Invalid blend mode: %u\n", (unsigned int) factor); return PVR_BLEND_ONE; } } @@ -131,7 +131,7 @@ static void _updatePVRBlend(pvr_poly_cxt_t* context) { } } -GLboolean _glCheckValidEnum(GLint param, GLenum* values, const char* func) { +GLboolean _glCheckValidEnum(GLint param, GLint* values, const char* func) { GLubyte found = 0; while(*values != 0) { if(*values == param) { @@ -478,7 +478,7 @@ GLAPI void APIENTRY glBlendFunc(GLenum sfactor, GLenum dfactor) { #define PT_ALPHA_REF 0x011c GLAPI void APIENTRY glAlphaFunc(GLenum func, GLclampf ref) { - GLenum validFuncs[] = { + GLint validFuncs[] = { GL_GREATER, 0 }; @@ -662,20 +662,20 @@ void APIENTRY glGetIntegerv(GLenum pname, GLint *params) { } } -const GLbyte *glGetString(GLenum name) { +const GLubyte *glGetString(GLenum name) { switch(name) { case GL_VENDOR: - return "KallistiOS / Kazade"; + return (const GLubyte*) "KallistiOS / Kazade"; case GL_RENDERER: - return "PowerVR2 CLX2 100mHz"; + return (const GLubyte*) "PowerVR2 CLX2 100mHz"; case GL_VERSION: - return 
"1.2 (partial) - GLdc 1.0"; + return (const GLubyte*) "1.2 (partial) - GLdc 1.0"; case GL_EXTENSIONS: - return "GL_ARB_framebuffer_object, GL_ARB_multitexture, GL_ARB_texture_rg, GL_EXT_paletted_texture, GL_EXT_shared_texture_palette, GL_KOS_multiple_shared_palette"; + return (const GLubyte*) "GL_ARB_framebuffer_object, GL_ARB_multitexture, GL_ARB_texture_rg, GL_EXT_paletted_texture, GL_EXT_shared_texture_palette, GL_KOS_multiple_shared_palette"; } - return "GL_KOS_ERROR: ENUM Unsupported\n"; + return (const GLubyte*) "GL_KOS_ERROR: ENUM Unsupported\n"; } diff --git a/GL/texture.c b/GL/texture.c index e19a3ba..36e1acf 100644 --- a/GL/texture.c +++ b/GL/texture.c @@ -325,7 +325,7 @@ void APIENTRY glDeleteTextures(GLsizei n, GLuint *textures) { void APIENTRY glBindTexture(GLenum target, GLuint texture) { TRACE(); - GLenum target_values [] = {GL_TEXTURE_2D, 0}; + GLint target_values [] = {GL_TEXTURE_2D, 0}; if(_glCheckValidEnum(target, target_values, __func__) != 0) { return; @@ -348,9 +348,9 @@ void APIENTRY glBindTexture(GLenum target, GLuint texture) { void APIENTRY glTexEnvi(GLenum target, GLenum pname, GLint param) { TRACE(); - GLenum target_values [] = {GL_TEXTURE_ENV, 0}; - GLenum pname_values [] = {GL_TEXTURE_ENV_MODE, 0}; - GLenum param_values [] = {GL_MODULATE, GL_DECAL, GL_REPLACE, 0}; + GLint target_values [] = {GL_TEXTURE_ENV, 0}; + GLint pname_values [] = {GL_TEXTURE_ENV_MODE, 0}; + GLint param_values [] = {GL_MODULATE, GL_DECAL, GL_REPLACE, 0}; GLubyte failures = 0; @@ -1074,7 +1074,7 @@ void APIENTRY glTexParameterf(GLenum target, GLenum pname, GLint param) { } GLAPI void APIENTRY glColorTableEXT(GLenum target, GLenum internalFormat, GLsizei width, GLenum format, GLenum type, const GLvoid *data) { - GLenum validTargets[] = { + GLint validTargets[] = { GL_TEXTURE_2D, GL_SHARED_TEXTURE_PALETTE_EXT, GL_SHARED_TEXTURE_PALETTE_0_KOS, @@ -1084,9 +1084,9 @@ GLAPI void APIENTRY glColorTableEXT(GLenum target, GLenum internalFormat, GLsize 0 }; - GLenum 
validInternalFormats[] = {GL_RGB8, GL_RGBA8, 0}; - GLenum validFormats[] = {GL_RGB, GL_RGBA, 0}; - GLenum validTypes[] = {GL_UNSIGNED_BYTE, GL_BYTE, GL_UNSIGNED_SHORT, GL_SHORT, 0}; + GLint validInternalFormats[] = {GL_RGB8, GL_RGBA8, 0}; + GLint validFormats[] = {GL_RGB, GL_RGBA, 0}; + GLint validTypes[] = {GL_UNSIGNED_BYTE, GL_BYTE, GL_UNSIGNED_SHORT, GL_SHORT, 0}; if(_glCheckValidEnum(target, validTargets, __func__) != 0) { return; diff --git a/include/gl.h b/include/gl.h index 5a80a5d..2937fe8 100644 --- a/include/gl.h +++ b/include/gl.h @@ -369,11 +369,11 @@ __BEGIN_DECLS #define GLushort unsigned short #define GLuint unsigned int #define GLenum unsigned int -#define GLsizei unsigned long +#define GLsizei unsigned int #define GLfixed const unsigned int #define GLclampf float #define GLubyte unsigned char -#define GLbitfield unsigned long +#define GLbitfield unsigned int #define GLboolean unsigned char #define GL_FALSE 0 #define GL_TRUE 1 @@ -619,7 +619,7 @@ GLAPI void APIENTRY glGetBooleanv(GLenum pname, GLboolean* params); GLAPI void APIENTRY glGetIntegerv(GLenum pname, GLint *params); GLAPI void APIENTRY glGetFloatv(GLenum pname, GLfloat *params); GLAPI GLboolean APIENTRY glIsEnabled(GLenum cap); -GLAPI const GLbyte* APIENTRY glGetString(GLenum name); +GLAPI const GLubyte* APIENTRY glGetString(GLenum name); /* Error handling */ GLAPI GLenum APIENTRY glGetError(void); From 8101e43e90c087ab35f8d602bc3c163802a1968e Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Wed, 13 Mar 2019 15:19:28 +0000 Subject: [PATCH 4/6] Fix winding issue --- GL/draw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GL/draw.c b/GL/draw.c index 90e807a..844149b 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -888,8 +888,8 @@ static void genArraysTriangleFan(ClipVertex* output, GLuint count) { for(; i < count; ++i) { output[target++] = *first; - output[target++] = buffer[i]; - output[target] = buffer[i - 1]; + output[target++] = buffer[i - 1]; + output[target] = 
buffer[i]; output[target++].flags = PVR_CMD_VERTEX_EOL; } } From d9539da841faa1ecb8e394910cef1c5f5c69044f Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Wed, 13 Mar 2019 15:35:42 +0000 Subject: [PATCH 5/6] Optimisations --- GL/draw.c | 25 +++++++++++++------------ GL/immediate.c | 1 + GL/profiler.c | 2 +- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/GL/draw.c b/GL/draw.c index 844149b..7045557 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -501,13 +501,12 @@ static inline void transformNormalToEyeSpace(GLfloat* normal) { mat_trans_normal3(normal[0], normal[1], normal[2]); } -static inline void swapVertex(ClipVertex* v1, ClipVertex* v2) { - static ClipVertex tmp; - - tmp = *v1; - *v1 = *v2; - *v2 = tmp; -} +#define swapVertex(a, b) \ +do { \ + ClipVertex temp = *a; \ + *a = *b; \ + *b = temp; \ +} while(0) static inline FloatParseFunc _calcVertexParseFunc() { switch(VERTEX_POINTER.type) { @@ -850,15 +849,17 @@ static inline void genArraysTriangles(ClipVertex* output, GLuint count) { } } -static void genArraysQuads(ClipVertex* output, GLuint count) { - GLsizei i = 3; +static inline void genArraysQuads(ClipVertex* output, GLuint count) { + ClipVertex* previous; + ClipVertex* this = output + 3; - for(; i < count; i += 4) { - ClipVertex* this = output + i; - ClipVertex* previous = output + (i - 1); + const ClipVertex* end = output + count; + while(this < end) { + previous = this - 1; swapVertex(previous, this); this->flags = PVR_CMD_VERTEX_EOL; + this += 4; } } diff --git a/GL/immediate.c b/GL/immediate.c index 0c7e46c..b45e79f 100644 --- a/GL/immediate.c +++ b/GL/immediate.c @@ -92,6 +92,7 @@ void APIENTRY glColor3fv(const GLfloat* v) { } void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) { + aligned_vector_reserve(&VERTICES, VERTICES.size + 3); aligned_vector_push_back(&VERTICES, &x, 1); aligned_vector_push_back(&VERTICES, &y, 1); aligned_vector_push_back(&VERTICES, &z, 1); diff --git a/GL/profiler.c b/GL/profiler.c index bc51cf0..968bc9f 
100644 --- a/GL/profiler.c +++ b/GL/profiler.c @@ -138,6 +138,6 @@ void profiler_print_stats() { float ms = ((float) result->total_time_us) / 1000.0f; float avg = ms / (float) result->total_calls; - fprintf(stderr, "%-60s%-20f%-20f%u\n", result->name, avg, ms, result->total_calls); + fprintf(stderr, "%-60s%-20f%-20f%" PRIu64 "\n", result->name, (double)avg, (double)ms, result->total_calls); } } From 7c33f134387dcb27ed8c19786e4ccde639e3a457 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Wed, 13 Mar 2019 15:43:50 +0000 Subject: [PATCH 6/6] Allow configuring initial capacities --- GL/flush.c | 13 ++++++++----- GL/immediate.c | 8 +++++++- GL/private.h | 2 +- include/glkos.h | 5 ++++- 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/GL/flush.c b/GL/flush.c index 1fa552c..74a1f04 100644 --- a/GL/flush.c +++ b/GL/flush.c @@ -78,7 +78,10 @@ void APIENTRY glFinish() { void APIENTRY glKosInitConfig(GLdcConfig* config) { config->autosort_enabled = GL_FALSE; - config->initial_vbuf_capacity = 1024; + config->initial_op_capacity = 1024; + config->initial_pt_capacity = 512; + config->initial_tr_capacity = 1024; + config->initial_immediate_capacity = 1024; config->internal_palette_format = GL_RGBA4; } @@ -91,7 +94,7 @@ void APIENTRY glKosInitEx(GLdcConfig* config) { _glInitAttributePointers(); _glInitContext(); _glInitLights(); - _glInitImmediateMode(); + _glInitImmediateMode(config->initial_immediate_capacity); _glInitFramebuffers(); _glSetInternalPaletteFormat(config->internal_palette_format); @@ -106,9 +109,9 @@ void APIENTRY glKosInitEx(GLdcConfig* config) { aligned_vector_init(&PT_LIST.vector, sizeof(ClipVertex)); aligned_vector_init(&TR_LIST.vector, sizeof(ClipVertex)); - aligned_vector_reserve(&OP_LIST.vector, config->initial_vbuf_capacity); - aligned_vector_reserve(&PT_LIST.vector, config->initial_vbuf_capacity); - aligned_vector_reserve(&TR_LIST.vector, config->initial_vbuf_capacity); + aligned_vector_reserve(&OP_LIST.vector, 
config->initial_op_capacity); + aligned_vector_reserve(&PT_LIST.vector, config->initial_pt_capacity); + aligned_vector_reserve(&TR_LIST.vector, config->initial_tr_capacity); } void APIENTRY glKosInit() { diff --git a/GL/immediate.c b/GL/immediate.c index b45e79f..3bbc9d1 100644 --- a/GL/immediate.c +++ b/GL/immediate.c @@ -28,12 +28,18 @@ static GLfloat UV_COORD[2] = {0.0f, 0.0f}; static GLfloat ST_COORD[2] = {0.0f, 0.0f}; -void _glInitImmediateMode() { +void _glInitImmediateMode(GLuint initial_size) { aligned_vector_init(&VERTICES, sizeof(GLfloat)); aligned_vector_init(&COLOURS, sizeof(GLfloat)); aligned_vector_init(&UV_COORDS, sizeof(GLfloat)); aligned_vector_init(&ST_COORDS, sizeof(GLfloat)); aligned_vector_init(&NORMALS, sizeof(GLfloat)); + + aligned_vector_reserve(&VERTICES, initial_size); + aligned_vector_reserve(&COLOURS, initial_size); + aligned_vector_reserve(&UV_COORDS, initial_size); + aligned_vector_reserve(&ST_COORDS, initial_size); + aligned_vector_reserve(&NORMALS, initial_size); } GLubyte _glCheckImmediateModeInactive(const char* func) { diff --git a/GL/private.h b/GL/private.h index ebdb9b4..bdb6038 100644 --- a/GL/private.h +++ b/GL/private.h @@ -104,7 +104,7 @@ PolyList *_glTransparentPolyList(); void _glInitAttributePointers(); void _glInitContext(); void _glInitLights(); -void _glInitImmediateMode(); +void _glInitImmediateMode(GLuint initial_size); void _glInitMatrices(); void _glInitFramebuffers(); diff --git a/include/glkos.h b/include/glkos.h index ba2b323..34435bd 100644 --- a/include/glkos.h +++ b/include/glkos.h @@ -46,7 +46,10 @@ typedef struct { GLenum internal_palette_format; /* Initial capacity of each of the OP, TR and PT lists in vertices */ - GLuint initial_vbuf_capacity; + GLuint initial_op_capacity; + GLuint initial_tr_capacity; + GLuint initial_pt_capacity; + GLuint initial_immediate_capacity; } GLdcConfig;