diff --git a/GL/clip.c b/GL/clip.c index 426966b..fde4d1b 100644 --- a/GL/clip.c +++ b/GL/clip.c @@ -7,6 +7,7 @@ #define PVR_PACK_COLOR(a, r, g, b) {} #endif +#include "profiler.h" #include "clip.h" #include "../containers/aligned_vector.h" @@ -21,6 +22,7 @@ void enableClipping(unsigned char v) { ZCLIP_ENABLED = v; } +void clipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t) __attribute__((optimize("fast-math"))); void clipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t) { const float NEAR_PLANE = 0.2; // FIXME: this needs to be read from the projection matrix.. somehow @@ -38,13 +40,13 @@ static inline void interpolateFloat(const float v1, const float v2, const float *out = (v * t) + v1; } -static void interpolateVec2(const float* v1, const float* v2, const float t, float* out) { +static inline void interpolateVec2(const float* v1, const float* v2, const float t, float* out) { /* FIXME: SH4 has an asm instruction for this */ interpolateFloat(v1[0], v2[0], t, &out[0]); interpolateFloat(v1[1], v2[1], t, &out[1]); } -static void interpolateVec3(const float* v1, const float* v2, const float t, float* out) { +static inline void interpolateVec3(const float* v1, const float* v2, const float t, float* out) { /* FIXME: SH4 has an asm instruction for this */ interpolateFloat(v1[0], v2[0], t, &out[0]); @@ -52,7 +54,7 @@ static void interpolateVec3(const float* v1, const float* v2, const float t, flo interpolateFloat(v1[2], v2[2], t, &out[2]); } -static void interpolateVec4(const float* v1, const float* v2, const float t, float* out) { +static inline void interpolateVec4(const float* v1, const float* v2, const float t, float* out) { /* FIXME: SH4 has an asm instruction for this */ interpolateFloat(v1[0], v2[0], t, &out[0]); interpolateFloat(v1[1], v2[1], t, &out[1]); @@ -81,29 +83,31 @@ void clipTriangleStrip(AlignedVector* vertices, AlignedVector* outBuffer) { uint32_t i; uint32_t stripCount = 2; /* The 
number of vertices in the source strip so far */ + ClipVertex* thisVertex = aligned_vector_at(vertices, 1); + for(i = 2; i < vertices->size; ++i) { + ++thisVertex; + if(stripCount < 2) { stripCount++; continue; } - ClipVertex* thisVertex = aligned_vector_at(vertices, i); - - ClipVertex* sourceTriangle[3] = { - aligned_vector_at(vertices, i - 2), - aligned_vector_at(vertices, i - 1), + const ClipVertex* sourceTriangle[3] = { + thisVertex - 2, + thisVertex - 1, thisVertex }; /* If we're on an odd vertex, we need to swap the order of the first two vertices, as that's what * triangle strips do */ - uint8_t swap = stripCount > 2 && (stripCount % 2 != 0); - ClipVertex* v1 = swap ? sourceTriangle[1] : sourceTriangle[0]; - ClipVertex* v2 = swap ? sourceTriangle[0] : sourceTriangle[1]; - ClipVertex* v3 = sourceTriangle[2]; + uint32_t swap = stripCount > 2 && (stripCount % 2 != 0); + const ClipVertex* v1 = swap ? sourceTriangle[1] : sourceTriangle[0]; + const ClipVertex* v2 = swap ? sourceTriangle[0] : sourceTriangle[1]; + const ClipVertex* v3 = sourceTriangle[2]; - uint8_t visible = ((v1->w > 0) ? 4 : 0) | ((v2->w > 0) ? 2 : 0) | ((v3->w > 0) ? 1 : 0); - uint8_t startOfStrip = (i == 2) || (outBuffer->size > 2 && ((ClipVertex*) aligned_vector_back(outBuffer))->flags == VERTEX_CMD_EOL); + uint32_t visible = ((v1->w > 0) ? 4 : 0) | ((v2->w > 0) ? 2 : 0) | ((v3->w > 0) ? 1 : 0); + uint32_t startOfStrip = (i == 2) || (outBuffer->size > 2 && ((ClipVertex*) aligned_vector_back(outBuffer))->flags == VERTEX_CMD_EOL); /* All visible, we're fine! 
*/ if(visible == 0b111) { diff --git a/GL/draw.c b/GL/draw.c index c5e22c4..6485a3d 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -6,6 +6,7 @@ #include "../include/gl.h" #include "../include/glext.h" #include "private.h" +#include "profiler.h" typedef struct { const void* ptr; @@ -59,7 +60,7 @@ void initAttributePointers() { NORMAL_POINTER.size = 3; } -static GLuint byte_size(GLenum type) { +static inline GLuint byte_size(GLenum type) { switch(type) { case GL_BYTE: return sizeof(GLbyte); case GL_UNSIGNED_BYTE: return sizeof(GLubyte); @@ -73,73 +74,121 @@ static GLuint byte_size(GLenum type) { } } -static void _parseColour(float* out, const GLubyte* in, GLint size, GLenum type) { +typedef void (*FloatParseFunc)(GLfloat* out, const GLubyte* in); +typedef void (*PolyBuildFunc)(ClipVertex* first, ClipVertex* previous, ClipVertex* vertex, ClipVertex* next, const GLsizei i); + +static inline void _parseVec3FromShort3(GLfloat* out, const GLubyte* in) { + GLshort* ptr = (GLshort*) in; + + out[0] = (GLfloat) ptr[0]; + out[1] = (GLfloat) ptr[1]; + out[2] = (GLfloat) ptr[2]; +} + +static inline void _parseVec3FromInt3(GLfloat* out, const GLubyte* in) { + GLint* ptr = (GLint*) in; + + out[0] = (GLfloat) ptr[0]; + out[1] = (GLfloat) ptr[1]; + out[2] = (GLfloat) ptr[2]; +} + +static inline void _parseVec3FromFloat3(GLfloat* out, const GLubyte* in) { + GLfloat* ptr = (GLfloat*) in; + + out[0] = ptr[0]; + out[1] = ptr[1]; + out[2] = ptr[2]; +} + +static inline void _parseVec2FromFloat2(GLfloat* out, const GLubyte* in) { + GLfloat* ptr = (GLfloat*) in; + + out[0] = ptr[0]; + out[1] = ptr[1]; +} + +static inline void _parseVec3FromFloat2(GLfloat* out, const GLubyte* in) { + GLfloat* ptr = (GLfloat*) in; + + out[0] = ptr[0]; + out[1] = ptr[1]; + out[2] = 0.0f; +} + +static inline void _parseVec4FromFloat3(GLfloat* out, const GLubyte* in) { + GLfloat* ptr = (GLfloat*) in; + + out[0] = ptr[0]; + out[1] = ptr[1]; + out[2] = ptr[2]; + out[3] = 1.0; +} + +static inline void 
_parseVec4FromFloat4(GLfloat* out, const GLubyte* in) { + GLfloat* ptr = (GLfloat*) in; + + out[0] = ptr[0]; + out[1] = ptr[1]; + out[2] = ptr[2]; + out[3] = ptr[3]; +} + +static inline void _parseColourFromUByte4(GLfloat* out, const GLubyte* in) { const float ONE_OVER_255 = 1.0f / 255.0f; - - switch(type) { - case GL_BYTE: { - case GL_UNSIGNED_BYTE: - out[0] = ((GLfloat)in[0]) * ONE_OVER_255; - out[1] = ((GLfloat)in[1]) * ONE_OVER_255; - out[2] = ((GLfloat)in[2]) * ONE_OVER_255; - out[3] = ((GLfloat)in[3]) * ONE_OVER_255; - } break; - case GL_SHORT: - case GL_UNSIGNED_SHORT: - /* FIXME!!!! */ - break; - case GL_INT: - case GL_UNSIGNED_INT: - /* FIXME!!!! */ - break; - case GL_FLOAT: - case GL_DOUBLE: - default: { - out[0] = ((GLfloat*) in)[0]; - out[1] = ((GLfloat*) in)[1]; - out[2] = ((GLfloat*) in)[2]; - out[3] = ((GLfloat*) in)[3]; - } break; - } + out[0] = ((GLfloat) in[0]) * ONE_OVER_255; + out[1] = ((GLfloat) in[1]) * ONE_OVER_255; + out[2] = ((GLfloat) in[2]) * ONE_OVER_255; + out[3] = ((GLfloat) in[3]) * ONE_OVER_255; } -static void _parseFloats(GLfloat* out, const GLubyte* in, GLint size, GLenum type) { - GLubyte i; - - switch(type) { - case GL_SHORT: { - GLshort* inp = (GLshort*) in; - for(i = 0; i < size; ++i) { - out[i] = (GLfloat) inp[i]; - } - } break; - case GL_INT: { - GLint* inp = (GLint*) in; - for(i = 0; i < size; ++i) { - out[i] = (GLfloat) inp[i]; - } - } break; - case GL_FLOAT: - case GL_DOUBLE: /* Double == Float */ - default: { - const GLfloat* ptr = (const GLfloat*) in; - for(i = 0; i < size; ++i) out[i] = ptr[i]; - } - } +static inline void _constVec2Zero(GLfloat* out, const GLubyte* in) { + out[0] = 0.0f; + out[1] = 0.0f; } -static void _parseIndex(GLuint* out, const GLubyte* in, GLenum type) { +static inline void _constVec3NegZ(GLfloat* out, const GLubyte* in) { + out[0] = 0.0f; + out[1] = 0.0f; + out[2] = -1.0f; +} + +static inline void _constVec4One(GLfloat* out, const GLubyte* in) { + out[0] = 1.0f; + out[1] = 1.0f; + out[2] = 1.0f; 
+ out[3] = 1.0f; +} + +typedef GLuint (*IndexParseFunc)(const GLubyte* in); + +static inline GLuint _parseUByteIndex(const GLubyte* in) { + return (GLuint) *in; +} + +static inline GLuint _parseUIntIndex(const GLubyte* in) { + return *((GLuint*) in); +} + +static inline GLuint _parseUShortIndex(const GLubyte* in) { + return *((GLshort*) in); +} + + +static inline IndexParseFunc _calcParseIndexFunc(GLenum type) { switch(type) { case GL_UNSIGNED_BYTE: - *out = (GLuint) *in; + return &_parseUByteIndex; break; case GL_UNSIGNED_INT: - *out = *((GLuint*) in); + return &_parseUIntIndex; break; case GL_UNSIGNED_SHORT: default: - *out = *((GLshort*) in); + break; } + + return &_parseUShortIndex; } @@ -187,110 +236,264 @@ static inline void transformNormalToEyeSpace(GLfloat* normal) { mat_trans_normal3(normal[0], normal[1], normal[2]); } -static void swapVertex(ClipVertex* v1, ClipVertex* v2) { - ClipVertex tmp = *v1; +static inline void swapVertex(ClipVertex* v1, ClipVertex* v2) { + static ClipVertex tmp; + + tmp = *v1; *v1 = *v2; *v2 = tmp; } +static inline FloatParseFunc _calcVertexParseFunc() { + switch(VERTEX_POINTER.type) { + case GL_SHORT: { + if(VERTEX_POINTER.size == 3) { + return &_parseVec3FromShort3; + } + } break; + case GL_INT: { + if(VERTEX_POINTER.size == 3) { + return &_parseVec3FromInt3; + } + } break; + case GL_FLOAT: { + if(VERTEX_POINTER.size == 3) { + return &_parseVec3FromFloat3; + } else if(VERTEX_POINTER.size == 2) { + return &_parseVec3FromFloat2; + } + } break; + default: + break; + } + + return NULL; +} + +static inline FloatParseFunc _calcDiffuseParseFunc() { + if((ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) != DIFFUSE_ENABLED_FLAG) { + return &_constVec4One; + } + + switch(DIFFUSE_POINTER.type) { + case GL_BYTE: + case GL_UNSIGNED_BYTE: { + if(DIFFUSE_POINTER.size == 4) { + return &_parseColourFromUByte4; + } + } break; + case GL_INT: { + if(DIFFUSE_POINTER.size == 3) { + return &_parseVec3FromInt3; + } + } break; + case GL_FLOAT: { + 
if(DIFFUSE_POINTER.size == 3) { + return &_parseVec4FromFloat3; + } else if(DIFFUSE_POINTER.size == 4) { + return &_parseVec4FromFloat4; + } + } break; + default: + break; + } + + return &_constVec4One; +} + +static inline FloatParseFunc _calcUVParseFunc() { + if((ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) != UV_ENABLED_FLAG) { + return &_constVec2Zero; + } + + switch(UV_POINTER.type) { + case GL_FLOAT: { + if(UV_POINTER.size == 2) { + return &_parseVec2FromFloat2; + } + } break; + default: + break; + } + + return &_constVec2Zero; +} + +static inline FloatParseFunc _calcSTParseFunc() { + if((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) != ST_ENABLED_FLAG) { + return &_constVec2Zero; + } + + switch(ST_POINTER.type) { + case GL_FLOAT: { + if(ST_POINTER.size == 2) { + return &_parseVec2FromFloat2; + } + } break; + default: + break; + } + + return &_constVec2Zero; +} + +static inline FloatParseFunc _calcNormalParseFunc() { + if((ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) != NORMAL_ENABLED_FLAG) { + return &_constVec3NegZ; + } + + switch(NORMAL_POINTER.type) { + case GL_SHORT: { + if(NORMAL_POINTER.size == 3) { + return &_parseVec3FromShort3; + } + } break; + case GL_INT: { + if(NORMAL_POINTER.size == 3) { + return &_parseVec3FromInt3; + } + } break; + case GL_FLOAT: { + if(NORMAL_POINTER.size == 3) { + return &_parseVec3FromFloat3; + } else if(NORMAL_POINTER.size == 2) { + return &_parseVec3FromFloat2; + } + } break; + default: + break; + } + + return &_constVec3NegZ; +} + + +static void _buildTriangle(ClipVertex* first, ClipVertex* previous, ClipVertex* vertex, ClipVertex* next, const GLsizei i) { + if(((i + 1) % 3) == 0) { + vertex->flags = PVR_CMD_VERTEX_EOL; + } +} + +static inline GLsizei fast_mod(const GLsizei input, const GLsizei ceil) { + return input >= ceil ? 
input % ceil : input; +} + +static void _buildQuad(ClipVertex* first, ClipVertex* previous, ClipVertex* vertex, ClipVertex* next, const GLsizei i) { + if((i + 1) % 4 == 0) { + previous->flags = PVR_CMD_VERTEX_EOL; + swapVertex(previous, vertex); + } +} + +static void _buildTriangleFan(ClipVertex* first, ClipVertex* previous, ClipVertex* vertex, ClipVertex* next, const GLsizei i) { + if(i == 2) { + swapVertex(previous, vertex); + vertex->flags = PVR_CMD_VERTEX_EOL; + } else if(i > 2) { + ClipVertex* next = vertex + 1; + + *next = *first; + + swapVertex(next, vertex); + + vertex = next + 1; + *vertex = *previous; + + vertex->flags = PVR_CMD_VERTEX_EOL; + } +} + +static void _buildStrip(ClipVertex* first, ClipVertex* previous, ClipVertex* vertex, ClipVertex* next, const GLsizei i) { + if(!next) { + /* If the mode was triangle strip, then the last vertex is the last vertex */ + vertex->flags = PVR_CMD_VERTEX_EOL; + } +} + +static inline PolyBuildFunc _calcBuildFunc(const GLenum type) { + switch(type) { + case GL_TRIANGLES: + return &_buildTriangle; + break; + case GL_QUADS: + return &_buildQuad; + break; + case GL_TRIANGLE_FAN: + case GL_POLYGON: + return &_buildTriangleFan; + break; + default: + break; + } + + return &_buildStrip; +} + +typedef struct { + const GLubyte* vptr; + const GLuint vstride; + const GLubyte* cptr; + const GLuint cstride; + const GLubyte* uvptr; + const GLuint uvstride; + const GLubyte* stptr; + const GLuint ststride; + const GLubyte* nptr; + const GLuint nstride; +} GenerateParams; + static void generate(AlignedVector* output, const GLenum mode, const GLsizei first, const GLsizei count, - const GLubyte* indices, const GLenum type, - const GLubyte* vptr, const GLubyte vstride, const GLubyte* cptr, const GLubyte cstride, - const GLubyte* uvptr, const GLubyte uvstride, const GLubyte* stptr, const GLubyte ststride, - const GLubyte* nptr, const GLubyte nstride) { + const GLubyte* indices, const GLenum type, const GenerateParams* pointers) { /* Read 
from the client buffers and generate an array of ClipVertices */ - GLsizei max = first + count; - - GLsizei spaceNeeded = (mode == GL_POLYGON || mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count; + const GLsizei max = first + count; + const GLsizei spaceNeeded = (mode == GL_POLYGON || mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count; /* Make sure we have room for the output */ - aligned_vector_resize(output, spaceNeeded); + ClipVertex* vertex = aligned_vector_resize(output, spaceNeeded); - ClipVertex* vertex = (ClipVertex*) output->data; + const FloatParseFunc vertexFunc = _calcVertexParseFunc(); + const FloatParseFunc diffuseFunc = _calcDiffuseParseFunc(); + const FloatParseFunc uvFunc = _calcUVParseFunc(); + const FloatParseFunc stFunc = _calcSTParseFunc(); + const FloatParseFunc normalFunc = _calcNormalParseFunc(); - GLsizei j; - GLsizei i = 0; - for(j = first; j < max; ++i, ++j, ++vertex) { + const PolyBuildFunc buildFunc = _calcBuildFunc(mode); + const IndexParseFunc indexFunc = _calcParseIndexFunc(type); + + const GLsizei type_byte_size = byte_size(type); + + ClipVertex* previous = NULL; + ClipVertex* firstV = vertex; + ClipVertex* next = NULL; + + GLsizei i; + + for(i = first; i < max; ++i, ++vertex) { vertex->flags = PVR_CMD_VERTEX; - GLuint idx = j; - if(indices) { - _parseIndex(&idx, &indices[byte_size(type) * j], type); - } + const GLuint idx = (indices) ? 
+ indexFunc(&indices[type_byte_size * i]) : i; - _parseFloats(vertex->xyz, vptr + (idx * vstride), VERTEX_POINTER.size, VERTEX_POINTER.type); + const GLubyte* vin = pointers->vptr + (idx * pointers->vstride); + const GLubyte* din = pointers->cptr + (idx * pointers->cstride); + const GLubyte* uin = pointers->uvptr + (idx * pointers->uvstride); + const GLubyte* sin = pointers->stptr + (idx * pointers->ststride); + const GLubyte* nin = pointers->nptr + (idx * pointers->nstride); - if(ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) { - _parseColour(vertex->diffuse, cptr + (idx * cstride), DIFFUSE_POINTER.size, DIFFUSE_POINTER.type); - } else { - /* Default to white if colours are disabled */ - vertex->diffuse[0] = vertex->diffuse[1] = vertex->diffuse[2] = vertex->diffuse[3] = 1.0f; - } + vertexFunc(vertex->xyz, vin); + diffuseFunc(vertex->diffuse, din); + uvFunc(vertex->uv, uin); + stFunc(vertex->st, sin); + normalFunc(vertex->nxyz, nin); + } - if(ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) { - _parseFloats(vertex->uv, uvptr + (idx * uvstride), UV_POINTER.size, UV_POINTER.type); - } else { - vertex->uv[0] = vertex->uv[1] = 0.0f; - } + vertex = firstV; - if(ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) { - _parseFloats(vertex->st, stptr + (idx * ststride), ST_POINTER.size, ST_POINTER.type); - } else { - vertex->st[0] = vertex->st[1] = 0.0f; - } - - if(ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) { - _parseFloats(vertex->nxyz, nptr + (idx * nstride), NORMAL_POINTER.size, NORMAL_POINTER.type); - } else { - vertex->nxyz[0] = 0.0f; - vertex->nxyz[1] = 0.0f; - vertex->nxyz[2] = -1.0f; - } - - switch(mode) { - case GL_TRIANGLES: { - if(((i + 1) % 3) == 0) { - vertex->flags = PVR_CMD_VERTEX_EOL; - } - } break; - case GL_QUADS: { - if(((i + 1) % 4) == 0) { - ClipVertex* previous = vertex - 1; - previous->flags = PVR_CMD_VERTEX_EOL; - swapVertex(previous, vertex); - } - } break; - case GL_POLYGON: - case GL_TRIANGLE_FAN: { - ClipVertex* previous = vertex - 1; - if(i == 2) 
{ - swapVertex(previous, vertex); - vertex->flags = PVR_CMD_VERTEX_EOL; - } else if(i > 2) { - ClipVertex* first = (ClipVertex*) output->data; - ClipVertex* previous = vertex - 1; - ClipVertex* next = vertex + 1; - - *next = *first; - - swapVertex(next, vertex); - - vertex = next + 1; - *vertex = *previous; - - vertex->flags = PVR_CMD_VERTEX_EOL; - } - } break; - case GL_TRIANGLE_STRIP: - default: { - if(j == (max - 1)) { - /* If the mode was triangle strip, then the last vertex is the last vertex */ - vertex->flags = PVR_CMD_VERTEX_EOL; - } - } - - } + for(i = 0; i < count; ++i, ++vertex) { + next = (i < count - 1) ? vertex + 1 : NULL; + previous = (i > 0) ? vertex - 1 : NULL; + buildFunc(firstV, previous, vertex, next, i); } } @@ -334,7 +537,7 @@ static void clip(AlignedVector* vertices) { } /* Make sure we allocate roughly enough space */ - aligned_vector_reserve(CLIP_BUFFER, vertices->size); + aligned_vector_reserve(CLIP_BUFFER, vertices->size * 1.5); /* Start from empty */ aligned_vector_resize(CLIP_BUFFER, 0); @@ -537,29 +740,38 @@ static void submitVertices(GLenum mode, GLsizei first, GLsizei count, GLenum typ aligned_vector_resize(buffer, 0); } - GLubyte vstride = (VERTEX_POINTER.stride) ? VERTEX_POINTER.stride : VERTEX_POINTER.size * byte_size(VERTEX_POINTER.type); + const GLuint vstride = (VERTEX_POINTER.stride) ? VERTEX_POINTER.stride : VERTEX_POINTER.size * byte_size(VERTEX_POINTER.type); const GLubyte* vptr = VERTEX_POINTER.ptr; - GLubyte cstride = (DIFFUSE_POINTER.stride) ? DIFFUSE_POINTER.stride : DIFFUSE_POINTER.size * byte_size(DIFFUSE_POINTER.type); + const GLuint cstride = (DIFFUSE_POINTER.stride) ? DIFFUSE_POINTER.stride : DIFFUSE_POINTER.size * byte_size(DIFFUSE_POINTER.type); const GLubyte* cptr = DIFFUSE_POINTER.ptr; - GLubyte uvstride = (UV_POINTER.stride) ? UV_POINTER.stride : UV_POINTER.size * byte_size(UV_POINTER.type); + const GLuint uvstride = (UV_POINTER.stride) ? 
UV_POINTER.stride : UV_POINTER.size * byte_size(UV_POINTER.type); const GLubyte* uvptr = UV_POINTER.ptr; - GLubyte ststride = (ST_POINTER.stride) ? ST_POINTER.stride : ST_POINTER.size * byte_size(ST_POINTER.type); + const GLuint ststride = (ST_POINTER.stride) ? ST_POINTER.stride : ST_POINTER.size * byte_size(ST_POINTER.type); const GLubyte* stptr = ST_POINTER.ptr; - GLubyte nstride = (NORMAL_POINTER.stride) ? NORMAL_POINTER.stride : NORMAL_POINTER.size * byte_size(NORMAL_POINTER.type); + const GLuint nstride = (NORMAL_POINTER.stride) ? NORMAL_POINTER.stride : NORMAL_POINTER.size * byte_size(NORMAL_POINTER.type); const GLubyte* nptr = NORMAL_POINTER.ptr; - generate( - buffer, mode, first, count, (GLubyte*) indices, type, - vptr, vstride, cptr, cstride, - uvptr, uvstride, stptr, ststride, - nptr, nstride - ); + GenerateParams params = { + .vptr = vptr, + .vstride = vstride, + .cptr = cptr, + .cstride = cstride, + .uvptr = uvptr, + .uvstride = uvstride, + .stptr = stptr, + .ststride = ststride, + .nptr = nptr, + .nstride = nstride + }; + + generate(buffer, mode, first, count, (GLubyte*) indices, type, &params); light(buffer); + transform(buffer); if(isClippingEnabled()) { @@ -567,6 +779,7 @@ static void submitVertices(GLenum mode, GLsizei first, GLsizei count, GLenum typ } divide(buffer); + push(buffer, activePolyList(), 0); /* diff --git a/GL/flush.c b/GL/flush.c index bb8f3b1..a0c7eef 100644 --- a/GL/flush.c +++ b/GL/flush.c @@ -4,6 +4,7 @@ #include "../containers/aligned_vector.h" #include "private.h" +#include "profiler.h" #define TA_SQ_ADDR (unsigned int *)(void *) \ (0xe0000000 | (((unsigned long)0x10000000) & 0x03ffffe0)) diff --git a/GL/profiler.c b/GL/profiler.c new file mode 100644 index 0000000..32060cc --- /dev/null +++ b/GL/profiler.c @@ -0,0 +1,123 @@ +#include +#include +#include +#include + +#include "profiler.h" +#include "../containers/aligned_vector.h" + +#define MAX_PATH 256 + +typedef struct { + char name[MAX_PATH]; + + uint64_t total_time_us; + 
uint64_t total_calls; +} ProfilerResult; + +typedef struct { + AlignedVector stack; + AlignedVector results; + uint64_t start_time_in_us; +} RootProfiler; + + +static RootProfiler* root = NULL; + +static ProfilerResult* profiler_get_or_create_result(const char* name) { + uint16_t i = 0; + for(; i < root->results.size; ++i) { + ProfilerResult* result = aligned_vector_at(&root->results, i); + if(strcmp(result->name, name) == 0) { + return result; + } + } + + ProfilerResult newResult; + strcpy(newResult.name, name); + newResult.total_calls = 0; + newResult.total_time_us = 0; + aligned_vector_push_back(&root->results, &newResult, 1); + return aligned_vector_back(&root->results); +} + +static uint64_t current_time_in_us() { + return timer_us_gettime64(); +} + +static void profiler_generate_path(const char* suffix, char* path) { + uint16_t i = 0; + for(; i < root->stack.size; ++i) { + Profiler* prof = aligned_vector_at(&root->stack, i); + strcat(path, prof->name); + + if(i != root->stack.size - 1) { + strcat(path, "."); + } + } + + if(strlen(suffix)) { + strcat(path, ":"); + strcat(path, suffix); + } +} + + +Profiler* profiler_push(const char* name) { + if(!root) { + root = (RootProfiler*) malloc(sizeof(RootProfiler)); + aligned_vector_init( + &root->stack, + sizeof(Profiler) + ); + + aligned_vector_init( + &root->results, + sizeof(ProfilerResult) + ); + + aligned_vector_reserve(&root->stack, 32); + aligned_vector_reserve(&root->results, 64); + } + + Profiler profiler; + strncpy(profiler.name, name, 64); + profiler.start_time_in_us = current_time_in_us(); + + aligned_vector_push_back(&root->stack, &profiler, 1); + return aligned_vector_back(&root->stack); +} + +void profiler_checkpoint(const char* name) { + Profiler* prof = aligned_vector_back(&root->stack); + + char path[MAX_PATH]; + path[0] = '\0'; + + profiler_generate_path(name, path); + + uint64_t now = current_time_in_us(); + uint64_t diff = now - prof->start_time_in_us; + prof->start_time_in_us = now; + + 
ProfilerResult* result = profiler_get_or_create_result(path); + result->total_calls++; + result->total_time_us += diff; +} + +void profiler_pop() { + aligned_vector_resize(&root->stack, root->stack.size - 1); +} + +void profiler_print_stats() { + fprintf(stderr, "%-60s%-20s%-20s%-20s\n", "Path", "Average", "Total", "Calls"); + + uint16_t i = 0; + for(; i < root->results.size; ++i) { + ProfilerResult* result = aligned_vector_at(&root->results, i); + float ms = ((float) result->total_time_us) / 1000.0f; + float avg = ms / (float) result->total_calls; + + fprintf(stderr, "%-60s%-20f%-20f%d\n", result->name, avg, ms, result->total_calls); + } +} diff --git a/GL/profiler.h b/GL/profiler.h new file mode 100644 index 0000000..529be69 --- /dev/null +++ b/GL/profiler.h @@ -0,0 +1,15 @@ +#pragma once + +#include <stdint.h> + +typedef struct { + char name[64]; + uint64_t start_time_in_us; +} Profiler; + + +Profiler* profiler_push(const char* name); +void profiler_checkpoint(const char* name); +void profiler_pop(); + +void profiler_print_stats(); diff --git a/Makefile b/Makefile index 4f25a13..8f0e0b1 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ TARGET = libGLdc.a OBJS = GL/draw.o GL/flush.o GL/framebuffer.o GL/immediate.o GL/lighting.o GL/state.o GL/texture.o GL/glu.o -OBJS += GL/matrix.o GL/fog.o GL/error.o GL/clip.o containers/stack.o containers/named_array.o containers/aligned_vector.o +OBJS += GL/matrix.o GL/fog.o GL/error.o GL/clip.o containers/stack.o containers/named_array.o containers/aligned_vector.o GL/profiler.o SUBDIRS = diff --git a/containers/aligned_vector.c b/containers/aligned_vector.c index 8ad7a80..c7294e9 100644 --- a/containers/aligned_vector.c +++ b/containers/aligned_vector.c @@ -50,11 +50,13 @@ void aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned memcpy(dest, objs, vector->element_size * count); } -void aligned_vector_resize(AlignedVector* vector, const unsigned int element_count) { +void* aligned_vector_resize(AlignedVector* 
vector, const unsigned int element_count) { + unsigned int previousCount = vector->size; + /* Don't change memory when resizing downwards, just change the size */ if(element_count <= vector->size) { vector->size = element_count; - return; + return NULL; } if(vector->capacity < element_count) { @@ -64,6 +66,12 @@ void aligned_vector_resize(AlignedVector* vector, const unsigned int element_cou } vector->size = element_count; + + if(previousCount < vector->size) { + return aligned_vector_at(vector, previousCount); + } else { + return NULL; + } } void* aligned_vector_at(const AlignedVector* vector, const unsigned int index) { diff --git a/containers/aligned_vector.h b/containers/aligned_vector.h index 2b710d4..e549bc9 100644 --- a/containers/aligned_vector.h +++ b/containers/aligned_vector.h @@ -17,7 +17,7 @@ typedef struct { void aligned_vector_init(AlignedVector* vector, unsigned int element_size); void aligned_vector_reserve(AlignedVector* vector, unsigned int element_count); void aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count); -void aligned_vector_resize(AlignedVector* vector, const unsigned int element_count); +void* aligned_vector_resize(AlignedVector* vector, const unsigned int element_count); void* aligned_vector_at(const AlignedVector* vector, const unsigned int index); void* aligned_vector_extend(AlignedVector* vector, const unsigned int additional_count); void aligned_vector_clear(AlignedVector* vector);