Add a basic profiler and optimise some code
This commit is contained in:
parent
7d1b57fb12
commit
ba66608a96
32
GL/clip.c
32
GL/clip.c
|
@ -7,6 +7,7 @@
|
|||
#define PVR_PACK_COLOR(a, r, g, b) {}
|
||||
#endif
|
||||
|
||||
#include "profiler.h"
|
||||
#include "clip.h"
|
||||
#include "../containers/aligned_vector.h"
|
||||
|
||||
|
@ -21,6 +22,7 @@ void enableClipping(unsigned char v) {
|
|||
ZCLIP_ENABLED = v;
|
||||
}
|
||||
|
||||
void clipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t) __attribute__((optimize("fast-math")));
|
||||
void clipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t) {
|
||||
const float NEAR_PLANE = 0.2; // FIXME: this needs to be read from the projection matrix.. somehow
|
||||
|
||||
|
@ -38,13 +40,13 @@ static inline void interpolateFloat(const float v1, const float v2, const float
|
|||
*out = (v * t) + v1;
|
||||
}
|
||||
|
||||
static void interpolateVec2(const float* v1, const float* v2, const float t, float* out) {
|
||||
static inline void interpolateVec2(const float* v1, const float* v2, const float t, float* out) {
|
||||
/* FIXME: SH4 has an asm instruction for this */
|
||||
interpolateFloat(v1[0], v2[0], t, &out[0]);
|
||||
interpolateFloat(v1[1], v2[1], t, &out[1]);
|
||||
}
|
||||
|
||||
static void interpolateVec3(const float* v1, const float* v2, const float t, float* out) {
|
||||
static inline void interpolateVec3(const float* v1, const float* v2, const float t, float* out) {
|
||||
/* FIXME: SH4 has an asm instruction for this */
|
||||
|
||||
interpolateFloat(v1[0], v2[0], t, &out[0]);
|
||||
|
@ -52,7 +54,7 @@ static void interpolateVec3(const float* v1, const float* v2, const float t, flo
|
|||
interpolateFloat(v1[2], v2[2], t, &out[2]);
|
||||
}
|
||||
|
||||
static void interpolateVec4(const float* v1, const float* v2, const float t, float* out) {
|
||||
static inline void interpolateVec4(const float* v1, const float* v2, const float t, float* out) {
|
||||
/* FIXME: SH4 has an asm instruction for this */
|
||||
interpolateFloat(v1[0], v2[0], t, &out[0]);
|
||||
interpolateFloat(v1[1], v2[1], t, &out[1]);
|
||||
|
@ -81,29 +83,31 @@ void clipTriangleStrip(AlignedVector* vertices, AlignedVector* outBuffer) {
|
|||
uint32_t i;
|
||||
uint32_t stripCount = 2; /* The number of vertices in the source strip so far */
|
||||
|
||||
ClipVertex* thisVertex = aligned_vector_at(vertices, 1);
|
||||
|
||||
for(i = 2; i < vertices->size; ++i) {
|
||||
++thisVertex;
|
||||
|
||||
if(stripCount < 2) {
|
||||
stripCount++;
|
||||
continue;
|
||||
}
|
||||
|
||||
ClipVertex* thisVertex = aligned_vector_at(vertices, i);
|
||||
|
||||
ClipVertex* sourceTriangle[3] = {
|
||||
aligned_vector_at(vertices, i - 2),
|
||||
aligned_vector_at(vertices, i - 1),
|
||||
const ClipVertex* sourceTriangle[3] = {
|
||||
thisVertex - 2,
|
||||
thisVertex - 1,
|
||||
thisVertex
|
||||
};
|
||||
|
||||
/* If we're on an odd vertex, we need to swap the order of the first two vertices, as that's what
|
||||
* triangle strips do */
|
||||
uint8_t swap = stripCount > 2 && (stripCount % 2 != 0);
|
||||
ClipVertex* v1 = swap ? sourceTriangle[1] : sourceTriangle[0];
|
||||
ClipVertex* v2 = swap ? sourceTriangle[0] : sourceTriangle[1];
|
||||
ClipVertex* v3 = sourceTriangle[2];
|
||||
uint32_t swap = stripCount > 2 && (stripCount % 2 != 0);
|
||||
const ClipVertex* v1 = swap ? sourceTriangle[1] : sourceTriangle[0];
|
||||
const ClipVertex* v2 = swap ? sourceTriangle[0] : sourceTriangle[1];
|
||||
const ClipVertex* v3 = sourceTriangle[2];
|
||||
|
||||
uint8_t visible = ((v1->w > 0) ? 4 : 0) | ((v2->w > 0) ? 2 : 0) | ((v3->w > 0) ? 1 : 0);
|
||||
uint8_t startOfStrip = (i == 2) || (outBuffer->size > 2 && ((ClipVertex*) aligned_vector_back(outBuffer))->flags == VERTEX_CMD_EOL);
|
||||
uint32_t visible = ((v1->w > 0) ? 4 : 0) | ((v2->w > 0) ? 2 : 0) | ((v3->w > 0) ? 1 : 0);
|
||||
uint32_t startOfStrip = (i == 2) || (outBuffer->size > 2 && ((ClipVertex*) aligned_vector_back(outBuffer))->flags == VERTEX_CMD_EOL);
|
||||
|
||||
/* All visible, we're fine! */
|
||||
if(visible == 0b111) {
|
||||
|
|
465
GL/draw.c
465
GL/draw.c
|
@ -6,6 +6,7 @@
|
|||
#include "../include/gl.h"
|
||||
#include "../include/glext.h"
|
||||
#include "private.h"
|
||||
#include "profiler.h"
|
||||
|
||||
typedef struct {
|
||||
const void* ptr;
|
||||
|
@ -59,7 +60,7 @@ void initAttributePointers() {
|
|||
NORMAL_POINTER.size = 3;
|
||||
}
|
||||
|
||||
static GLuint byte_size(GLenum type) {
|
||||
static inline GLuint byte_size(GLenum type) {
|
||||
switch(type) {
|
||||
case GL_BYTE: return sizeof(GLbyte);
|
||||
case GL_UNSIGNED_BYTE: return sizeof(GLubyte);
|
||||
|
@ -73,73 +74,121 @@ static GLuint byte_size(GLenum type) {
|
|||
}
|
||||
}
|
||||
|
||||
static void _parseColour(float* out, const GLubyte* in, GLint size, GLenum type) {
|
||||
const float ONE_OVER_255 = 1.0f / 255.0f;
|
||||
typedef void (*FloatParseFunc)(GLfloat* out, const GLubyte* in);
|
||||
typedef void (*PolyBuildFunc)(ClipVertex* first, ClipVertex* previous, ClipVertex* vertex, ClipVertex* next, const GLsizei i);
|
||||
|
||||
switch(type) {
|
||||
case GL_BYTE: {
|
||||
case GL_UNSIGNED_BYTE:
|
||||
/* Converts three consecutive GLshort values read from `in` into GLfloats
 * and stores them in out[0..2].
 * NOTE(review): `in` is reinterpreted as GLshort*, so it is presumably
 * expected to be suitably aligned for GLshort - confirm against callers. */
static inline void _parseVec3FromShort3(GLfloat* out, const GLubyte* in) {
    const GLshort* src = (const GLshort*) in;
    int c;

    for(c = 0; c < 3; ++c) {
        out[c] = (GLfloat) src[c];
    }
}
|
||||
|
||||
/* Converts three consecutive GLint values read from `in` into GLfloats
 * and stores them in out[0..2]. */
static inline void _parseVec3FromInt3(GLfloat* out, const GLubyte* in) {
    const GLint* src = (const GLint*) in;
    int c;

    for(c = 0; c < 3; ++c) {
        out[c] = (GLfloat) src[c];
    }
}
|
||||
|
||||
/* Copies three consecutive GLfloat values from `in` to out[0..2]. */
static inline void _parseVec3FromFloat3(GLfloat* out, const GLubyte* in) {
    const GLfloat* src = (const GLfloat*) in;
    int c;

    for(c = 0; c < 3; ++c) {
        out[c] = src[c];
    }
}
|
||||
|
||||
/* Copies two consecutive GLfloat values from `in` to out[0..1]. */
static inline void _parseVec2FromFloat2(GLfloat* out, const GLubyte* in) {
    const GLfloat* src = (const GLfloat*) in;
    const GLfloat x = src[0];
    const GLfloat y = src[1];

    out[0] = x;
    out[1] = y;
}
|
||||
|
||||
/* Expands a two-component GLfloat input into three components:
 * out[0..1] are copied from `in`, out[2] is set to zero. */
static inline void _parseVec3FromFloat2(GLfloat* out, const GLubyte* in) {
    const GLfloat* src = (const GLfloat*) in;
    const GLfloat x = src[0];
    const GLfloat y = src[1];

    out[0] = x;
    out[1] = y;
    out[2] = 0.0f;
}
|
||||
|
||||
/* Expands a three-component GLfloat input into four components:
 * out[0..2] are copied from `in`, out[3] is set to one. */
static inline void _parseVec4FromFloat3(GLfloat* out, const GLubyte* in) {
    const GLfloat* src = (const GLfloat*) in;
    int c;

    for(c = 0; c < 3; ++c) {
        out[c] = src[c];
    }

    out[3] = 1.0;
}
|
||||
|
||||
/* Copies four consecutive GLfloat values from `in` to out[0..3]. */
static inline void _parseVec4FromFloat4(GLfloat* out, const GLubyte* in) {
    const GLfloat* src = (const GLfloat*) in;
    int c;

    for(c = 0; c < 4; ++c) {
        out[c] = src[c];
    }
}
|
||||
|
||||
static inline void _parseColourFromUByte4(GLfloat* out, const GLubyte* in) {
|
||||
const float ONE_OVER_255 = 1.0f / 255.0f;
|
||||
out[0] = ((GLfloat) in[0]) * ONE_OVER_255;
|
||||
out[1] = ((GLfloat) in[1]) * ONE_OVER_255;
|
||||
out[2] = ((GLfloat) in[2]) * ONE_OVER_255;
|
||||
out[3] = ((GLfloat) in[3]) * ONE_OVER_255;
|
||||
} break;
|
||||
case GL_SHORT:
|
||||
case GL_UNSIGNED_SHORT:
|
||||
/* FIXME!!!! */
|
||||
break;
|
||||
case GL_INT:
|
||||
case GL_UNSIGNED_INT:
|
||||
/* FIXME!!!! */
|
||||
break;
|
||||
case GL_FLOAT:
|
||||
case GL_DOUBLE:
|
||||
default: {
|
||||
out[0] = ((GLfloat*) in)[0];
|
||||
out[1] = ((GLfloat*) in)[1];
|
||||
out[2] = ((GLfloat*) in)[2];
|
||||
out[3] = ((GLfloat*) in)[3];
|
||||
} break;
|
||||
}
|
||||
}
|
||||
|
||||
static void _parseFloats(GLfloat* out, const GLubyte* in, GLint size, GLenum type) {
|
||||
GLubyte i;
|
||||
|
||||
switch(type) {
|
||||
case GL_SHORT: {
|
||||
GLshort* inp = (GLshort*) in;
|
||||
for(i = 0; i < size; ++i) {
|
||||
out[i] = (GLfloat) inp[i];
|
||||
}
|
||||
} break;
|
||||
case GL_INT: {
|
||||
GLint* inp = (GLint*) in;
|
||||
for(i = 0; i < size; ++i) {
|
||||
out[i] = (GLfloat) inp[i];
|
||||
}
|
||||
} break;
|
||||
case GL_FLOAT:
|
||||
case GL_DOUBLE: /* Double == Float */
|
||||
default: {
|
||||
const GLfloat* ptr = (const GLfloat*) in;
|
||||
for(i = 0; i < size; ++i) out[i] = ptr[i];
|
||||
}
|
||||
}
|
||||
/* FloatParseFunc-shaped constant source: ignores `in` and writes (0, 0)
 * to out[0..1]. */
static inline void _constVec2Zero(GLfloat* out, const GLubyte* in) {
    int c;

    for(c = 0; c < 2; ++c) {
        out[c] = 0.0f;
    }
}
|
||||
|
||||
static void _parseIndex(GLuint* out, const GLubyte* in, GLenum type) {
|
||||
/* FloatParseFunc-shaped constant source: ignores `in` and writes
 * (0, 0, -1) to out[0..2]. */
static inline void _constVec3NegZ(GLfloat* out, const GLubyte* in) {
    out[0] = out[1] = 0.0f;
    out[2] = -1.0f;
}
|
||||
|
||||
/* FloatParseFunc-shaped constant source: ignores `in` and writes
 * (1, 1, 1, 1) to out[0..3]. */
static inline void _constVec4One(GLfloat* out, const GLubyte* in) {
    int c;

    for(c = 0; c < 4; ++c) {
        out[c] = 1.0f;
    }
}
|
||||
|
||||
typedef GLuint (*IndexParseFunc)(const GLubyte* in);
|
||||
|
||||
static inline GLuint _parseUByteIndex(const GLubyte* in) {
|
||||
return (GLuint) *in;
|
||||
}
|
||||
|
||||
/* Reads a single GLuint index from the bytes at `in`. */
static inline GLuint _parseUIntIndex(const GLubyte* in) {
    const GLuint* src = (const GLuint*) in;
    return src[0];
}
|
||||
|
||||
static inline GLuint _parseUShortIndex(const GLubyte* in) {
|
||||
return *((GLshort*) in);
|
||||
}
|
||||
|
||||
|
||||
static inline IndexParseFunc _calcParseIndexFunc(GLenum type) {
|
||||
switch(type) {
|
||||
case GL_UNSIGNED_BYTE:
|
||||
*out = (GLuint) *in;
|
||||
return &_parseUByteIndex;
|
||||
break;
|
||||
case GL_UNSIGNED_INT:
|
||||
*out = *((GLuint*) in);
|
||||
return &_parseUIntIndex;
|
||||
break;
|
||||
case GL_UNSIGNED_SHORT:
|
||||
default:
|
||||
*out = *((GLshort*) in);
|
||||
break;
|
||||
}
|
||||
|
||||
return &_parseUShortIndex;
|
||||
}
|
||||
|
||||
|
||||
|
@ -187,89 +236,160 @@ static inline void transformNormalToEyeSpace(GLfloat* normal) {
|
|||
mat_trans_normal3(normal[0], normal[1], normal[2]);
|
||||
}
|
||||
|
||||
static void swapVertex(ClipVertex* v1, ClipVertex* v2) {
|
||||
ClipVertex tmp = *v1;
|
||||
static inline void swapVertex(ClipVertex* v1, ClipVertex* v2) {
|
||||
static ClipVertex tmp;
|
||||
|
||||
tmp = *v1;
|
||||
*v1 = *v2;
|
||||
*v2 = tmp;
|
||||
}
|
||||
|
||||
static void generate(AlignedVector* output, const GLenum mode, const GLsizei first, const GLsizei count,
|
||||
const GLubyte* indices, const GLenum type,
|
||||
const GLubyte* vptr, const GLubyte vstride, const GLubyte* cptr, const GLubyte cstride,
|
||||
const GLubyte* uvptr, const GLubyte uvstride, const GLubyte* stptr, const GLubyte ststride,
|
||||
const GLubyte* nptr, const GLubyte nstride) {
|
||||
/* Read from the client buffers and generate an array of ClipVertices */
|
||||
|
||||
GLsizei max = first + count;
|
||||
|
||||
GLsizei spaceNeeded = (mode == GL_POLYGON || mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count;
|
||||
|
||||
/* Make sure we have room for the output */
|
||||
aligned_vector_resize(output, spaceNeeded);
|
||||
|
||||
ClipVertex* vertex = (ClipVertex*) output->data;
|
||||
|
||||
GLsizei j;
|
||||
GLsizei i = 0;
|
||||
for(j = first; j < max; ++i, ++j, ++vertex) {
|
||||
vertex->flags = PVR_CMD_VERTEX;
|
||||
|
||||
GLuint idx = j;
|
||||
if(indices) {
|
||||
_parseIndex(&idx, &indices[byte_size(type) * j], type);
|
||||
static inline FloatParseFunc _calcVertexParseFunc() {
|
||||
switch(VERTEX_POINTER.type) {
|
||||
case GL_SHORT: {
|
||||
if(VERTEX_POINTER.size == 3) {
|
||||
return &_parseVec3FromShort3;
|
||||
}
|
||||
} break;
|
||||
case GL_INT: {
|
||||
if(VERTEX_POINTER.size == 3) {
|
||||
return &_parseVec3FromInt3;
|
||||
}
|
||||
} break;
|
||||
case GL_FLOAT: {
|
||||
if(VERTEX_POINTER.size == 3) {
|
||||
return &_parseVec3FromFloat3;
|
||||
} else if(VERTEX_POINTER.size == 2) {
|
||||
return &_parseVec3FromFloat2;
|
||||
}
|
||||
} break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
_parseFloats(vertex->xyz, vptr + (idx * vstride), VERTEX_POINTER.size, VERTEX_POINTER.type);
|
||||
|
||||
if(ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) {
|
||||
_parseColour(vertex->diffuse, cptr + (idx * cstride), DIFFUSE_POINTER.size, DIFFUSE_POINTER.type);
|
||||
} else {
|
||||
/* Default to white if colours are disabled */
|
||||
vertex->diffuse[0] = vertex->diffuse[1] = vertex->diffuse[2] = vertex->diffuse[3] = 1.0f;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if(ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) {
|
||||
_parseFloats(vertex->uv, uvptr + (idx * uvstride), UV_POINTER.size, UV_POINTER.type);
|
||||
} else {
|
||||
vertex->uv[0] = vertex->uv[1] = 0.0f;
|
||||
static inline FloatParseFunc _calcDiffuseParseFunc() {
|
||||
if((ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) != DIFFUSE_ENABLED_FLAG) {
|
||||
return &_constVec4One;
|
||||
}
|
||||
|
||||
if(ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) {
|
||||
_parseFloats(vertex->st, stptr + (idx * ststride), ST_POINTER.size, ST_POINTER.type);
|
||||
} else {
|
||||
vertex->st[0] = vertex->st[1] = 0.0f;
|
||||
switch(DIFFUSE_POINTER.type) {
|
||||
case GL_BYTE:
|
||||
case GL_UNSIGNED_BYTE: {
|
||||
if(DIFFUSE_POINTER.size == 4) {
|
||||
return &_parseColourFromUByte4;
|
||||
}
|
||||
} break;
|
||||
case GL_INT: {
|
||||
if(DIFFUSE_POINTER.size == 3) {
|
||||
return &_parseVec3FromInt3;
|
||||
}
|
||||
} break;
|
||||
case GL_FLOAT: {
|
||||
if(DIFFUSE_POINTER.size == 3) {
|
||||
return &_parseVec4FromFloat3;
|
||||
} else if(DIFFUSE_POINTER.size == 4) {
|
||||
return &_parseVec4FromFloat4;
|
||||
}
|
||||
} break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if(ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) {
|
||||
_parseFloats(vertex->nxyz, nptr + (idx * nstride), NORMAL_POINTER.size, NORMAL_POINTER.type);
|
||||
} else {
|
||||
vertex->nxyz[0] = 0.0f;
|
||||
vertex->nxyz[1] = 0.0f;
|
||||
vertex->nxyz[2] = -1.0f;
|
||||
return &_constVec4One;
|
||||
}
|
||||
|
||||
switch(mode) {
|
||||
case GL_TRIANGLES: {
|
||||
static inline FloatParseFunc _calcUVParseFunc() {
|
||||
if((ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) != UV_ENABLED_FLAG) {
|
||||
return &_constVec2Zero;
|
||||
}
|
||||
|
||||
switch(UV_POINTER.type) {
|
||||
case GL_FLOAT: {
|
||||
if(UV_POINTER.size == 2) {
|
||||
return &_parseVec2FromFloat2;
|
||||
}
|
||||
} break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return &_constVec2Zero;
|
||||
}
|
||||
|
||||
static inline FloatParseFunc _calcSTParseFunc() {
|
||||
if((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) != ST_ENABLED_FLAG) {
|
||||
return &_constVec2Zero;
|
||||
}
|
||||
|
||||
switch(ST_POINTER.type) {
|
||||
case GL_FLOAT: {
|
||||
if(ST_POINTER.size == 2) {
|
||||
return &_parseVec2FromFloat2;
|
||||
}
|
||||
} break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return &_constVec2Zero;
|
||||
}
|
||||
|
||||
static inline FloatParseFunc _calcNormalParseFunc() {
|
||||
if((ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) != NORMAL_ENABLED_FLAG) {
|
||||
return &_constVec3NegZ;
|
||||
}
|
||||
|
||||
switch(NORMAL_POINTER.type) {
|
||||
case GL_SHORT: {
|
||||
if(NORMAL_POINTER.size == 3) {
|
||||
return &_parseVec3FromShort3;
|
||||
}
|
||||
} break;
|
||||
case GL_INT: {
|
||||
if(NORMAL_POINTER.size == 3) {
|
||||
return &_parseVec3FromInt3;
|
||||
}
|
||||
} break;
|
||||
case GL_FLOAT: {
|
||||
if(NORMAL_POINTER.size == 3) {
|
||||
return &_parseVec3FromFloat3;
|
||||
} else if(NORMAL_POINTER.size == 2) {
|
||||
return &_parseVec3FromFloat2;
|
||||
}
|
||||
} break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return &_constVec3NegZ;
|
||||
}
|
||||
|
||||
|
||||
static void _buildTriangle(ClipVertex* first, ClipVertex* previous, ClipVertex* vertex, ClipVertex* next, const GLsizei i) {
|
||||
if(((i + 1) % 3) == 0) {
|
||||
vertex->flags = PVR_CMD_VERTEX_EOL;
|
||||
}
|
||||
} break;
|
||||
case GL_QUADS: {
|
||||
if(((i + 1) % 4) == 0) {
|
||||
ClipVertex* previous = vertex - 1;
|
||||
}
|
||||
|
||||
/* Returns `input` unchanged when it is already below `ceil`, otherwise
 * `input % ceil`; the division is only performed when it is needed. */
static inline GLsizei fast_mod(const GLsizei input, const GLsizei ceil) {
    if(input < ceil) {
        return input;
    }

    return input % ceil;
}
|
||||
|
||||
static void _buildQuad(ClipVertex* first, ClipVertex* previous, ClipVertex* vertex, ClipVertex* next, const GLsizei i) {
|
||||
if((i + 1) % 4 == 0) {
|
||||
previous->flags = PVR_CMD_VERTEX_EOL;
|
||||
swapVertex(previous, vertex);
|
||||
}
|
||||
} break;
|
||||
case GL_POLYGON:
|
||||
case GL_TRIANGLE_FAN: {
|
||||
ClipVertex* previous = vertex - 1;
|
||||
}
|
||||
|
||||
static void _buildTriangleFan(ClipVertex* first, ClipVertex* previous, ClipVertex* vertex, ClipVertex* next, const GLsizei i) {
|
||||
if(i == 2) {
|
||||
swapVertex(previous, vertex);
|
||||
vertex->flags = PVR_CMD_VERTEX_EOL;
|
||||
} else if(i > 2) {
|
||||
ClipVertex* first = (ClipVertex*) output->data;
|
||||
ClipVertex* previous = vertex - 1;
|
||||
ClipVertex* next = vertex + 1;
|
||||
|
||||
*next = *first;
|
||||
|
@ -281,16 +401,99 @@ static void generate(AlignedVector* output, const GLenum mode, const GLsizei fir
|
|||
|
||||
vertex->flags = PVR_CMD_VERTEX_EOL;
|
||||
}
|
||||
} break;
|
||||
case GL_TRIANGLE_STRIP:
|
||||
default: {
|
||||
if(j == (max - 1)) {
|
||||
}
|
||||
|
||||
/* PolyBuildFunc for strips: the vertex that has no successor (`next` is
 * NULL) is the last one of the strip and is flagged as end-of-list.
 * `first`, `previous` and `i` are unused here. */
static void _buildStrip(ClipVertex* first, ClipVertex* previous, ClipVertex* vertex, ClipVertex* next, const GLsizei i) {
    if(next != NULL) {
        return;
    }

    vertex->flags = PVR_CMD_VERTEX_EOL;
}
|
||||
|
||||
static inline PolyBuildFunc _calcBuildFunc(const GLenum type) {
|
||||
switch(type) {
|
||||
case GL_TRIANGLES:
|
||||
return &_buildTriangle;
|
||||
break;
|
||||
case GL_QUADS:
|
||||
return &_buildQuad;
|
||||
break;
|
||||
case GL_TRIANGLE_FAN:
|
||||
case GL_POLYGON:
|
||||
return &_buildTriangleFan;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return &_buildStrip;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
const GLubyte* vptr;
|
||||
const GLuint vstride;
|
||||
const GLubyte* cptr;
|
||||
const GLuint cstride;
|
||||
const GLubyte* uvptr;
|
||||
const GLuint uvstride;
|
||||
const GLubyte* stptr;
|
||||
const GLuint ststride;
|
||||
const GLubyte* nptr;
|
||||
const GLuint nstride;
|
||||
} GenerateParams;
|
||||
|
||||
static void generate(AlignedVector* output, const GLenum mode, const GLsizei first, const GLsizei count,
|
||||
const GLubyte* indices, const GLenum type, const GenerateParams* pointers) {
|
||||
/* Read from the client buffers and generate an array of ClipVertices */
|
||||
|
||||
const GLsizei max = first + count;
|
||||
const GLsizei spaceNeeded = (mode == GL_POLYGON || mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count;
|
||||
|
||||
/* Make sure we have room for the output */
|
||||
ClipVertex* vertex = aligned_vector_resize(output, spaceNeeded);
|
||||
|
||||
const FloatParseFunc vertexFunc = _calcVertexParseFunc();
|
||||
const FloatParseFunc diffuseFunc = _calcDiffuseParseFunc();
|
||||
const FloatParseFunc uvFunc = _calcUVParseFunc();
|
||||
const FloatParseFunc stFunc = _calcSTParseFunc();
|
||||
const FloatParseFunc normalFunc = _calcNormalParseFunc();
|
||||
|
||||
const PolyBuildFunc buildFunc = _calcBuildFunc(mode);
|
||||
const IndexParseFunc indexFunc = _calcParseIndexFunc(type);
|
||||
|
||||
const GLsizei type_byte_size = byte_size(type);
|
||||
|
||||
ClipVertex* previous = NULL;
|
||||
ClipVertex* firstV = vertex;
|
||||
ClipVertex* next = NULL;
|
||||
|
||||
GLsizei i;
|
||||
|
||||
for(i = first; i < max; ++i, ++vertex) {
|
||||
vertex->flags = PVR_CMD_VERTEX;
|
||||
|
||||
const GLuint idx = (indices) ?
|
||||
indexFunc(&indices[type_byte_size * i]) : i;
|
||||
|
||||
const GLubyte* vin = pointers->vptr + (idx * pointers->vstride);
|
||||
const GLubyte* din = pointers->cptr + (idx * pointers->cstride);
|
||||
const GLubyte* uin = pointers->uvptr + (idx * pointers->uvstride);
|
||||
const GLubyte* sin = pointers->stptr + (idx * pointers->ststride);
|
||||
const GLubyte* nin = pointers->nptr + (idx * pointers->nstride);
|
||||
|
||||
vertexFunc(vertex->xyz, vin);
|
||||
diffuseFunc(vertex->diffuse, din);
|
||||
uvFunc(vertex->uv, uin);
|
||||
stFunc(vertex->st, sin);
|
||||
normalFunc(vertex->nxyz, nin);
|
||||
}
|
||||
|
||||
vertex = firstV;
|
||||
|
||||
for(i = 0; i < count; ++i, ++vertex) {
|
||||
next = (i < count - 1) ? vertex + 1 : NULL;
|
||||
previous = (i > 0) ? vertex - 1 : NULL;
|
||||
buildFunc(firstV, previous, vertex, next, i);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -334,7 +537,7 @@ static void clip(AlignedVector* vertices) {
|
|||
}
|
||||
|
||||
/* Make sure we allocate roughly enough space */
|
||||
aligned_vector_reserve(CLIP_BUFFER, vertices->size);
|
||||
aligned_vector_reserve(CLIP_BUFFER, vertices->size * 1.5);
|
||||
|
||||
/* Start from empty */
|
||||
aligned_vector_resize(CLIP_BUFFER, 0);
|
||||
|
@ -537,29 +740,38 @@ static void submitVertices(GLenum mode, GLsizei first, GLsizei count, GLenum typ
|
|||
aligned_vector_resize(buffer, 0);
|
||||
}
|
||||
|
||||
GLubyte vstride = (VERTEX_POINTER.stride) ? VERTEX_POINTER.stride : VERTEX_POINTER.size * byte_size(VERTEX_POINTER.type);
|
||||
const GLuint vstride = (VERTEX_POINTER.stride) ? VERTEX_POINTER.stride : VERTEX_POINTER.size * byte_size(VERTEX_POINTER.type);
|
||||
const GLubyte* vptr = VERTEX_POINTER.ptr;
|
||||
|
||||
GLubyte cstride = (DIFFUSE_POINTER.stride) ? DIFFUSE_POINTER.stride : DIFFUSE_POINTER.size * byte_size(DIFFUSE_POINTER.type);
|
||||
const GLuint cstride = (DIFFUSE_POINTER.stride) ? DIFFUSE_POINTER.stride : DIFFUSE_POINTER.size * byte_size(DIFFUSE_POINTER.type);
|
||||
const GLubyte* cptr = DIFFUSE_POINTER.ptr;
|
||||
|
||||
GLubyte uvstride = (UV_POINTER.stride) ? UV_POINTER.stride : UV_POINTER.size * byte_size(UV_POINTER.type);
|
||||
const GLuint uvstride = (UV_POINTER.stride) ? UV_POINTER.stride : UV_POINTER.size * byte_size(UV_POINTER.type);
|
||||
const GLubyte* uvptr = UV_POINTER.ptr;
|
||||
|
||||
GLubyte ststride = (ST_POINTER.stride) ? ST_POINTER.stride : ST_POINTER.size * byte_size(ST_POINTER.type);
|
||||
const GLuint ststride = (ST_POINTER.stride) ? ST_POINTER.stride : ST_POINTER.size * byte_size(ST_POINTER.type);
|
||||
const GLubyte* stptr = ST_POINTER.ptr;
|
||||
|
||||
GLubyte nstride = (NORMAL_POINTER.stride) ? NORMAL_POINTER.stride : NORMAL_POINTER.size * byte_size(NORMAL_POINTER.type);
|
||||
const GLuint nstride = (NORMAL_POINTER.stride) ? NORMAL_POINTER.stride : NORMAL_POINTER.size * byte_size(NORMAL_POINTER.type);
|
||||
const GLubyte* nptr = NORMAL_POINTER.ptr;
|
||||
|
||||
generate(
|
||||
buffer, mode, first, count, (GLubyte*) indices, type,
|
||||
vptr, vstride, cptr, cstride,
|
||||
uvptr, uvstride, stptr, ststride,
|
||||
nptr, nstride
|
||||
);
|
||||
GenerateParams params = {
|
||||
.vptr = vptr,
|
||||
.vstride = vstride,
|
||||
.cptr = cptr,
|
||||
.cstride = cstride,
|
||||
.uvptr = uvptr,
|
||||
.uvstride = uvstride,
|
||||
.stptr = stptr,
|
||||
.ststride = ststride,
|
||||
.nptr = nptr,
|
||||
.nstride = nstride
|
||||
};
|
||||
|
||||
generate(buffer, mode, first, count, (GLubyte*) indices, type, &params);
|
||||
|
||||
light(buffer);
|
||||
|
||||
transform(buffer);
|
||||
|
||||
if(isClippingEnabled()) {
|
||||
|
@ -567,6 +779,7 @@ static void submitVertices(GLenum mode, GLsizei first, GLsizei count, GLenum typ
|
|||
}
|
||||
|
||||
divide(buffer);
|
||||
|
||||
push(buffer, activePolyList(), 0);
|
||||
|
||||
/*
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
#include "../containers/aligned_vector.h"
|
||||
#include "private.h"
|
||||
#include "profiler.h"
|
||||
|
||||
#define TA_SQ_ADDR (unsigned int *)(void *) \
|
||||
(0xe0000000 | (((unsigned long)0x10000000) & 0x03ffffe0))
|
||||
|
|
123
GL/profiler.c
Normal file
123
GL/profiler.c
Normal file
|
@ -0,0 +1,123 @@
|
|||
#include <kos.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "profiler.h"
|
||||
#include "../containers/aligned_vector.h"
|
||||
|
||||
#define MAX_PATH 256
|
||||
|
||||
typedef struct {
|
||||
char name[MAX_PATH];
|
||||
|
||||
uint64_t total_time_us;
|
||||
uint64_t total_calls;
|
||||
} ProfilerResult;
|
||||
|
||||
typedef struct {
|
||||
AlignedVector stack;
|
||||
AlignedVector results;
|
||||
uint64_t start_time_in_us;
|
||||
} RootProfiler;
|
||||
|
||||
|
||||
static RootProfiler* root = NULL;
|
||||
|
||||
static ProfilerResult* profiler_get_or_create_result(const char* name) {
|
||||
uint16_t i = 0;
|
||||
for(; i < root->results.size; ++i) {
|
||||
ProfilerResult* result = aligned_vector_at(&root->results, i);
|
||||
if(strcmp(result->name, name) == 0) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
ProfilerResult newResult;
|
||||
strcpy(newResult.name, name);
|
||||
newResult.total_calls = 0;
|
||||
newResult.total_time_us = 0;
|
||||
aligned_vector_push_back(&root->results, &newResult, 1);
|
||||
return aligned_vector_back(&root->results);
|
||||
}
|
||||
|
||||
/* Returns the current value of the KOS 64-bit microsecond timer. */
static uint64_t current_time_in_us() {
    const uint64_t now = timer_us_gettime64();
    return now;
}
|
||||
|
||||
static void profiler_generate_path(const char* suffix, char* path) {
|
||||
uint16_t i = 0;
|
||||
for(; i < root->stack.size; ++i) {
|
||||
Profiler* prof = aligned_vector_at(&root->stack, i);
|
||||
strcat(path, prof->name);
|
||||
|
||||
if(i != root->stack.size - 1) {
|
||||
strcat(path, ".");
|
||||
}
|
||||
}
|
||||
|
||||
if(strlen(suffix)) {
|
||||
strcat(path, ":");
|
||||
strcat(path, suffix);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Profiler* profiler_push(const char* name) {
|
||||
if(!root) {
|
||||
root = (RootProfiler*) malloc(sizeof(RootProfiler));
|
||||
aligned_vector_init(
|
||||
&root->stack,
|
||||
sizeof(Profiler)
|
||||
);
|
||||
|
||||
aligned_vector_init(
|
||||
&root->results,
|
||||
sizeof(ProfilerResult)
|
||||
);
|
||||
|
||||
aligned_vector_reserve(&root->stack, 32);
|
||||
aligned_vector_reserve(&root->results, 64);
|
||||
}
|
||||
|
||||
Profiler profiler;
|
||||
strncpy(profiler.name, name, 64);
|
||||
profiler.start_time_in_us = current_time_in_us();
|
||||
|
||||
aligned_vector_push_back(&root->stack, &profiler, 1);
|
||||
return aligned_vector_back(&root->stack);
|
||||
}
|
||||
|
||||
void profiler_checkpoint(const char* name) {
|
||||
Profiler* prof = aligned_vector_back(&root->stack);
|
||||
|
||||
char path[MAX_PATH];
|
||||
path[0] = '\0';
|
||||
|
||||
profiler_generate_path(name, path);
|
||||
|
||||
uint64_t now = current_time_in_us();
|
||||
uint64_t diff = now - prof->start_time_in_us;
|
||||
prof->start_time_in_us = now;
|
||||
|
||||
ProfilerResult* result = profiler_get_or_create_result(path);
|
||||
result->total_calls++;
|
||||
result->total_time_us += diff;
|
||||
}
|
||||
|
||||
void profiler_pop() {
|
||||
aligned_vector_resize(&root->stack, root->stack.size - 1);
|
||||
}
|
||||
|
||||
void profiler_print_stats() {
|
||||
fprintf(stderr, "%-60s%-20s%-20s%-20s\n", "Path", "Average", "Total", "Calls");
|
||||
|
||||
uint16_t i = 0;
|
||||
for(; i < root->results.size; ++i) {
|
||||
ProfilerResult* result = aligned_vector_at(&root->results, i);
|
||||
float ms = ((float) result->total_time_us) / 1000.0f;
|
||||
float avg = ms / (float) result->total_calls;
|
||||
|
||||
fprintf(stderr, "%-60s%-20f%-20f%d\n", result->name, avg, ms, result->total_calls);
|
||||
}
|
||||
}
|
15
GL/profiler.h
Normal file
15
GL/profiler.h
Normal file
|
@ -0,0 +1,15 @@
|
|||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/* One entry of the profiler stack. */
typedef struct {
    /* Name given to profiler_push; used as one component of the result path */
    char name[64];

    /* Set when the profiler is pushed and reset at every profiler_checkpoint */
    uint64_t start_time_in_us;
} Profiler;
|
||||
|
||||
|
||||
Profiler* profiler_push(const char* name);
|
||||
void profiler_checkpoint(const char* name);
|
||||
void profiler_pop();
|
||||
|
||||
void profiler_print_stats();
|
2
Makefile
2
Makefile
|
@ -7,7 +7,7 @@
|
|||
|
||||
TARGET = libGLdc.a
|
||||
OBJS = GL/draw.o GL/flush.o GL/framebuffer.o GL/immediate.o GL/lighting.o GL/state.o GL/texture.o GL/glu.o
|
||||
OBJS += GL/matrix.o GL/fog.o GL/error.o GL/clip.o containers/stack.o containers/named_array.o containers/aligned_vector.o
|
||||
OBJS += GL/matrix.o GL/fog.o GL/error.o GL/clip.o containers/stack.o containers/named_array.o containers/aligned_vector.o GL/profiler.o
|
||||
|
||||
SUBDIRS =
|
||||
|
||||
|
|
|
@ -50,11 +50,13 @@ void aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned
|
|||
memcpy(dest, objs, vector->element_size * count);
|
||||
}
|
||||
|
||||
void aligned_vector_resize(AlignedVector* vector, const unsigned int element_count) {
|
||||
void* aligned_vector_resize(AlignedVector* vector, const unsigned int element_count) {
|
||||
unsigned int previousCount = vector->size;
|
||||
|
||||
/* Don't change memory when resizing downwards, just change the size */
|
||||
if(element_count <= vector->size) {
|
||||
vector->size = element_count;
|
||||
return;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if(vector->capacity < element_count) {
|
||||
|
@ -64,6 +66,12 @@ void aligned_vector_resize(AlignedVector* vector, const unsigned int element_cou
|
|||
}
|
||||
|
||||
vector->size = element_count;
|
||||
|
||||
if(previousCount < vector->size) {
|
||||
return aligned_vector_at(vector, previousCount);
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void* aligned_vector_at(const AlignedVector* vector, const unsigned int index) {
|
||||
|
|
|
@ -17,7 +17,7 @@ typedef struct {
|
|||
void aligned_vector_init(AlignedVector* vector, unsigned int element_size);
|
||||
void aligned_vector_reserve(AlignedVector* vector, unsigned int element_count);
|
||||
void aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count);
|
||||
void aligned_vector_resize(AlignedVector* vector, const unsigned int element_count);
|
||||
void* aligned_vector_resize(AlignedVector* vector, const unsigned int element_count);
|
||||
void* aligned_vector_at(const AlignedVector* vector, const unsigned int index);
|
||||
void* aligned_vector_extend(AlignedVector* vector, const unsigned int additional_count);
|
||||
void aligned_vector_clear(AlignedVector* vector);
|
||||
|
|
Loading…
Reference in New Issue
Block a user