Switch to using 4-byte colours internally rather than floats

These are stored in BGRA format so they can be directly cast to a uint32_t to get the
correct format for the argb param in pvr_vertex_t (yay endianness)

This results in a performance improvement as it reduces the data requirements.
This commit is contained in:
Luke Benstead 2018-08-20 09:28:30 +01:00
parent 26bd8df37c
commit da97483eee
3 changed files with 75 additions and 42 deletions

View File

@ -62,6 +62,13 @@ static inline void interpolateVec4(const float* v1, const float* v2, const float
interpolateFloat(v1[3], v2[3], t, &out[3]);
}
/* Linearly interpolates a 4-byte colour, channel by channel.
 *
 * v1  - start colour (4 bytes)
 * v2  - end colour (4 bytes)
 * t   - interpolation factor; 0 yields v1, 1 yields v2
 * out - receives the 4 interpolated bytes
 *
 * The per-channel delta can be negative (v2 darker than v1). Converting a
 * negative float to an unsigned integer type is undefined behaviour in C, so
 * the scaled delta is converted through a signed int before being added back
 * to the start value. The fractional part is truncated toward zero.
 */
static inline void interpolateColour(const uint8_t* v1, const uint8_t* v2, const float t, uint8_t* out) {
    int channel;

    for(channel = 0; channel < 4; ++channel) {
        const int delta = (int) v2[channel] - (int) v1[channel];
        out[channel] = (uint8_t) ((int) v1[channel] + (int) (((float) delta) * t));
    }
}
/* Command words stored in ClipVertex.flags; clipTriangleStrip writes
 * VERTEX_CMD into the vertices it generates.
 * NOTE(review): VERTEX_CMD_EOL presumably marks the last vertex of a strip
 * for the PVR - confirm against the KallistiOS pvr headers. */
const uint32_t VERTEX_CMD_EOL = 0xf0000000;
const uint32_t VERTEX_CMD = 0xe0000000;
@ -145,8 +152,8 @@ void clipTriangleStrip(AlignedVector* vertices, AlignedVector* outBuffer) {
interpolateVec2(v1->st, v2->st, t1, output[1].st);
interpolateVec2(v1->st, v3->st, t2, output[2].st);
interpolateVec4(v1->diffuse, v2->diffuse, t1, output[1].diffuse);
interpolateVec4(v1->diffuse, v3->diffuse, t2, output[2].diffuse);
interpolateColour(v1->bgra, v2->bgra, t1, output[1].bgra);
interpolateColour(v1->bgra, v3->bgra, t2, output[2].bgra);
output[0].flags = VERTEX_CMD;
output[1].flags = VERTEX_CMD;
@ -179,8 +186,8 @@ void clipTriangleStrip(AlignedVector* vertices, AlignedVector* outBuffer) {
interpolateVec2(v2->st, v1->st, t1, output[0].st);
interpolateVec2(v2->st, v3->st, t2, output[2].st);
interpolateVec4(v2->diffuse, v1->diffuse, t1, output[0].diffuse);
interpolateVec4(v2->diffuse, v3->diffuse, t2, output[2].diffuse);
interpolateColour(v2->bgra, v1->bgra, t1, output[0].bgra);
interpolateColour(v2->bgra, v3->bgra, t2, output[2].bgra);
output[0].flags = VERTEX_CMD;
output[1].flags = VERTEX_CMD;
@ -213,8 +220,8 @@ void clipTriangleStrip(AlignedVector* vertices, AlignedVector* outBuffer) {
interpolateVec2(v3->st, v1->st, t1, output[0].st);
interpolateVec2(v3->st, v2->st, t2, output[1].st);
interpolateVec4(v3->diffuse, v1->diffuse, t1, output[0].diffuse);
interpolateVec4(v3->diffuse, v2->diffuse, t2, output[1].diffuse);
interpolateColour(v3->bgra, v1->bgra, t1, output[0].bgra);
interpolateColour(v3->bgra, v2->bgra, t2, output[1].bgra);
output[0].flags = VERTEX_CMD;
output[1].flags = VERTEX_CMD;
@ -248,8 +255,8 @@ void clipTriangleStrip(AlignedVector* vertices, AlignedVector* outBuffer) {
interpolateVec2(v2->st, v3->st, t1, output[2].st);
interpolateVec2(v1->st, v3->st, t2, output[3].st);
interpolateVec4(v2->diffuse, v3->diffuse, t1, output[2].diffuse);
interpolateVec4(v1->diffuse, v3->diffuse, t2, output[3].diffuse);
interpolateColour(v2->bgra, v3->bgra, t1, output[2].bgra);
interpolateColour(v1->bgra, v3->bgra, t2, output[3].bgra);
output[0].flags = VERTEX_CMD;
output[1].flags = VERTEX_CMD;
@ -283,8 +290,8 @@ void clipTriangleStrip(AlignedVector* vertices, AlignedVector* outBuffer) {
interpolateVec2(v1->st, v2->st, t1, output[0].st);
interpolateVec2(v1->st, v3->st, t2, output[2].st);
interpolateVec4(v1->diffuse, v2->diffuse, t1, output[0].diffuse);
interpolateVec4(v1->diffuse, v3->diffuse, t2, output[2].diffuse);
interpolateColour(v1->bgra, v2->bgra, t1, output[0].bgra);
interpolateColour(v1->bgra, v3->bgra, t2, output[2].bgra);
output[0].flags = VERTEX_CMD;
output[1].flags = VERTEX_CMD;
@ -318,8 +325,8 @@ void clipTriangleStrip(AlignedVector* vertices, AlignedVector* outBuffer) {
interpolateVec2(v1->st, v2->st, t1, output[1].st);
interpolateVec2(v3->st, v2->st, t2, output[3].st);
interpolateVec4(v1->diffuse, v2->diffuse, t1, output[1].diffuse);
interpolateVec4(v3->diffuse, v2->diffuse, t2, output[3].diffuse);
interpolateColour(v1->bgra, v2->bgra, t1, output[1].bgra);
interpolateColour(v3->bgra, v2->bgra, t2, output[3].bgra);
output[0].flags = VERTEX_CMD;
output[1].flags = VERTEX_CMD;

View File

@ -18,15 +18,20 @@ typedef enum {
} ClipResult;
/* Index of each colour channel within a 4-byte BGRA colour such as
 * ClipVertex.bgra: blue is byte 0, green byte 1, red byte 2, alpha byte 3. */
#define A8IDX 3
#define R8IDX 2
#define G8IDX 1
#define B8IDX 0
/* A single vertex as processed by the clipping and submission code. */
typedef struct {
uint32_t flags; /* Vertex command word (e.g. VERTEX_CMD) */
float xyz[3]; /* Position */
float uv[2]; /* Texture coordinates */
uint8_t bgra[4]; /* Colour as 4 bytes, indexed with B8IDX/G8IDX/R8IDX/A8IDX */
float nxyz[3]; /* Normal */
float w;
float diffuse[4]; /* Colour in floating point */
float st[2]; /* NOTE(review): presumably a second set of texture coordinates - confirm */
} ClipVertex;

View File

@ -75,6 +75,7 @@ static inline GLuint byte_size(GLenum type) {
}
/* Reads one attribute from the raw bytes at `in` and writes it to `out` as floats. */
typedef void (*FloatParseFunc)(GLfloat* out, const GLubyte* in);
/* Reads one attribute from the raw bytes at `in` and writes it to `out` as bytes
 * (used for the 4-byte vertex colour). */
typedef void (*ByteParseFunc)(GLubyte* out, const GLubyte* in);
/* Callback taking the first, previous, current and next vertex plus the index `i`.
 * NOTE(review): its exact role is not visible here - confirm against its users. */
typedef void (*PolyBuildFunc)(ClipVertex* first, ClipVertex* previous, ClipVertex* vertex, ClipVertex* next, const GLsizei i);
static inline void _parseVec3FromShort3(GLfloat* out, const GLubyte* in) {
@ -134,12 +135,27 @@ static inline void _parseVec4FromFloat4(GLfloat* out, const GLubyte* in) {
out[3] = ptr[3];
}
static inline void _parseColourFromUByte4(GLfloat* out, const GLubyte* in) {
const float ONE_OVER_255 = 1.0f / 255.0f;
out[0] = ((GLfloat) in[0]) * ONE_OVER_255;
out[1] = ((GLfloat) in[1]) * ONE_OVER_255;
out[2] = ((GLfloat) in[2]) * ONE_OVER_255;
out[3] = ((GLfloat) in[3]) * ONE_OVER_255;
/* Copies a 4-byte colour from `in` to `out`, reordering the channels.
 * Input bytes 0..3 are taken as red, green, blue, alpha; each is stored in
 * the slot given by R8IDX, G8IDX, B8IDX and A8IDX respectively. */
static inline void _parseColourFromUByte4(GLubyte* out, const GLubyte* in) {
    /* Destination slot for input byte 0, 1, 2, 3 */
    static const GLubyte slot[4] = {R8IDX, G8IDX, B8IDX, A8IDX};
    GLubyte channel;

    for(channel = 0; channel < 4; ++channel) {
        out[slot[channel]] = in[channel];
    }
}
/* Converts a 4-float colour (red, green, blue, alpha) stored at `in` into
 * 4 bytes in `out`, placing each channel at R8IDX/G8IDX/B8IDX/A8IDX.
 *
 * Each component is clamped to [0, 1] before scaling by 255: converting an
 * out-of-range float to GLubyte is undefined behaviour in C, and would
 * otherwise turn over-bright colours into garbage. In-range values are
 * truncated exactly as before.
 */
static inline void _parseColourFromFloat4(GLubyte* out, const GLubyte* in) {
    /* Destination slot for input component 0, 1, 2, 3 */
    static const GLubyte slot[4] = {R8IDX, G8IDX, B8IDX, A8IDX};
    const GLfloat* fin = (const GLfloat*) in;
    GLubyte channel;

    for(channel = 0; channel < 4; ++channel) {
        GLfloat c = fin[channel];

        /* !(c > 0) also sends NaN to zero */
        if(!(c > 0.0f)) {
            c = 0.0f;
        } else if(c > 1.0f) {
            c = 1.0f;
        }

        out[slot[channel]] = (GLubyte) (c * 255.0f);
    }
}
/* Converts a 3-float colour (red, green, blue) stored at `in` into 4 bytes
 * in `out`, placing each channel at R8IDX/G8IDX/B8IDX and setting the
 * A8IDX byte to 255 (fully opaque).
 *
 * The input is reinterpreted as floats before conversion; reading the raw
 * bytes individually would produce meaningless values. Each component is
 * clamped to [0, 1] before scaling by 255, because converting an
 * out-of-range float to GLubyte is undefined behaviour in C.
 */
static inline void _parseColourFromFloat3(GLubyte* out, const GLubyte* in) {
    /* Destination slot for input component 0, 1, 2 */
    static const GLubyte slot[3] = {R8IDX, G8IDX, B8IDX};
    const GLfloat* fin = (const GLfloat*) in;
    GLubyte channel;

    out[A8IDX] = 255;

    for(channel = 0; channel < 3; ++channel) {
        GLfloat c = fin[channel];

        /* !(c > 0) also sends NaN to zero */
        if(!(c > 0.0f)) {
            c = 0.0f;
        } else if(c > 1.0f) {
            c = 1.0f;
        }

        out[slot[channel]] = (GLubyte) (c * 255.0f);
    }
}
static inline void _constVec2Zero(GLfloat* out, const GLubyte* in) {
@ -160,6 +176,14 @@ static inline void _constVec4One(GLfloat* out, const GLubyte* in) {
out[3] = 1.0f;
}
/* Writes an all-255 colour (every channel at full intensity) to the four
 * bytes at `out`. The `in` argument is not read; it exists so the function
 * matches the ByteParseFunc signature. */
static inline void _constColourOne(GLubyte* out, const GLubyte* in) {
    GLubyte* const end = out + 4;

    while(out != end) {
        *out++ = 255;
    }
}
/* Reads a single index from the raw bytes at `in` and returns it as a GLuint. */
typedef GLuint (*IndexParseFunc)(const GLubyte* in);
static inline GLuint _parseUByteIndex(const GLubyte* in) {
@ -270,9 +294,9 @@ static inline FloatParseFunc _calcVertexParseFunc() {
return NULL;
}
static inline FloatParseFunc _calcDiffuseParseFunc() {
static inline ByteParseFunc _calcDiffuseParseFunc() {
if((ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) != DIFFUSE_ENABLED_FLAG) {
return &_constVec4One;
return &_constColourOne;
}
switch(DIFFUSE_POINTER.type) {
@ -282,23 +306,18 @@ static inline FloatParseFunc _calcDiffuseParseFunc() {
return &_parseColourFromUByte4;
}
} break;
case GL_INT: {
if(DIFFUSE_POINTER.size == 3) {
return &_parseVec3FromInt3;
}
} break;
case GL_FLOAT: {
if(DIFFUSE_POINTER.size == 3) {
return &_parseVec4FromFloat3;
return &_parseColourFromFloat3;
} else if(DIFFUSE_POINTER.size == 4) {
return &_parseVec4FromFloat4;
return &_parseColourFromFloat4;
}
} break;
default:
break;
}
return &_constVec4One;
return &_constColourOne;
}
static inline FloatParseFunc _calcUVParseFunc() {
@ -446,7 +465,7 @@ static void generate(AlignedVector* output, const GLenum mode, const GLsizei fir
ClipVertex* vertex = aligned_vector_resize(output, spaceNeeded);
const FloatParseFunc vertexFunc = _calcVertexParseFunc();
const FloatParseFunc diffuseFunc = _calcDiffuseParseFunc();
const ByteParseFunc diffuseFunc = _calcDiffuseParseFunc();
const FloatParseFunc uvFunc = _calcUVParseFunc();
const FloatParseFunc stFunc = _calcSTParseFunc();
const FloatParseFunc normalFunc = _calcNormalParseFunc();
@ -492,7 +511,7 @@ static void generate(AlignedVector* output, const GLenum mode, const GLsizei fir
}
vertexFunc(target->xyz, vptr);
diffuseFunc(target->diffuse, cptr);
diffuseFunc(target->bgra, cptr);
vptr += vstride;
cptr += cstride;
@ -542,7 +561,7 @@ static void generate(AlignedVector* output, const GLenum mode, const GLsizei fir
indexFunc(&indices[type_byte_size * i]) : i;
vertexFunc(target->xyz, VERTEX_POINTER.ptr + (idx * vstride));
diffuseFunc(target->diffuse, DIFFUSE_POINTER.ptr + (idx * cstride));
diffuseFunc(target->bgra, DIFFUSE_POINTER.ptr + (idx * cstride));
if(doTexture) {
uvFunc(target->uv, UV_POINTER.ptr + (idx * uvstride));
@ -687,23 +706,25 @@ static void light(AlignedVector* vertices) {
for(i = 0; i < vertices->size; ++i, ++vertex, ++ES) {
/* We ignore diffuse colour when lighting is enabled. If GL_COLOR_MATERIAL is enabled
* then the lighting calculation should possibly take it into account */
vertex->diffuse[0] = 0.0f;
vertex->diffuse[1] = 0.0f;
vertex->diffuse[2] = 0.0f;
vertex->diffuse[3] = 0.0f;
GLfloat total [] = {0.0f, 0.0f, 0.0f, 0.0f};
GLfloat to_add [] = {0.0f, 0.0f, 0.0f, 0.0f};
GLubyte j;
for(j = 0; j < MAX_LIGHTS; ++j) {
if(isLightEnabled(j)) {
calculateLightingContribution(j, ES->xyz, ES->n, to_add);
vertex->diffuse[0] += to_add[0];
vertex->diffuse[1] += to_add[1];
vertex->diffuse[2] += to_add[2];
vertex->diffuse[3] += to_add[3];
total[0] += to_add[0];
total[1] += to_add[1];
total[2] += to_add[2];
total[3] += to_add[3];
}
}
vertex->bgra[A8IDX] = (GLubyte) (255.0f * total[3]);
vertex->bgra[R8IDX] = (GLubyte) (255.0f * total[0]);
vertex->bgra[G8IDX] = (GLubyte) (255.0f * total[1]);
vertex->bgra[B8IDX] = (GLubyte) (255.0f * total[2]);
}
}
@ -803,7 +824,7 @@ static void push(const AlignedVector* vertices, PolyList* activePolyList, GLshor
vout->z = vin->xyz[2];
vout->u = vin->uv[0];
vout->v = vin->uv[1];
vout->argb = PVR_PACK_COLOR(vin->diffuse[3], vin->diffuse[0], vin->diffuse[1], vin->diffuse[2]);
vout->argb = *((uint32_t*) vin->bgra);
vout->oargb = 0;
vin++;