diff --git a/GL/clip.c b/GL/clip.c index 95350fd..a4c55fa 100644 --- a/GL/clip.c +++ b/GL/clip.c @@ -50,24 +50,12 @@ static void interpolateVec3(const float* v1, const float* v2, const float t, flo out[2] = v1[2] + (v2[2] - v1[2]) * t; } -static void interpolateColour(const uint32_t* c1, const uint32_t* c2, const float t, uint32_t* out) { - float r1 = (*c1 >> 16) & 0xFF; - float r2 = (*c2 >> 16) & 0xFF; - uint8_t r = (r1 + (r2 - r1) * t); - - r1 = (*c1 >> 24) & 0xFF; - r2 = (*c2 >> 24) & 0xFF; - uint8_t a = (r1 + (r2 - r1) * t); - - r1 = (*c1 >> 8) & 0xFF; - r2 = (*c2 >> 8) & 0xFF; - uint8_t g = (r1 + (r2 - r1) * t); - - r1 = (*c1 >> 0) & 0xFF; - r2 = (*c2 >> 0) & 0xFF; - uint8_t b = (r1 + (r2 - r1) * t); - - *out = (a << 24 | r << 16 | g << 8 | b); +static void interpolateVec4(const float* v1, const float* v2, const float t, float* out) { + /* FIXME: SH4 has an asm instruction for this */ + out[0] = v1[0] + (v2[0] - v1[0]) * t; + out[1] = v1[1] + (v2[1] - v1[1]) * t; + out[2] = v1[2] + (v2[2] - v1[2]) * t; + out[3] = v1[3] + (v2[3] - v1[3]) * t; } const uint32_t VERTEX_CMD_EOL = 0xf0000000; @@ -147,8 +135,8 @@ void clipTriangleStrip(AlignedVector* vertices, AlignedVector* outBuffer) { interpolateVec2(v1->uv, v2->uv, t1, output[1].uv); interpolateVec2(v1->uv, v3->uv, t2, output[2].uv); - interpolateColour((uint32_t*) &v1->argb, (uint32_t*) &v2->argb, t1, (uint32_t*) &output[1].argb); - interpolateColour((uint32_t*) &v1->argb, (uint32_t*) &v3->argb, t2, (uint32_t*) &output[2].argb); + interpolateVec4(v1->diffuse, v2->diffuse, t1, output[1].diffuse); + interpolateVec4(v1->diffuse, v3->diffuse, t2, output[2].diffuse); output[0].flags = VERTEX_CMD; output[1].flags = VERTEX_CMD; @@ -178,8 +166,8 @@ void clipTriangleStrip(AlignedVector* vertices, AlignedVector* outBuffer) { interpolateVec2(v2->uv, v1->uv, t1, output[0].uv); interpolateVec2(v2->uv, v3->uv, t2, output[2].uv); - interpolateColour((uint32_t*) &v2->argb, (uint32_t*) &v1->argb, t1, (uint32_t*) &output[0].argb); - interpolateColour((uint32_t*) &v2->argb, (uint32_t*) &v3->argb, t2, (uint32_t*) &output[2].argb); + interpolateVec4(v2->diffuse, v1->diffuse, t1, output[0].diffuse); + interpolateVec4(v2->diffuse, v3->diffuse, t2, output[2].diffuse); output[0].flags = VERTEX_CMD; output[1].flags = VERTEX_CMD; @@ -209,8 +197,8 @@ void clipTriangleStrip(AlignedVector* vertices, AlignedVector* outBuffer) { interpolateVec2(v3->uv, v1->uv, t1, output[0].uv); interpolateVec2(v3->uv, v2->uv, t2, output[1].uv); - interpolateColour((uint32_t*) &v3->argb, (uint32_t*) &v1->argb, t1, (uint32_t*) &output[0].argb); - interpolateColour((uint32_t*) &v3->argb, (uint32_t*) &v2->argb, t2, (uint32_t*) &output[1].argb); + interpolateVec4(v3->diffuse, v1->diffuse, t1, output[0].diffuse); + interpolateVec4(v3->diffuse, v2->diffuse, t2, output[1].diffuse); output[0].flags = VERTEX_CMD; output[1].flags = VERTEX_CMD; @@ -241,8 +229,8 @@ void clipTriangleStrip(AlignedVector* vertices, AlignedVector* outBuffer) { interpolateVec2(v2->uv, v3->uv, t1, output[2].uv); interpolateVec2(v1->uv, v3->uv, t2, output[3].uv); - interpolateColour((uint32_t*) &v2->argb, (uint32_t*) &v3->argb, t1, (uint32_t*) &output[2].argb); - interpolateColour((uint32_t*) &v1->argb, (uint32_t*) &v3->argb, t2, (uint32_t*) &output[3].argb); + interpolateVec4(v2->diffuse, v3->diffuse, t1, output[2].diffuse); + interpolateVec4(v1->diffuse, v3->diffuse, t2, output[3].diffuse); output[0].flags = VERTEX_CMD; output[1].flags = VERTEX_CMD; @@ -273,8 +261,8 @@ void clipTriangleStrip(AlignedVector* vertices, AlignedVector* outBuffer) { interpolateVec2(v1->uv, v2->uv, t1, output[0].uv); interpolateVec2(v1->uv, v3->uv, t2, output[2].uv); - interpolateColour((uint32_t*) &v1->argb, (uint32_t*) &v2->argb, t1, (uint32_t*) &output[0].argb); - interpolateColour((uint32_t*) &v1->argb, (uint32_t*) &v3->argb, t2, (uint32_t*) &output[2].argb); + interpolateVec4(v1->diffuse, v2->diffuse, t1, output[0].diffuse); + interpolateVec4(v1->diffuse, v3->diffuse, t2, output[2].diffuse); output[0].flags = VERTEX_CMD; output[1].flags = VERTEX_CMD; @@ -305,8 +293,8 @@ void clipTriangleStrip(AlignedVector* vertices, AlignedVector* outBuffer) { interpolateVec2(v1->uv, v2->uv, t1, output[1].uv); interpolateVec2(v3->uv, v2->uv, t2, output[3].uv); - interpolateColour((uint32_t*) &v1->argb, (uint32_t*) &v2->argb, t1, (uint32_t*) &output[1].argb); - interpolateColour((uint32_t*) &v3->argb, (uint32_t*) &v2->argb, t2, (uint32_t*) &output[3].argb); + interpolateVec4(v1->diffuse, v2->diffuse, t1, output[1].diffuse); + interpolateVec4(v3->diffuse, v2->diffuse, t2, output[3].diffuse); output[0].flags = VERTEX_CMD; output[1].flags = VERTEX_CMD; diff --git a/GL/clip.h b/GL/clip.h index e952a9d..d2f5c63 100644 --- a/GL/clip.h +++ b/GL/clip.h @@ -17,26 +17,26 @@ typedef enum { CLIP_RESULT_BACK_TO_FRONT } ClipResult; -typedef struct { - uint8_t a; - uint8_t r; - uint8_t g; - uint8_t b; -} ClipColour; /* Note: This structure is the almost the same format as pvr_vertex_t aside from the offet * (oargb) which is replaced by the floating point w value. This is so that we can - * simply zero it and memcpy the lot into the output */ + * simply zero it and memcpy the lot into the output. This struct is 96 bytes to keep + * 32 byte alignment */ typedef struct { uint32_t flags; float xyz[3]; float uv[2]; - ClipColour argb; + uint32_t argb; + float nxyz[3]; float w; - float xyzES[3]; /* Coordinate in eye space */ float nES[3]; /* Normal in eye space */ + + float diffuse[4]; /* Colour in floating point */ + + /* Keep things 32 byte aligned */ + unsigned char padding[8]; } ClipVertex; void clipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t); diff --git a/GL/draw.c b/GL/draw.c index 1a7cf6c..bba7e23 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -73,11 +73,16 @@ static GLuint byte_size(GLenum type) { } } -static void _parseColour(uint32* out, const GLubyte* in, GLint size, GLenum type) { +static void _parseColour(float* out, const GLubyte* in, GLint size, GLenum type) { + const float ONE_OVER_255 = 1.0f / 255.0f; + switch(type) { case GL_BYTE: { case GL_UNSIGNED_BYTE: - *out = in[3] << 24 | in[0] << 16 | in[1] << 8 | in[0]; + out[0] = ((GLfloat)in[0]) * ONE_OVER_255; + out[1] = ((GLfloat)in[1]) * ONE_OVER_255; + out[2] = ((GLfloat)in[2]) * ONE_OVER_255; + out[3] = ((GLfloat)in[3]) * ONE_OVER_255; } break; case GL_SHORT: case GL_UNSIGNED_SHORT: @@ -90,8 +95,10 @@ static void _parseColour(uint32* out, const GLubyte* in, GLint size, GLenum type case GL_FLOAT: case GL_DOUBLE: default: { - const GLfloat* src = (GLfloat*) in; - *out = PVR_PACK_COLOR(src[3], src[0], src[1], src[2]); + out[0] = ((GLfloat*) in)[0]; + out[1] = ((GLfloat*) in)[1]; + out[2] = ((GLfloat*) in)[2]; + out[3] = ((GLfloat*) in)[3]; } break; } } @@ -214,10 +221,10 @@ static void generate(AlignedVector* output, const GLenum mode, const GLsizei fir _parseFloats(vertex->xyz, vptr + (idx * vstride), VERTEX_POINTER.size, VERTEX_POINTER.type); if(ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) { - _parseColour((uint32_t*) &vertex->argb, cptr + (idx * cstride), DIFFUSE_POINTER.size, DIFFUSE_POINTER.type); + _parseColour(vertex->diffuse, cptr + (idx * cstride), DIFFUSE_POINTER.size, DIFFUSE_POINTER.type); } else { /* Default to white if colours are disabled */ - *((uint32_t*) &vertex->argb) = ~0; + vertex->diffuse[0] = vertex->diffuse[1] = vertex->diffuse[2] = vertex->diffuse[3] = 1.0f; } if(ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) { @@ -383,7 +390,8 @@ static void light(AlignedVector* vertices) { for(i = 0; i < vertices->size; ++i, ++vertex) { /* We ignore diffuse colour when lighting is enabled. If GL_COLOR_MATERIAL is enabled * then the lighting calculation should possibly take it into account */ - GLfloat contribution [] = {0.0f, 0.0f, 0.0f, 0.0f}; + memset(vertex->diffuse, 0, sizeof(float) * 4); + GLfloat to_add [] = {0.0f, 0.0f, 0.0f, 0.0f}; GLubyte j; @@ -391,15 +399,12 @@ static void light(AlignedVector* vertices) { if(isLightEnabled(j)) { calculateLightingContribution(j, vertex->xyzES, vertex->nES, to_add); - contribution[0] += to_add[0]; - contribution[1] += to_add[1]; - contribution[2] += to_add[2]; - contribution[3] += to_add[3]; + vertex->diffuse[0] += to_add[0]; + vertex->diffuse[1] += to_add[1]; + vertex->diffuse[2] += to_add[2]; + vertex->diffuse[3] += to_add[3]; } } - - uint32_t final = PVR_PACK_COLOR(contribution[3], contribution[0], contribution[1], contribution[2]); - vertex->argb = *((ClipColour*) &final); } } @@ -436,14 +441,20 @@ static void push(const AlignedVector* vertices, PolyList* activePolyList) { pvr_poly_compile(hdr, &cxt); GLsizei i; + ClipVertex* vin = aligned_vector_at(vertices, 0); + for(i = 0; i < vertices->size; ++i, dst++) { pvr_vertex_t* vout = (pvr_vertex_t*) dst; - - /* The first part of ClipVertex is the same as the first part of pvr_vertex_t */ - memcpy(vout, aligned_vector_at(vertices, i), sizeof(pvr_vertex_t)); - - /* Except for this bit */ + vout->flags = vin->flags; + vout->x = vin->xyz[0]; + vout->y = vin->xyz[1]; + vout->z = vin->xyz[2]; + vout->u = vin->uv[0]; + vout->v = vin->uv[1]; + vout->argb = PVR_PACK_COLOR(vin->diffuse[3], vin->diffuse[0], vin->diffuse[1], vin->diffuse[2]); vout->oargb = 0; + + vin++; } } @@ -459,6 +470,9 @@ static void submitVertices(GLenum mode, GLsizei first, GLsizei count, GLenum typ if(!buffer) { buffer = (AlignedVector*) malloc(sizeof(AlignedVector)); aligned_vector_init(buffer, sizeof(ClipVertex)); + + /* Reserve 64k up-front */ + aligned_vector_reserve(buffer, 64 * 1024); } else { /* Else, resize to zero (this will retain the allocated memory) */ aligned_vector_resize(buffer, 0);