diff --git a/GL/draw.c b/GL/draw.c index d3b00fe..b1f98ef 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -19,7 +19,7 @@ static AttribPointer DIFFUSE_POINTER; static GLuint ENABLED_VERTEX_ATTRIBUTES = 0; static GLubyte ACTIVE_CLIENT_TEXTURE = 0; - +static GLboolean FAST_PATH_ENABLED = GL_FALSE; #define ITERATE(count) \ GLuint i = count; \ @@ -55,6 +55,44 @@ void _glInitAttributePointers() { NORMAL_POINTER.size = 3; } +static GLboolean _glIsVertexDataFastPathCompatible() { + /* + * We provide a "fast path" if vertex data is provided in + * exactly the right format that matches what the PVR can handle. + * This function returns true if all the requirements are met. + */ + + /* + * At least these attributes need to be enabled, because we're not going to do any checking + * in the loop + */ + if((ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG) != VERTEX_ENABLED_FLAG) return GL_FALSE; + if((ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) != UV_ENABLED_FLAG) return GL_FALSE; + if((ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) != DIFFUSE_ENABLED_FLAG) return GL_FALSE; + + // All 3 attribute types must have a stride of 32 + if(VERTEX_POINTER.stride != 32) return GL_FALSE; + if(UV_POINTER.stride != 32) return GL_FALSE; + if(DIFFUSE_POINTER.stride != 32) return GL_FALSE; + + // UV must follow vertex, diffuse must follow UV + if((UV_POINTER.ptr - VERTEX_POINTER.ptr) != sizeof(GLfloat) * 3) return GL_FALSE; + if((DIFFUSE_POINTER.ptr - UV_POINTER.ptr) != sizeof(GLfloat) * 2) return GL_FALSE; + + if(VERTEX_POINTER.type != GL_FLOAT) return GL_FALSE; + if(VERTEX_POINTER.size != 3) return GL_FALSE; + + if(UV_POINTER.type != GL_FLOAT) return GL_FALSE; + if(UV_POINTER.size != 2) return GL_FALSE; + + if(DIFFUSE_POINTER.type != GL_UNSIGNED_BYTE) return GL_FALSE; + + /* BGRA is the required color order */ + if(DIFFUSE_POINTER.size != GL_BGRA) return GL_FALSE; + + return GL_TRUE; +} + static inline GLuint byte_size(GLenum type) { switch(type) { case GL_BYTE: return sizeof(GLbyte); @@ -304,8 
+342,8 @@ static void _readVertexData4ubARGB(const GLubyte* input, GLuint count, GLubyte s output[B8IDX] = input[2]; output[A8IDX] = input[3]; - input = (GLubyte*) (((GLubyte*) input) + stride); - output = (GLubyte*) (((GLubyte*) output) + sizeof(Vertex)); + input += stride; + output += sizeof(Vertex); } } @@ -345,6 +383,30 @@ static void _readVertexData3ubARGB(const GLubyte* input, GLuint count, GLubyte s } } +static void _readVertexData4ubRevARGB(const GLubyte* input, GLuint count, GLubyte stride, GLubyte* output) { + ITERATE(count) { + output[0] = input[0]; + output[1] = input[1]; + output[2] = input[2]; + output[3] = input[3]; + + input += stride; + output += sizeof(Vertex); + } +} + +static void _readVertexData4fRevARGB(const float* input, GLuint count, GLubyte stride, GLubyte* output) { + ITERATE(count) { + output[0] = (GLubyte) clamp(input[0] * 255.0f, 0, 255); + output[1] = (GLubyte) clamp(input[1] * 255.0f, 0, 255); + output[2] = (GLubyte) clamp(input[2] * 255.0f, 0, 255); + output[3] = (GLubyte) clamp(input[3] * 255.0f, 0, 255); + + input = (float*) (((GLubyte*) input) + stride); + output += sizeof(Vertex); + } +} + static void _fillWithNegZVE(GLuint count, GLfloat* output) { ITERATE(count) { output[0] = output[1] = 0.0f; @@ -394,6 +456,14 @@ static void _readVertexData4uiARGB(const GLuint* input, GLuint count, GLubyte st assert(0 && "Not Implemented"); } +static void _readVertexData4usRevARGB(const GLushort* input, GLuint count, GLubyte stride, GLubyte* output) { + assert(0 && "Not Implemented"); +} + +static void _readVertexData4uiRevARGB(const GLuint* input, GLuint count, GLubyte stride, GLubyte* output) { + assert(0 && "Not Implemented"); +} + GLuint* _glGetEnabledAttributes() { return &ENABLED_VERTEX_ATTRIBUTES; } @@ -712,7 +782,7 @@ static inline void _readDiffuseData(const GLuint first, const GLuint count, Vert } const GLubyte cstride = (DIFFUSE_POINTER.stride) ? 
DIFFUSE_POINTER.stride : DIFFUSE_POINTER.size * byte_size(DIFFUSE_POINTER.type); - const void* cptr = ((GLubyte*) DIFFUSE_POINTER.ptr + (first * cstride)); + const void* cptr = ((GLubyte*) DIFFUSE_POINTER.ptr) + (first * cstride); if(DIFFUSE_POINTER.size == 3) { switch(DIFFUSE_POINTER.type) { @@ -756,7 +826,28 @@ static inline void _readDiffuseData(const GLuint first, const GLuint count, Vert default: assert(0 && "Not Implemented"); } - } else { + } else if(DIFFUSE_POINTER.size == GL_BGRA) { + switch(DIFFUSE_POINTER.type) { + case GL_DOUBLE: + case GL_FLOAT: + _readVertexData4fRevARGB(cptr, count, cstride, output[0].bgra); + break; + case GL_BYTE: + case GL_UNSIGNED_BYTE: + _readVertexData4ubRevARGB(cptr, count, cstride, output[0].bgra); + break; + case GL_SHORT: + case GL_UNSIGNED_SHORT: + _readVertexData4usRevARGB(cptr, count, cstride, output[0].bgra); + break; + case GL_INT: + case GL_UNSIGNED_INT: + _readVertexData4uiRevARGB(cptr, count, cstride, output[0].bgra); + break; + default: + assert(0 && "Not Implemented"); + } + }else { assert(0 && "Not Implemented"); } } @@ -766,6 +857,7 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei /* Read from the client buffers and generate an array of ClipVertices */ TRACE(); + static const uint32_t FAST_PATH_BYTE_SIZE = (sizeof(GLfloat) * 3) + (sizeof(GLfloat) * 2) + (sizeof(GLubyte) * 4); const GLsizei istride = byte_size(type); if(!indices) { @@ -773,13 +865,32 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei Vertex* start = _glSubmissionTargetStart(target); - _readPositionData(first, count, start); - profiler_checkpoint("positions"); + if(FAST_PATH_ENABLED) { + /* Copy the pos, uv and color directly in one go, starting at vertex `first` like the slow path does */ + const GLfloat* pos = (const GLfloat*) (((const GLubyte*) VERTEX_POINTER.ptr) + (first * 32)); /* 32 == fast-path stride in bytes */ + Vertex* it = start; + ITERATE(count) { + it->flags = PVR_CMD_VERTEX; + memcpy(it->xyz, pos, FAST_PATH_BYTE_SIZE); + it++; + pos += 32 / sizeof(GLfloat); + } + } else { + _readPositionData(first, count, 
start); + profiler_checkpoint("positions"); - _readDiffuseData(first, count, start); - profiler_checkpoint("diffuse"); + _readDiffuseData(first, count, start); + profiler_checkpoint("diffuse"); - if(doTexture) _readUVData(first, count, start); + if(doTexture) _readUVData(first, count, start); + + Vertex* it = _glSubmissionTargetStart(target); + + ITERATE(count) { + it->flags = PVR_CMD_VERTEX; + ++it; + } + } VertexExtra* ve = aligned_vector_at(target->extras, 0); @@ -787,15 +898,6 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei if(doTexture && doMultitexture) _readSTData(first, count, ve); profiler_checkpoint("others"); - Vertex* it = _glSubmissionTargetStart(target); - - ITERATE(count) { - it->flags = PVR_CMD_VERTEX; - ++it; - } - - profiler_checkpoint("flags"); - // Drawing arrays switch(mode) { case GL_TRIANGLES: @@ -1292,6 +1394,11 @@ void APIENTRY glClientActiveTextureARB(GLenum texture) { ACTIVE_CLIENT_TEXTURE = (texture == GL_TEXTURE1_ARB) ? 1 : 0; } +GLboolean _glRecalcFastPath() { + FAST_PATH_ENABLED = _glIsVertexDataFastPathCompatible(); + return FAST_PATH_ENABLED; +} + void APIENTRY glTexCoordPointer(GLint size, GLenum type, GLsizei stride, const GLvoid * pointer) { TRACE(); @@ -1307,6 +1414,8 @@ void APIENTRY glTexCoordPointer(GLint size, GLenum type, GLsizei stride, cons tointer->stride = stride; tointer->type = type; tointer->size = size; + + _glRecalcFastPath(); } void APIENTRY glVertexPointer(GLint size, GLenum type, GLsizei stride, const GLvoid * pointer) { @@ -1322,12 +1431,14 @@ void APIENTRY glVertexPointer(GLint size, GLenum type, GLsizei stride, const VERTEX_POINTER.stride = stride; VERTEX_POINTER.type = type; VERTEX_POINTER.size = size; + + _glRecalcFastPath(); } void APIENTRY glColorPointer(GLint size, GLenum type, GLsizei stride, const GLvoid * pointer) { TRACE(); - if(size != 3 && size != 4) { + if(size != 3 && size != 4 && size != GL_BGRA) { _glKosThrowError(GL_INVALID_VALUE, __func__); 
_glKosPrintError(); return; @@ -1337,6 +1448,8 @@ void APIENTRY glColorPointer(GLint size, GLenum type, GLsizei stride, const G DIFFUSE_POINTER.stride = stride; DIFFUSE_POINTER.type = type; DIFFUSE_POINTER.size = size; + + _glRecalcFastPath(); } void APIENTRY glNormalPointer(GLenum type, GLsizei stride, const GLvoid * pointer) { @@ -1346,4 +1459,6 @@ void APIENTRY glNormalPointer(GLenum type, GLsizei stride, const GLvoid * poin NORMAL_POINTER.stride = stride; NORMAL_POINTER.type = type; NORMAL_POINTER.size = 3; + + _glRecalcFastPath(); } diff --git a/GL/immediate.c b/GL/immediate.c index d95c887..ae3ad7a 100644 --- a/GL/immediate.c +++ b/GL/immediate.c @@ -7,8 +7,12 @@ * 3. This is entirely untested. */ +#include +#include + #include "../include/gl.h" #include "../include/glext.h" +#include "../include/glkos.h" #include "profiler.h" #include "private.h" @@ -17,18 +21,14 @@ static GLboolean IMMEDIATE_MODE_ACTIVE = GL_FALSE; static GLenum ACTIVE_POLYGON_MODE = GL_TRIANGLES; static AlignedVector VERTICES; -static AlignedVector COLOURS; -static AlignedVector UV_COORDS; static AlignedVector ST_COORDS; static AlignedVector NORMALS; - static GLfloat NORMAL[3] = {0.0f, 0.0f, 1.0f}; static GLubyte COLOR[4] = {255, 255, 255, 255}; static GLfloat UV_COORD[2] = {0.0f, 0.0f}; static GLfloat ST_COORD[2] = {0.0f, 0.0f}; - static AttribPointer VERTEX_ATTRIB; static AttribPointer DIFFUSE_ATTRIB; static AttribPointer UV_ATTRIB; @@ -36,42 +36,38 @@ static AttribPointer ST_ATTRIB; static AttribPointer NORMAL_ATTRIB; void _glInitImmediateMode(GLuint initial_size) { - aligned_vector_init(&VERTICES, sizeof(GLfloat)); - aligned_vector_init(&COLOURS, sizeof(GLubyte)); - aligned_vector_init(&UV_COORDS, sizeof(GLfloat)); + aligned_vector_init(&VERTICES, sizeof(GLVertexKOS)); aligned_vector_init(&ST_COORDS, sizeof(GLfloat)); aligned_vector_init(&NORMALS, sizeof(GLfloat)); aligned_vector_reserve(&VERTICES, initial_size); - aligned_vector_reserve(&COLOURS, initial_size); - 
aligned_vector_reserve(&UV_COORDS, initial_size); - aligned_vector_reserve(&ST_COORDS, initial_size); - aligned_vector_reserve(&NORMALS, initial_size); + aligned_vector_reserve(&ST_COORDS, initial_size * 2); + aligned_vector_reserve(&NORMALS, initial_size * 3); - VERTEX_ATTRIB.ptr = VERTICES.data; + VERTEX_ATTRIB.ptr = VERTICES.data + sizeof(uint32_t); VERTEX_ATTRIB.size = 3; VERTEX_ATTRIB.type = GL_FLOAT; - VERTEX_ATTRIB.stride = 0; + VERTEX_ATTRIB.stride = 32; - DIFFUSE_ATTRIB.ptr = COLOURS.data; - DIFFUSE_ATTRIB.size = 4; - DIFFUSE_ATTRIB.type = GL_UNSIGNED_BYTE; - DIFFUSE_ATTRIB.stride = 0; - - UV_ATTRIB.ptr = UV_COORDS.data; - UV_ATTRIB.stride = 0; + UV_ATTRIB.ptr = VERTEX_ATTRIB.ptr + (sizeof(GLfloat) * 3); + UV_ATTRIB.stride = 32; UV_ATTRIB.type = GL_FLOAT; UV_ATTRIB.size = 2; - ST_ATTRIB.ptr = ST_COORDS.data; - ST_ATTRIB.stride = 0; - ST_ATTRIB.type = GL_FLOAT; - ST_ATTRIB.size = 2; + DIFFUSE_ATTRIB.ptr = VERTEX_ATTRIB.ptr + (sizeof(GLfloat) * 5); + DIFFUSE_ATTRIB.size = GL_BGRA; /* Flipped color order */ + DIFFUSE_ATTRIB.type = GL_UNSIGNED_BYTE; + DIFFUSE_ATTRIB.stride = 32; NORMAL_ATTRIB.ptr = NORMALS.data; NORMAL_ATTRIB.stride = 0; NORMAL_ATTRIB.type = GL_FLOAT; NORMAL_ATTRIB.size = 3; + + ST_ATTRIB.ptr = ST_COORDS.data; + ST_ATTRIB.stride = 0; + ST_ATTRIB.type = GL_FLOAT; + ST_ATTRIB.size = 2; } GLubyte _glCheckImmediateModeInactive(const char* func) { @@ -146,17 +142,23 @@ void APIENTRY glColor3fv(const GLfloat* v) { } void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) { - aligned_vector_reserve(&VERTICES, VERTICES.size + 3); - aligned_vector_push_back(&VERTICES, &x, 1); - aligned_vector_push_back(&VERTICES, &y, 1); - aligned_vector_push_back(&VERTICES, &z, 1); + GLVertexKOS* vert = aligned_vector_extend(&VERTICES, 1); + GLfloat* st = aligned_vector_extend(&ST_COORDS, 2); + GLfloat* n = aligned_vector_extend(&NORMALS, 3); + vert->x = x; + vert->y = y; + vert->z = z; + vert->u = UV_COORD[0]; + vert->v = UV_COORD[1]; - /* Push back the stashed 
colour, normal and uv_coordinate */ - aligned_vector_push_back(&COLOURS, COLOR, 4); - aligned_vector_push_back(&UV_COORDS, UV_COORD, 2); - aligned_vector_push_back(&ST_COORDS, ST_COORD, 2); - aligned_vector_push_back(&NORMALS, NORMAL, 3); + vert->bgra[R8IDX] = COLOR[0]; + vert->bgra[G8IDX] = COLOR[1]; + vert->bgra[B8IDX] = COLOR[2]; + vert->bgra[A8IDX] = COLOR[3]; + + memcpy(st, ST_COORD, sizeof(GLfloat) * 2); + memcpy(n, NORMAL, sizeof(GLfloat) * 3); } void APIENTRY glVertex3fv(const GLfloat* v) { @@ -218,11 +220,12 @@ void APIENTRY glEnd() { IMMEDIATE_MODE_ACTIVE = GL_FALSE; /* Resizing could have invalidated these pointers */ - VERTEX_ATTRIB.ptr = VERTICES.data; - DIFFUSE_ATTRIB.ptr = COLOURS.data; - UV_ATTRIB.ptr = UV_COORDS.data; - ST_ATTRIB.ptr = ST_COORDS.data; + VERTEX_ATTRIB.ptr = VERTICES.data + sizeof(uint32_t); + UV_ATTRIB.ptr = VERTEX_ATTRIB.ptr + (sizeof(GLfloat) * 3); + DIFFUSE_ATTRIB.ptr = VERTEX_ATTRIB.ptr + (sizeof(GLfloat) * 5); + NORMAL_ATTRIB.ptr = NORMALS.data; + ST_ATTRIB.ptr = ST_COORDS.data; GLuint* attrs = _glGetEnabledAttributes(); @@ -250,7 +253,15 @@ void APIENTRY glEnd() { *attrs = ~0; // Enable everything - glDrawArrays(ACTIVE_POLYGON_MODE, 0, VERTICES.size / 3); +#ifdef NDEBUG /* assert() expands to nothing when NDEBUG is defined, so only refresh the flag */ + _glRecalcFastPath(); +#else + // Immediate mode should always activate the fast path + GLboolean fastPathEnabled = _glRecalcFastPath(); + assert(fastPathEnabled); +#endif + + glDrawArrays(ACTIVE_POLYGON_MODE, 0, VERTICES.size); /* Restore everything */ *vattr = vptr; @@ -263,8 +274,6 @@ void APIENTRY glEnd() { /* Clear arrays for next polys */ aligned_vector_clear(&VERTICES); - aligned_vector_clear(&COLOURS); - aligned_vector_clear(&UV_COORDS); aligned_vector_clear(&ST_COORDS); aligned_vector_clear(&NORMALS); diff --git a/GL/private.h b/GL/private.h index 7fb68b8..35d3407 100644 --- a/GL/private.h +++ b/GL/private.h @@ -316,6 +316,8 @@ GLboolean _glIsLightingEnabled(); GLboolean _glIsLightEnabled(GLubyte light); GLboolean _glIsColorMaterialEnabled(); 
+GLboolean _glRecalcFastPath(); + typedef struct { float xyz[3]; float n[3]; diff --git a/GL/state.c b/GL/state.c index ef026a5..239f5d9 100644 --- a/GL/state.c +++ b/GL/state.c @@ -671,7 +671,7 @@ const GLubyte *glGetString(GLenum name) { return (const GLubyte*) "1.2 (partial) - GLdc 1.1"; case GL_EXTENSIONS: - return (const GLubyte*) "GL_ARB_framebuffer_object, GL_ARB_multitexture, GL_ARB_texture_rg, GL_EXT_paletted_texture, GL_EXT_shared_texture_palette, GL_KOS_multiple_shared_palette"; + return (const GLubyte*) "GL_ARB_framebuffer_object, GL_ARB_multitexture, GL_ARB_texture_rg, GL_EXT_paletted_texture, GL_EXT_shared_texture_palette, GL_KOS_multiple_shared_palette, GL_ARB_vertex_array_bgra"; } return (const GLubyte*) "GL_KOS_ERROR: ENUM Unsupported\n"; diff --git a/GL/util.c b/GL/util.c new file mode 100644 index 0000000..cef1d67 --- /dev/null +++ b/GL/util.c @@ -0,0 +1,15 @@ +#include "../include/glkos.h" + +void APIENTRY glVertexPackColor3fKOS(GLVertexKOS* vertex, float r, float g, float b) { /* packs an opaque colour into vertex->bgra as B, G, R, A bytes */ + vertex->bgra[3] = 255; + vertex->bgra[2] = (r * 255.0f); + vertex->bgra[1] = (g * 255.0f); + vertex->bgra[0] = (b * 255.0f); +} + +void APIENTRY glVertexPackColor4fKOS(GLVertexKOS* vertex, float r, float g, float b, float a) { /* same as the 3f variant, with alpha taken from `a` */ + vertex->bgra[3] = (a * 255.0f); + vertex->bgra[2] = (r * 255.0f); + vertex->bgra[1] = (g * 255.0f); + vertex->bgra[0] = (b * 255.0f); +} diff --git a/include/glkos.h b/include/glkos.h index 34435bd..5af510b 100644 --- a/include/glkos.h +++ b/include/glkos.h @@ -53,6 +53,20 @@ typedef struct { } GLdcConfig; +typedef struct { + GLuint padding0; + GLfloat x; + GLfloat y; + GLfloat z; + GLfloat u; + GLfloat v; + GLubyte bgra[4]; + GLuint padding1; +} GLVertexKOS; + +GLAPI void APIENTRY glVertexPackColor3fKOS(GLVertexKOS* vertex, float r, float g, float b); +GLAPI void APIENTRY glVertexPackColor4fKOS(GLVertexKOS* vertex, float r, float g, float b, float a); + GLAPI void APIENTRY glKosInitConfig(GLdcConfig* config); /* Usage: