From bdecf90d6406e0b1c08de87c19de8f04ba97eeb8 Mon Sep 17 00:00:00 2001 From: UnknownShadow200 Date: Sat, 1 Feb 2025 15:38:59 +1100 Subject: [PATCH 1/4] Calculate read attribute functions when attribute states changes, instead of at every draw call --- CMakeLists.txt | 1 + GL/attributes.c | 567 ++++++++++++++++++++++++++++++++++++++++++ GL/draw.c | 637 +++--------------------------------------------- GL/private.h | 22 +- 4 files changed, 611 insertions(+), 616 deletions(-) create mode 100644 GL/attributes.c diff --git a/CMakeLists.txt b/CMakeLists.txt index e51c14d..e416cdd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -68,6 +68,7 @@ set( containers/aligned_vector.c containers/named_array.c containers/stack.c + GL/attributes.c GL/draw.c GL/error.c GL/flush.c diff --git a/GL/attributes.c b/GL/attributes.c new file mode 100644 index 0000000..4ed6ca3 --- /dev/null +++ b/GL/attributes.c @@ -0,0 +1,567 @@ +#include +#include +#include +#include +#include +#include + +#include "private.h" +#include "platform.h" + + +AttribPointerList ATTRIB_POINTERS; +GLuint ENABLED_VERTEX_ATTRIBUTES = 0; + +static const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; + +extern inline GLuint _glRecalcFastPath(); + +GL_FORCE_INLINE GLsizei byte_size(GLenum type) { + switch(type) { + case GL_BYTE: return sizeof(GLbyte); + case GL_UNSIGNED_BYTE: return sizeof(GLubyte); + case GL_SHORT: return sizeof(GLshort); + case GL_UNSIGNED_SHORT: return sizeof(GLushort); + case GL_INT: return sizeof(GLint); + case GL_UNSIGNED_INT: return sizeof(GLuint); + case GL_DOUBLE: return sizeof(GLdouble); + case GL_UNSIGNED_INT_2_10_10_10_REV: return sizeof(GLuint); + case GL_FLOAT: + default: return sizeof(GLfloat); + } +} + +static void _readVertexData3f3f(const GLubyte* __restrict__ in, GLubyte* __restrict__ out) { + vec3cpy(out, in); +} + +// 10:10:10:2REV format +static void _readVertexData1i3f(const GLubyte* in, GLubyte* out) { + static const float MULTIPLIER = 1.0f / 1023.0f; + + GLfloat* output = (GLfloat*) out; + + union { + int value; + struct { + signed int x: 10; + signed int y: 10; + signed int z: 10; + signed int w: 2; + } bits; + } input; + + input.value = *((const GLint*) in); + + output[0] = (2.0f * (float) input.bits.x + 1.0f) * MULTIPLIER; + output[1] = (2.0f * (float) input.bits.y + 1.0f) * MULTIPLIER; + output[2] = (2.0f * (float) input.bits.z + 1.0f) * MULTIPLIER; +} + +static void _readVertexData3us3f(const GLubyte* in, GLubyte* out) { + const GLushort* input = (const GLushort*) in; + float* output = (float*) out; + + output[0] = input[0]; + output[1] = input[1]; + output[2] = input[2]; +} + +static void _readVertexData3ui3f(const GLubyte* in, GLubyte* out) { + const GLuint* input = (const GLuint*) in; + float* output = (float*) out; + + output[0] = input[0]; + output[1] = input[1]; + output[2] = input[2]; +} + + +static void _readVertexData3ub3f(const GLubyte* input, GLubyte* out) { + float* output = (float*) out; + + output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE; + output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE; + output[2] = input[2] * ONE_OVER_TWO_FIVE_FIVE; +} + +static void _readVertexData2f2f(const GLubyte* in, GLubyte* out) { + vec2cpy(out, in); +} + +static void _readVertexData2f3f(const GLubyte* in, GLubyte* out) { + const float* input = (const float*) in; + float* output = (float*) out; + + vec2cpy(output, input); + output[2] = 0.0f; +} + +static void _readVertexData2ub3f(const GLubyte* input, GLubyte* out) { + float* output = (float*) out; + + output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE; + output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE; + output[2] = 0.0f; +} + +static void _readVertexData2us3f(const GLubyte* in, GLubyte* out) { + const GLushort* input = (const GLushort*) in; + float* output = (float*) out; + + output[0] = input[0]; + output[1] = input[1]; + output[2] = 0.0f; +} + +static void _readVertexData2us2f(const GLubyte* in, GLubyte* out) { + const GLushort* input = (const GLushort*) in; + float* output = (float*) out; + + output[0] = (float)input[0] / SHRT_MAX; + output[1] = (float)input[1] / SHRT_MAX; +} + +static void _readVertexData2ui2f(const GLubyte* in, GLubyte* out) { + const GLuint* input = (const GLuint*) in; + float* output = (float*) out; + + output[0] = input[0]; + output[1] = input[1]; +} + +static void _readVertexData2ub2f(const GLubyte* input, GLubyte* out) { + float* output = (float*) out; + + output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE; + output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE; +} + +static void _readVertexData2ui3f(const GLubyte* in, GLubyte* out) { + const GLuint* input = (const GLuint*) in; + float* output = (float*) out; + + output[0] = input[0]; + output[1] = input[1]; + output[2] = 0.0f; +} + +static void _readVertexData4ubARGB(const GLubyte* input, GLubyte* output) { + output[R8IDX] = input[0]; + output[G8IDX] = input[1]; + output[B8IDX] = input[2]; + output[A8IDX] = input[3]; +} + +static void _readVertexData4fARGB(const GLubyte* in, GLubyte* output) { + const float* input = (const float*) in; + + output[R8IDX] = (GLubyte) clamp(input[0] * 255.0f, 0, 255); + output[G8IDX] = (GLubyte) clamp(input[1] * 255.0f, 0, 255); + output[B8IDX] = (GLubyte) clamp(input[2] * 255.0f, 0, 255); + output[A8IDX] = (GLubyte) clamp(input[3] * 255.0f, 0, 255); +} + +static void _readVertexData3fARGB(const GLubyte* in, GLubyte* output) { + const float* input = (const float*) in; + + output[R8IDX] = (GLubyte) clamp(input[0] * 255.0f, 0, 255); + output[G8IDX] = (GLubyte) clamp(input[1] * 255.0f, 0, 255); + output[B8IDX] = (GLubyte) clamp(input[2] * 255.0f, 0, 255); + output[A8IDX] = 255; +} + +static void _readVertexData3ubARGB(const GLubyte* __restrict__ input, GLubyte* __restrict__ output) { + output[R8IDX] = input[0]; + output[G8IDX] = input[1]; + output[B8IDX] = input[2]; + output[A8IDX] = 255; +} + +static void _readVertexData4ubRevARGB(const GLubyte* __restrict__ input, GLubyte* __restrict__ output) { + argbcpy(output, input); +} + +static void _readVertexData4fRevARGB(const GLubyte* __restrict__ in, GLubyte* __restrict__ output) { + const float* input = (const float*) in; + + output[0] = (GLubyte) clamp(input[0] * 255.0f, 0, 255); + output[1] = (GLubyte) clamp(input[1] * 255.0f, 0, 255); + output[2] = (GLubyte) clamp(input[2] * 255.0f, 0, 255); + output[3] = (GLubyte) clamp(input[3] * 255.0f, 0, 255); +} + +static void _fillWithNegZVE(const GLubyte* __restrict__ input, GLubyte* __restrict__ out) { + _GL_UNUSED(input); + + typedef struct { + float x, y, z; + } V; + + static const V NegZ = {0.0f, 0.0f, -1.0f}; + + *((V*) out) = NegZ; +} + +static void _fillWhiteARGB(const GLubyte* __restrict__ input, GLubyte* __restrict__ output) { + _GL_UNUSED(input); + *((uint32_t*) output) = ~0; +} + +static void _fillZero2f(const GLubyte* __restrict__ input, GLubyte* __restrict__ out) { + _GL_UNUSED(input); + memset(out, 0, sizeof(float) * 2); +} + +static void _readVertexData3usARGB(const GLubyte* input, GLubyte* output) { + _GL_UNUSED(input); + _GL_UNUSED(output); + gl_assert(0 && "Not Implemented"); +} + +static void _readVertexData3uiARGB(const GLubyte* input, GLubyte* output) { + _GL_UNUSED(input); + _GL_UNUSED(output); + gl_assert(0 && "Not Implemented"); +} + +static void _readVertexData4usARGB(const GLubyte* input, GLubyte* output) { + _GL_UNUSED(input); + _GL_UNUSED(output); + gl_assert(0 && "Not Implemented"); +} + +static void _readVertexData4uiARGB(const GLubyte* input, GLubyte* output) { + _GL_UNUSED(input); + _GL_UNUSED(output); + gl_assert(0 && "Not Implemented"); +} + +static void _readVertexData4usRevARGB(const GLubyte* input, GLubyte* output) { + _GL_UNUSED(input); + _GL_UNUSED(output); + gl_assert(0 && "Not Implemented"); +} + +static void _readVertexData4uiRevARGB(const GLubyte* input, GLubyte* output) { + _GL_UNUSED(input); + _GL_UNUSED(output); + gl_assert(0 && "Not Implemented"); +} + +static ReadAttributeFunc calcReadDiffuseFunc() { + if((ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) != DIFFUSE_ENABLED_FLAG) { + /* Just fill the whole thing white if the attribute is disabled */ + return _fillWhiteARGB; + } + + switch(ATTRIB_POINTERS.colour.type) { + default: + case GL_DOUBLE: + case GL_FLOAT: + return (ATTRIB_POINTERS.colour.size == 3) ? _readVertexData3fARGB: + (ATTRIB_POINTERS.colour.size == 4) ? _readVertexData4fARGB: + _readVertexData4fRevARGB; + case GL_BYTE: + case GL_UNSIGNED_BYTE: + return (ATTRIB_POINTERS.colour.size == 3) ? _readVertexData3ubARGB: + (ATTRIB_POINTERS.colour.size == 4) ? _readVertexData4ubARGB: + _readVertexData4ubRevARGB; + case GL_SHORT: + case GL_UNSIGNED_SHORT: + return (ATTRIB_POINTERS.colour.size == 3) ? _readVertexData3usARGB: + (ATTRIB_POINTERS.colour.size == 4) ? _readVertexData4usARGB: + _readVertexData4usRevARGB; + case GL_INT: + case GL_UNSIGNED_INT: + return (ATTRIB_POINTERS.colour.size == 3) ? _readVertexData3uiARGB: + (ATTRIB_POINTERS.colour.size == 4) ? _readVertexData4uiARGB: + _readVertexData4uiRevARGB; + } +} + +static ReadAttributeFunc calcReadPositionFunc() { + switch(ATTRIB_POINTERS.vertex.type) { + default: + case GL_DOUBLE: + case GL_FLOAT: + return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3f3f: + _readVertexData2f3f; + case GL_BYTE: + case GL_UNSIGNED_BYTE: + return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3ub3f: + _readVertexData2ub3f; + case GL_SHORT: + case GL_UNSIGNED_SHORT: + return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3us3f: + _readVertexData2us3f; + case GL_INT: + case GL_UNSIGNED_INT: + return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3ui3f: + _readVertexData2ui3f; + } +} + +static ReadAttributeFunc calcReadUVFunc() { + if((ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) != UV_ENABLED_FLAG) { + return _fillZero2f; + } + + switch(ATTRIB_POINTERS.uv.type) { + default: + case GL_DOUBLE: + case GL_FLOAT: + return _readVertexData2f2f; + case GL_BYTE: + case GL_UNSIGNED_BYTE: + return _readVertexData2ub2f; + case GL_SHORT: + case GL_UNSIGNED_SHORT: + return _readVertexData2us2f; + case GL_INT: + case GL_UNSIGNED_INT: + return _readVertexData2ui2f; + } +} + +static ReadAttributeFunc calcReadSTFunc() { + if((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) != ST_ENABLED_FLAG) { + return _fillZero2f; + } + + switch(ATTRIB_POINTERS.st.type) { + default: + case GL_DOUBLE: + case GL_FLOAT: + return _readVertexData2f2f; + case GL_BYTE: + case GL_UNSIGNED_BYTE: + return _readVertexData2ub2f; + case GL_SHORT: + case GL_UNSIGNED_SHORT: + return _readVertexData2us2f; + case GL_INT: + case GL_UNSIGNED_INT: + return _readVertexData2ui2f; + } +} + +static ReadAttributeFunc calcReadNormalFunc() { + if((ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) != NORMAL_ENABLED_FLAG) { + return _fillWithNegZVE; + } + + switch(ATTRIB_POINTERS.normal.type) { + default: + case GL_DOUBLE: + case GL_FLOAT: + return _readVertexData3f3f; + break; + case GL_BYTE: + case GL_UNSIGNED_BYTE: + return _readVertexData3ub3f; + break; + case GL_SHORT: + case GL_UNSIGNED_SHORT: + return _readVertexData3us3f; + break; + case GL_INT: + case GL_UNSIGNED_INT: + return _readVertexData3ui3f; + break; + case GL_UNSIGNED_INT_2_10_10_10_REV: + return _readVertexData1i3f; + break; + } +} + +void APIENTRY glEnableClientState(GLenum cap) { + TRACE(); + + switch(cap) { + case GL_VERTEX_ARRAY: + ENABLED_VERTEX_ATTRIBUTES |= VERTEX_ENABLED_FLAG; + break; + case GL_COLOR_ARRAY: + ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; + break; + case GL_NORMAL_ARRAY: + ENABLED_VERTEX_ATTRIBUTES |= NORMAL_ENABLED_FLAG; + break; + case GL_TEXTURE_COORD_ARRAY: + (ACTIVE_CLIENT_TEXTURE) ? + (ENABLED_VERTEX_ATTRIBUTES |= ST_ENABLED_FLAG): + (ENABLED_VERTEX_ATTRIBUTES |= UV_ENABLED_FLAG); + break; + default: + _glKosThrowError(GL_INVALID_ENUM, __func__); + } + + /* It's possible that we called glVertexPointer and friends before + * calling glEnableClientState, so we should recheck to make sure + * everything is in the right format with this new information */ + _glRecalcFastPath(); +} + +void APIENTRY glDisableClientState(GLenum cap) { + TRACE(); + + switch(cap) { + case GL_VERTEX_ARRAY: + ENABLED_VERTEX_ATTRIBUTES &= ~VERTEX_ENABLED_FLAG; + break; + case GL_COLOR_ARRAY: + ENABLED_VERTEX_ATTRIBUTES &= ~DIFFUSE_ENABLED_FLAG; + break; + case GL_NORMAL_ARRAY: + ENABLED_VERTEX_ATTRIBUTES &= ~NORMAL_ENABLED_FLAG; + break; + case GL_TEXTURE_COORD_ARRAY: + (ACTIVE_CLIENT_TEXTURE) ? + (ENABLED_VERTEX_ATTRIBUTES &= ~ST_ENABLED_FLAG): + (ENABLED_VERTEX_ATTRIBUTES &= ~UV_ENABLED_FLAG); + break; + default: + _glKosThrowError(GL_INVALID_ENUM, __func__); + } + + /* State changed, recalculate */ + _glRecalcFastPath(); +} + + +// Used to avoid checking and updating attribute related state unless necessary +GL_FORCE_INLINE GLboolean _glStateUnchanged(AttribPointer* p, GLint size, GLenum type, GLsizei stride) { + return (p->size == size && p->type == type && p->stride == stride); +} + +void APIENTRY glTexCoordPointer(GLint size, GLenum type, GLsizei stride, const GLvoid * pointer) { + TRACE(); + + stride = (stride) ? stride : size * byte_size(type); + AttribPointer* tointer = (ACTIVE_CLIENT_TEXTURE == 0) ? &ATTRIB_POINTERS.uv : &ATTRIB_POINTERS.st; + tointer->ptr = pointer; + + if(_glStateUnchanged(tointer, size, type, stride)) return; + + if(size < 1 || size > 4) { + _glKosThrowError(GL_INVALID_VALUE, __func__); + return; + } + + tointer->stride = stride; + tointer->type = type; + tointer->size = size; + + ATTRIB_POINTERS.uv_func = calcReadUVFunc(); + ATTRIB_POINTERS.st_func = calcReadSTFunc(); + _glRecalcFastPath(); +} + +void APIENTRY glVertexPointer(GLint size, GLenum type, GLsizei stride, const GLvoid * pointer) { + TRACE(); + + stride = (stride) ? stride : (size * byte_size(type)); + ATTRIB_POINTERS.vertex.ptr = pointer; + + if(_glStateUnchanged(&ATTRIB_POINTERS.vertex, size, type, stride)) return; + + if(size < 2 || size > 4) { + _glKosThrowError(GL_INVALID_VALUE, __func__); + return; + } + + ATTRIB_POINTERS.vertex.stride = stride; + ATTRIB_POINTERS.vertex.type = type; + ATTRIB_POINTERS.vertex.size = size; + ATTRIB_POINTERS.vertex_func = calcReadPositionFunc(); + + _glRecalcFastPath(); +} + +void APIENTRY glColorPointer(GLint size, GLenum type, GLsizei stride, const GLvoid * pointer) { + TRACE(); + + stride = (stride) ? stride : ((size == GL_BGRA) ? 4 : size) * byte_size(type); + ATTRIB_POINTERS.colour.ptr = pointer; + + if(_glStateUnchanged(&ATTRIB_POINTERS.colour, size, type, stride)) return; + + if(size != 3 && size != 4 && size != GL_BGRA) { + _glKosThrowError(GL_INVALID_VALUE, __func__); + return; + } + + ATTRIB_POINTERS.colour.type = type; + ATTRIB_POINTERS.colour.size = size; + ATTRIB_POINTERS.colour.stride = stride; + ATTRIB_POINTERS.colour_func = calcReadDiffuseFunc(); + + _glRecalcFastPath(); +} + +void APIENTRY glNormalPointer(GLenum type, GLsizei stride, const GLvoid * pointer) { + TRACE(); + + GLint validTypes[] = { + GL_DOUBLE, + GL_FLOAT, + GL_BYTE, + GL_UNSIGNED_BYTE, + GL_INT, + GL_UNSIGNED_INT, + GL_UNSIGNED_INT_2_10_10_10_REV, + 0 + }; + + stride = (stride) ? stride : ATTRIB_POINTERS.normal.size * byte_size(type); + ATTRIB_POINTERS.normal.ptr = pointer; + + if(_glStateUnchanged(&ATTRIB_POINTERS.normal, 3, type, stride)) return; + + if(_glCheckValidEnum(type, validTypes, __func__) != 0) { + return; + } + + ATTRIB_POINTERS.normal.size = (type == GL_UNSIGNED_INT_2_10_10_10_REV) ? 1 : 3; + ATTRIB_POINTERS.normal.stride = stride; + ATTRIB_POINTERS.normal.type = type; + ATTRIB_POINTERS.normal_func = calcReadNormalFunc(); + + _glRecalcFastPath(); +} + + +void _glInitAttributePointers() { + TRACE(); + + ATTRIB_POINTERS.vertex.ptr = NULL; + ATTRIB_POINTERS.vertex.stride = 0; + ATTRIB_POINTERS.vertex.type = GL_FLOAT; + ATTRIB_POINTERS.vertex.size = 4; + ATTRIB_POINTERS.vertex_func = calcReadPositionFunc(); + + ATTRIB_POINTERS.colour.ptr = NULL; + ATTRIB_POINTERS.colour.stride = 0; + ATTRIB_POINTERS.colour.type = GL_FLOAT; + ATTRIB_POINTERS.colour.size = 4; + ATTRIB_POINTERS.colour_func = calcReadDiffuseFunc(); + + ATTRIB_POINTERS.uv.ptr = NULL; + ATTRIB_POINTERS.uv.stride = 0; + ATTRIB_POINTERS.uv.type = GL_FLOAT; + ATTRIB_POINTERS.uv.size = 4; + ATTRIB_POINTERS.uv_func = calcReadUVFunc(); + + ATTRIB_POINTERS.st.ptr = NULL; + ATTRIB_POINTERS.st.stride = 0; + ATTRIB_POINTERS.st.type = GL_FLOAT; + ATTRIB_POINTERS.st.size = 4; + ATTRIB_POINTERS.st_func = calcReadSTFunc(); + + ATTRIB_POINTERS.normal.ptr = NULL; + ATTRIB_POINTERS.normal.stride = 0; + ATTRIB_POINTERS.normal.type = GL_FLOAT; + ATTRIB_POINTERS.normal.size = 3; + ATTRIB_POINTERS.normal_func = calcReadNormalFunc(); +} diff --git a/GL/draw.c b/GL/draw.c index b822c7f..514d03c 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -8,15 +8,8 @@ #include "private.h" #include "platform.h" - -AttribPointerList ATTRIB_POINTERS; -GLuint ENABLED_VERTEX_ATTRIBUTES = 0; GLuint FAST_PATH_ENABLED = GL_FALSE; - -static GLubyte ACTIVE_CLIENT_TEXTURE = 0; -static const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; - -extern inline GLuint _glRecalcFastPath(); +GLubyte ACTIVE_CLIENT_TEXTURE; extern GLboolean AUTOSORT_ENABLED; @@ -25,296 +18,6 @@ extern GLboolean AUTOSORT_ENABLED; while(i--) -void _glInitAttributePointers() { - TRACE(); - - ATTRIB_POINTERS.vertex.ptr = NULL; - ATTRIB_POINTERS.vertex.stride = 0; - ATTRIB_POINTERS.vertex.type = GL_FLOAT; - ATTRIB_POINTERS.vertex.size = 4; - - ATTRIB_POINTERS.colour.ptr = NULL; - ATTRIB_POINTERS.colour.stride = 0; - ATTRIB_POINTERS.colour.type = GL_FLOAT; - ATTRIB_POINTERS.colour.size = 4; - - ATTRIB_POINTERS.uv.ptr = NULL; - ATTRIB_POINTERS.uv.stride = 0; - ATTRIB_POINTERS.uv.type = GL_FLOAT; - ATTRIB_POINTERS.uv.size = 4; - - ATTRIB_POINTERS.st.ptr = NULL; - ATTRIB_POINTERS.st.stride = 0; - ATTRIB_POINTERS.st.type = GL_FLOAT; - ATTRIB_POINTERS.st.size = 4; - - ATTRIB_POINTERS.normal.ptr = NULL; - ATTRIB_POINTERS.normal.stride = 0; - ATTRIB_POINTERS.normal.type = GL_FLOAT; - ATTRIB_POINTERS.normal.size = 3; -} - -GL_FORCE_INLINE GLsizei byte_size(GLenum type) { - switch(type) { - case GL_BYTE: return sizeof(GLbyte); - case GL_UNSIGNED_BYTE: return sizeof(GLubyte); - case GL_SHORT: return sizeof(GLshort); - case GL_UNSIGNED_SHORT: return sizeof(GLushort); - case GL_INT: return sizeof(GLint); - case GL_UNSIGNED_INT: return sizeof(GLuint); - case GL_DOUBLE: return sizeof(GLdouble); - case GL_UNSIGNED_INT_2_10_10_10_REV: return sizeof(GLuint); - case GL_FLOAT: - default: return sizeof(GLfloat); - } -} - -typedef void (*FloatParseFunc)(GLfloat* out, const GLubyte* in); -typedef void (*ByteParseFunc)(GLubyte* out, const GLubyte* in); -typedef void (*PolyBuildFunc)(Vertex* first, Vertex* previous, Vertex* vertex, Vertex* next, const GLsizei i); - -static void _readVertexData3f3f(const GLubyte* __restrict__ in, GLubyte* __restrict__ out) { - vec3cpy(out, in); -} - -// 10:10:10:2REV format -static void _readVertexData1i3f(const GLubyte* in, GLubyte* out) { - static const float MULTIPLIER = 1.0f / 1023.0f; - - GLfloat* output = (GLfloat*) out; - - union { - int value; - struct { - signed int x: 10; - signed int y: 10; - signed int z: 10; - signed int w: 2; - } bits; - } input; - - input.value = *((const GLint*) in); - - output[0] = (2.0f * (float) input.bits.x + 1.0f) * MULTIPLIER; - output[1] = (2.0f * (float) input.bits.y + 1.0f) * MULTIPLIER; - output[2] = (2.0f * (float) input.bits.z + 1.0f) * MULTIPLIER; -} - -static void _readVertexData3us3f(const GLubyte* in, GLubyte* out) { - const GLushort* input = (const GLushort*) in; - float* output = (float*) out; - - output[0] = input[0]; - output[1] = input[1]; - output[2] = input[2]; -} - -static void _readVertexData3ui3f(const GLubyte* in, GLubyte* out) { - const GLuint* input = (const GLuint*) in; - float* output = (float*) out; - - output[0] = input[0]; - output[1] = input[1]; - output[2] = input[2]; -} - - -static void _readVertexData3ub3f(const GLubyte* input, GLubyte* out) { - float* output = (float*) out; - - output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE; - output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE; - output[2] = input[2] * ONE_OVER_TWO_FIVE_FIVE; -} - -static void _readVertexData2f2f(const GLubyte* in, GLubyte* out) { - vec2cpy(out, in); -} - -static void _readVertexData2f3f(const GLubyte* in, GLubyte* out) { - const float* input = (const float*) in; - float* output = (float*) out; - - vec2cpy(output, input); - output[2] = 0.0f; -} - -static void _readVertexData2ub3f(const GLubyte* input, GLubyte* out) { - float* output = (float*) out; - - output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE; - output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE; - output[2] = 0.0f; -} - -static void _readVertexData2us3f(const GLubyte* in, GLubyte* out) { - const GLushort* input = (const GLushort*) in; - float* output = (float*) out; - - output[0] = input[0]; - output[1] = input[1]; - output[2] = 0.0f; -} - -static void _readVertexData2us2f(const GLubyte* in, GLubyte* out) { - const GLushort* input = (const GLushort*) in; - float* output = (float*) out; - - output[0] = (float)input[0] / SHRT_MAX; - output[1] = (float)input[1] / SHRT_MAX; -} - -static void _readVertexData2ui2f(const GLubyte* in, GLubyte* out) { - const GLuint* input = (const GLuint*) in; - float* output = (float*) out; - - output[0] = input[0]; - output[1] = input[1]; -} - -static void _readVertexData2ub2f(const GLubyte* input, GLubyte* out) { - float* output = (float*) out; - - output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE; - output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE; -} - -static void _readVertexData2ui3f(const GLubyte* in, GLubyte* out) { - const GLuint* input = (const GLuint*) in; - float* output = (float*) out; - - output[0] = input[0]; - output[1] = input[1]; - output[2] = 0.0f; -} - -static void _readVertexData4ubARGB(const GLubyte* input, GLubyte* output) { - output[R8IDX] = input[0]; - output[G8IDX] = input[1]; - output[B8IDX] = input[2]; - output[A8IDX] = input[3]; -} - -static void _readVertexData4fARGB(const GLubyte* in, GLubyte* output) { - const float* input = (const float*) in; - - output[R8IDX] = (GLubyte) clamp(input[0] * 255.0f, 0, 255); - output[G8IDX] = (GLubyte) clamp(input[1] * 255.0f, 0, 255); - output[B8IDX] = (GLubyte) clamp(input[2] * 255.0f, 0, 255); - output[A8IDX] = (GLubyte) clamp(input[3] * 255.0f, 0, 255); -} - -static void _readVertexData3fARGB(const GLubyte* in, GLubyte* output) { - const float* input = (const float*) in; - - output[R8IDX] = (GLubyte) clamp(input[0] * 255.0f, 0, 255); - output[G8IDX] = (GLubyte) clamp(input[1] * 255.0f, 0, 255); - output[B8IDX] = (GLubyte) clamp(input[2] * 255.0f, 0, 255); - output[A8IDX] = 255; -} - -static void _readVertexData3ubARGB(const GLubyte* __restrict__ input, GLubyte* __restrict__ output) { - output[R8IDX] = input[0]; - output[G8IDX] = input[1]; - output[B8IDX] = input[2]; - output[A8IDX] = 255; -} - -static void _readVertexData4ubRevARGB(const GLubyte* __restrict__ input, GLubyte* __restrict__ output) { - argbcpy(output, input); -} - -static void _readVertexData4fRevARGB(const GLubyte* __restrict__ in, GLubyte* __restrict__ output) { - const float* input = (const float*) in; - - output[0] = (GLubyte) clamp(input[0] * 255.0f, 0, 255); - output[1] = (GLubyte) clamp(input[1] * 255.0f, 0, 255); - output[2] = (GLubyte) clamp(input[2] * 255.0f, 0, 255); - output[3] = (GLubyte) clamp(input[3] * 255.0f, 0, 255); -} - -static void _fillWithNegZVE(const GLubyte* __restrict__ input, GLubyte* __restrict__ out) { - _GL_UNUSED(input); - - typedef struct { - float x, y, z; - } V; - - static const V NegZ = {0.0f, 0.0f, -1.0f}; - - *((V*) out) = NegZ; -} - -static void _fillWhiteARGB(const GLubyte* __restrict__ input, GLubyte* __restrict__ output) { - _GL_UNUSED(input); - *((uint32_t*) output) = ~0; -} - -static void _fillZero2f(const GLubyte* __restrict__ input, GLubyte* __restrict__ out) { - _GL_UNUSED(input); - memset(out, 0, sizeof(float) * 2); -} - -static void _readVertexData3usARGB(const GLubyte* input, GLubyte* output) { - _GL_UNUSED(input); - _GL_UNUSED(output); - gl_assert(0 && "Not Implemented"); -} - -static void _readVertexData3uiARGB(const GLubyte* input, GLubyte* output) { - _GL_UNUSED(input); - _GL_UNUSED(output); - gl_assert(0 && "Not Implemented"); -} - -static void _readVertexData4usARGB(const GLubyte* input, GLubyte* output) { - _GL_UNUSED(input); - _GL_UNUSED(output); - gl_assert(0 && "Not Implemented"); -} - -static void _readVertexData4uiARGB(const GLubyte* input, GLubyte* output) { - _GL_UNUSED(input); - _GL_UNUSED(output); - gl_assert(0 && "Not Implemented"); -} - -static void _readVertexData4usRevARGB(const GLubyte* input, GLubyte* output) { - _GL_UNUSED(input); - _GL_UNUSED(output); - gl_assert(0 && "Not Implemented"); -} - -static void _readVertexData4uiRevARGB(const GLubyte* input, GLubyte* output) { - _GL_UNUSED(input); - _GL_UNUSED(output); - gl_assert(0 && "Not Implemented"); -} - -GLuint* _glGetEnabledAttributes() { - return &ENABLED_VERTEX_ATTRIBUTES; -} - -AttribPointer* _glGetVertexAttribPointer() { - return &ATTRIB_POINTERS.vertex; -} - -AttribPointer* _glGetDiffuseAttribPointer() { - return &ATTRIB_POINTERS.colour; -} - -AttribPointer* _glGetNormalAttribPointer() { - return &ATTRIB_POINTERS.normal; -} - -AttribPointer* _glGetUVAttribPointer() { - return &ATTRIB_POINTERS.uv; -} - -AttribPointer* _glGetSTAttribPointer() { - return &ATTRIB_POINTERS.st; -} - typedef GLuint (*IndexParseFunc)(const GLubyte* in); static inline GLuint _parseUByteIndex(const GLubyte* in) { @@ -329,6 +32,14 @@ static inline GLuint _parseUShortIndex(const GLubyte* in) { return *((GLshort*) in); } +GL_FORCE_INLINE GLsizei index_size(GLenum type) { + switch(type) { + case GL_UNSIGNED_BYTE: return sizeof(GLubyte); + case GL_UNSIGNED_SHORT: return sizeof(GLushort); + case GL_UNSIGNED_INT: return sizeof(GLuint); + default: return sizeof(GLushort); + } +} GL_FORCE_INLINE IndexParseFunc _calcParseIndexFunc(GLenum type) { switch(type) { @@ -574,138 +285,8 @@ static GL_NO_INLINE void genLineLoop(Vertex* output, GLuint count) { draw_line(dst, &first, &last); } -typedef void (*ReadPositionFunc)(const GLubyte*, GLubyte*); -typedef void (*ReadDiffuseFunc)(const GLubyte*, GLubyte*); -typedef void (*ReadUVFunc)(const GLubyte*, GLubyte*); -typedef void (*ReadNormalFunc)(const GLubyte*, GLubyte*); - -ReadPositionFunc calcReadDiffuseFunc() { - if((ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) != DIFFUSE_ENABLED_FLAG) { - /* Just fill the whole thing white if the attribute is disabled */ - return _fillWhiteARGB; - } - - switch(ATTRIB_POINTERS.colour.type) { - default: - case GL_DOUBLE: - case GL_FLOAT: - return (ATTRIB_POINTERS.colour.size == 3) ? _readVertexData3fARGB: - (ATTRIB_POINTERS.colour.size == 4) ? _readVertexData4fARGB: - _readVertexData4fRevARGB; - case GL_BYTE: - case GL_UNSIGNED_BYTE: - return (ATTRIB_POINTERS.colour.size == 3) ? _readVertexData3ubARGB: - (ATTRIB_POINTERS.colour.size == 4) ? _readVertexData4ubARGB: - _readVertexData4ubRevARGB; - case GL_SHORT: - case GL_UNSIGNED_SHORT: - return (ATTRIB_POINTERS.colour.size == 3) ? _readVertexData3usARGB: - (ATTRIB_POINTERS.colour.size == 4) ? _readVertexData4usARGB: - _readVertexData4usRevARGB; - case GL_INT: - case GL_UNSIGNED_INT: - return (ATTRIB_POINTERS.colour.size == 3) ? _readVertexData3uiARGB: - (ATTRIB_POINTERS.colour.size == 4) ? _readVertexData4uiARGB: - _readVertexData4uiRevARGB; - } -} - -ReadPositionFunc calcReadPositionFunc() { - switch(ATTRIB_POINTERS.vertex.type) { - default: - case GL_DOUBLE: - case GL_FLOAT: - return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3f3f: - _readVertexData2f3f; - case GL_BYTE: - case GL_UNSIGNED_BYTE: - return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3ub3f: - _readVertexData2ub3f; - case GL_SHORT: - case GL_UNSIGNED_SHORT: - return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3us3f: - _readVertexData2us3f; - case GL_INT: - case GL_UNSIGNED_INT: - return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3ui3f: - _readVertexData2ui3f; - } -} - -ReadUVFunc calcReadUVFunc() { - if((ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) != UV_ENABLED_FLAG) { - return _fillZero2f; - } - - switch(ATTRIB_POINTERS.uv.type) { - default: - case GL_DOUBLE: - case GL_FLOAT: - return _readVertexData2f2f; - case GL_BYTE: - case GL_UNSIGNED_BYTE: - return _readVertexData2ub2f; - case GL_SHORT: - case GL_UNSIGNED_SHORT: - return _readVertexData2us2f; - case GL_INT: - case GL_UNSIGNED_INT: - return _readVertexData2ui2f; - } -} - -ReadUVFunc calcReadSTFunc() { - if((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) != ST_ENABLED_FLAG) { - return _fillZero2f; - } - - switch(ATTRIB_POINTERS.st.type) { - default: - case GL_DOUBLE: - case GL_FLOAT: - return _readVertexData2f2f; - case GL_BYTE: - case GL_UNSIGNED_BYTE: - return _readVertexData2ub2f; - case GL_SHORT: - case GL_UNSIGNED_SHORT: - return _readVertexData2us2f; - case GL_INT: - case GL_UNSIGNED_INT: - return _readVertexData2ui2f; - } -} - -ReadNormalFunc calcReadNormalFunc() { - if((ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) != NORMAL_ENABLED_FLAG) { - return _fillWithNegZVE; - } - - switch(ATTRIB_POINTERS.normal.type) { - default: - case GL_DOUBLE: - case GL_FLOAT: - return _readVertexData3f3f; - break; - case GL_BYTE: - case GL_UNSIGNED_BYTE: - return _readVertexData3ub3f; - break; - case GL_SHORT: - case GL_UNSIGNED_SHORT: - return _readVertexData3us3f; - break; - case GL_INT: - case GL_UNSIGNED_INT: - return _readVertexData3ui3f; - break; - case GL_UNSIGNED_INT_2_10_10_10_REV: - return _readVertexData1i3f; - break; - } -} - -static void _readPositionData(ReadDiffuseFunc func, const GLuint first, const GLuint count, Vertex* it) { +static void _readPositionData(const GLuint first, const GLuint count, Vertex* it) { + const ReadAttributeFunc func = ATTRIB_POINTERS.vertex_func; const GLsizei vstride = ATTRIB_POINTERS.vertex.stride; const GLubyte* vptr = ((GLubyte*) ATTRIB_POINTERS.vertex.ptr + (first * vstride)); @@ -722,7 +303,8 @@ static void _readPositionData(ReadDiffuseFunc func, const GLuint first, const GL } } -static void _readUVData(ReadUVFunc func, const GLuint first, const GLuint count, Vertex* it) { +static void _readUVData(const GLuint first, const GLuint count, Vertex* it) { + const ReadAttributeFunc func = ATTRIB_POINTERS.uv_func; const GLsizei uvstride = ATTRIB_POINTERS.uv.stride; const GLubyte* uvptr = ((GLubyte*) ATTRIB_POINTERS.uv.ptr + (first * uvstride)); @@ -735,7 +317,8 @@ static void _readUVData(ReadUVFunc func, const GLuint first, const GLuint count, } } -static void _readSTData(ReadUVFunc func, const GLuint first, const GLuint count, VertexExtra* it) { +static void _readSTData(const GLuint first, const GLuint count, VertexExtra* it) { + const ReadAttributeFunc func = ATTRIB_POINTERS.st_func; const GLsizei ststride = ATTRIB_POINTERS.st.stride; const GLubyte* stptr = ((GLubyte*) ATTRIB_POINTERS.st.ptr + (first * ststride)); @@ -747,7 +330,8 @@ static void _readSTData(ReadUVFunc func, const GLuint first, const GLuint count, } } -static void _readNormalData(ReadNormalFunc func, const GLuint first, const GLuint count, VertexExtra* it) { +static void _readNormalData(const GLuint first, const GLuint count, VertexExtra* it) { + const ReadAttributeFunc func = ATTRIB_POINTERS.normal_func; const GLsizei nstride = ATTRIB_POINTERS.normal.stride; const GLubyte* nptr = ((GLubyte*) ATTRIB_POINTERS.normal.ptr + (first * nstride)); @@ -771,11 +355,8 @@ static void _readNormalData(ReadNormalFunc func, const GLuint first, const GLuin } } -GL_FORCE_INLINE GLuint diffusePointerSize() { - return (ATTRIB_POINTERS.colour.size == GL_BGRA) ? 4 : ATTRIB_POINTERS.colour.size; -} - -static void _readDiffuseData(ReadDiffuseFunc func, const GLuint first, const GLuint count, Vertex* it) { +static void _readDiffuseData(const GLuint first, const GLuint count, Vertex* it) { + const ReadAttributeFunc func = ATTRIB_POINTERS.colour_func; const GLuint cstride = ATTRIB_POINTERS.colour.stride; const GLubyte* cptr = ((GLubyte*) ATTRIB_POINTERS.colour.ptr) + (first * cstride); @@ -791,7 +372,7 @@ static void generateElements( SubmissionTarget* target, const GLsizei first, const GLuint count, const GLubyte* indices, const GLenum type) { - const GLsizei istride = byte_size(type); + const GLsizei istride = index_size(type); const IndexParseFunc IndexFunc = _calcParseIndexFunc(type); GLubyte* xyz; @@ -807,19 +388,19 @@ static void generateElements( uint32_t i = first; uint32_t idx = 0; - const ReadPositionFunc pos_func = calcReadPositionFunc(); + const ReadAttributeFunc pos_func = ATTRIB_POINTERS.vertex_func; const GLsizei vstride = ATTRIB_POINTERS.vertex.stride; - const ReadUVFunc uv_func = calcReadUVFunc(); + const ReadAttributeFunc uv_func = ATTRIB_POINTERS.uv_func; const GLuint uvstride = ATTRIB_POINTERS.uv.stride; - const ReadUVFunc st_func = calcReadSTFunc(); + const ReadAttributeFunc st_func = ATTRIB_POINTERS.st_func; const GLuint ststride = ATTRIB_POINTERS.st.stride; - const ReadDiffuseFunc diffuse_func = calcReadDiffuseFunc(); + const ReadAttributeFunc diffuse_func = ATTRIB_POINTERS.colour_func; const GLuint dstride = ATTRIB_POINTERS.colour.stride; - const ReadNormalFunc normal_func = calcReadNormalFunc(); + const ReadAttributeFunc normal_func = ATTRIB_POINTERS.normal_func; const GLuint nstride = ATTRIB_POINTERS.normal.stride; for(; i < first + count; ++i) { @@ -867,7 +448,7 @@ static void generateElementsFastPath( const GLuint dstride = ATTRIB_POINTERS.colour.stride; const GLuint nstride = ATTRIB_POINTERS.normal.stride; - const GLsizei istride = byte_size(type); + const GLsizei istride = index_size(type); const IndexParseFunc IndexFunc = _calcParseIndexFunc(type); /* Copy the pos, uv and color directly in one go */ @@ -965,17 +546,11 @@ static void generateArrays(SubmissionTarget* target, const GLsizei first, const Vertex* start = _glSubmissionTargetStart(target); VertexExtra* ve = aligned_vector_at(target->extras, 0); - const ReadPositionFunc pfunc = calcReadPositionFunc(); - const ReadDiffuseFunc dfunc = calcReadDiffuseFunc(); - const ReadUVFunc uvfunc = calcReadUVFunc(); - const ReadNormalFunc nfunc = calcReadNormalFunc(); - const ReadUVFunc stfunc = calcReadSTFunc(); - - _readPositionData(pfunc, first, count, start); - _readDiffuseData(dfunc, first, count, start); - _readUVData(uvfunc, first, count, start); - _readNormalData(nfunc, first, count, ve); - _readSTData(stfunc, first, count, ve); + _readPositionData(first, count, start); + _readDiffuseData(first, count, start); + _readUVData(first, count, start); + _readNormalData(first, count, ve); + _readSTData(first, count, ve); } static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei first, const GLuint count, @@ -1397,60 +972,6 @@ void APIENTRY glDrawArrays(GLenum mode, GLint first, GLsizei count) { submitVertices(mode, first, count, GL_UNSIGNED_INT, NULL); } -void APIENTRY glEnableClientState(GLenum cap) { - TRACE(); - - switch(cap) { - case GL_VERTEX_ARRAY: - ENABLED_VERTEX_ATTRIBUTES |= VERTEX_ENABLED_FLAG; - break; - case GL_COLOR_ARRAY: - ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; - break; - case GL_NORMAL_ARRAY: - ENABLED_VERTEX_ATTRIBUTES |= NORMAL_ENABLED_FLAG; - break; - case GL_TEXTURE_COORD_ARRAY: - (ACTIVE_CLIENT_TEXTURE) ? - (ENABLED_VERTEX_ATTRIBUTES |= ST_ENABLED_FLAG): - (ENABLED_VERTEX_ATTRIBUTES |= UV_ENABLED_FLAG); - break; - default: - _glKosThrowError(GL_INVALID_ENUM, __func__); - } - - /* It's possible that we called glVertexPointer and friends before - * calling glEnableClientState, so we should recheck to make sure - * everything is in the right format with this new information */ - _glRecalcFastPath(); -} - -void APIENTRY glDisableClientState(GLenum cap) { - TRACE(); - - switch(cap) { - case GL_VERTEX_ARRAY: - ENABLED_VERTEX_ATTRIBUTES &= ~VERTEX_ENABLED_FLAG; - break; - case GL_COLOR_ARRAY: - ENABLED_VERTEX_ATTRIBUTES &= ~DIFFUSE_ENABLED_FLAG; - break; - case GL_NORMAL_ARRAY: - ENABLED_VERTEX_ATTRIBUTES &= ~NORMAL_ENABLED_FLAG; - break; - case GL_TEXTURE_COORD_ARRAY: - (ACTIVE_CLIENT_TEXTURE) ? - (ENABLED_VERTEX_ATTRIBUTES &= ~ST_ENABLED_FLAG): - (ENABLED_VERTEX_ATTRIBUTES &= ~UV_ENABLED_FLAG); - break; - default: - _glKosThrowError(GL_INVALID_ENUM, __func__); - } - - /* State changed, recalculate */ - _glRecalcFastPath(); -} - GLuint _glGetActiveClientTexture() { return ACTIVE_CLIENT_TEXTURE; } @@ -1465,99 +986,3 @@ void APIENTRY glClientActiveTextureARB(GLenum texture) { ACTIVE_CLIENT_TEXTURE = (texture == GL_TEXTURE1_ARB) ? 1 : 0; } - -// Used to avoid checking and updating attribute related state unless necessary -GL_FORCE_INLINE GLboolean _glStateUnchanged(AttribPointer* p, GLint size, GLenum type, GLsizei stride) { - return (p->size == size && p->type == type && p->stride == stride); -} - -void APIENTRY glTexCoordPointer(GLint size, GLenum type, GLsizei stride, const GLvoid * pointer) { - TRACE(); - - stride = (stride) ? stride : size * byte_size(type); - AttribPointer* tointer = (ACTIVE_CLIENT_TEXTURE == 0) ? &ATTRIB_POINTERS.uv : &ATTRIB_POINTERS.st; - tointer->ptr = pointer; - - if(_glStateUnchanged(tointer, size, type, stride)) return; - - if(size < 1 || size > 4) { - _glKosThrowError(GL_INVALID_VALUE, __func__); - return; - } - - tointer->stride = stride; - tointer->type = type; - tointer->size = size; - - _glRecalcFastPath(); -} - -void APIENTRY glVertexPointer(GLint size, GLenum type, GLsizei stride, const GLvoid * pointer) { - TRACE(); - - stride = (stride) ? stride : (size * byte_size(type)); - ATTRIB_POINTERS.vertex.ptr = pointer; - - if(_glStateUnchanged(&ATTRIB_POINTERS.vertex, size, type, stride)) return; - - if(size < 2 || size > 4) { - _glKosThrowError(GL_INVALID_VALUE, __func__); - return; - } - - ATTRIB_POINTERS.vertex.stride = stride; - ATTRIB_POINTERS.vertex.type = type; - ATTRIB_POINTERS.vertex.size = size; - - _glRecalcFastPath(); -} - -void APIENTRY glColorPointer(GLint size, GLenum type, GLsizei stride, const GLvoid * pointer) { - TRACE(); - - stride = (stride) ? stride : ((size == GL_BGRA) ? 4 : size) * byte_size(type); - ATTRIB_POINTERS.colour.ptr = pointer; - - if(_glStateUnchanged(&ATTRIB_POINTERS.colour, size, type, stride)) return; - - if(size != 3 && size != 4 && size != GL_BGRA) { - _glKosThrowError(GL_INVALID_VALUE, __func__); - return; - } - - ATTRIB_POINTERS.colour.type = type; - ATTRIB_POINTERS.colour.size = size; - ATTRIB_POINTERS.colour.stride = stride; - - _glRecalcFastPath(); -} - -void APIENTRY glNormalPointer(GLenum type, GLsizei stride, const GLvoid * pointer) { - TRACE(); - - GLint validTypes[] = { - GL_DOUBLE, - GL_FLOAT, - GL_BYTE, - GL_UNSIGNED_BYTE, - GL_INT, - GL_UNSIGNED_INT, - GL_UNSIGNED_INT_2_10_10_10_REV, - 0 - }; - - stride = (stride) ? stride : ATTRIB_POINTERS.normal.size * byte_size(type); - ATTRIB_POINTERS.normal.ptr = pointer; - - if(_glStateUnchanged(&ATTRIB_POINTERS.normal, 3, type, stride)) return; - - if(_glCheckValidEnum(type, validTypes, __func__) != 0) { - return; - } - - ATTRIB_POINTERS.normal.size = (type == GL_UNSIGNED_INT_2_10_10_10_REV) ? 1 : 3; - ATTRIB_POINTERS.normal.stride = stride; - ATTRIB_POINTERS.normal.type = type; - - _glRecalcFastPath(); -} diff --git a/GL/private.h b/GL/private.h index 8230f01..bf13e0b 100644 --- a/GL/private.h +++ b/GL/private.h @@ -346,6 +346,7 @@ typedef struct { GLsizei stride; // 4 GLint size; // 4 } AttribPointer; +typedef void (*ReadAttributeFunc)(const GLubyte*, GLubyte*); typedef struct { AttribPointer vertex; // 16 @@ -353,25 +354,29 @@ typedef struct { AttribPointer uv; // 48 AttribPointer st; // 64 AttribPointer normal; // 80 - AttribPointer padding; // 96 + + ReadAttributeFunc vertex_func; + ReadAttributeFunc colour_func; + ReadAttributeFunc uv_func; + ReadAttributeFunc st_func; + ReadAttributeFunc normal_func; } AttribPointerList; +extern GLuint ENABLED_VERTEX_ATTRIBUTES; +extern AttribPointerList ATTRIB_POINTERS; + GLboolean _glCheckValidEnum(GLint param, GLint* values, const char* func); GLuint* _glGetEnabledAttributes(); -AttribPointer* _glGetVertexAttribPointer(); -AttribPointer* _glGetDiffuseAttribPointer(); -AttribPointer* _glGetNormalAttribPointer(); -AttribPointer* _glGetUVAttribPointer(); -AttribPointer* _glGetSTAttribPointer(); -GLenum _glGetShadeModel(); +GLenum _glGetShadeModel(); TextureObject* _glGetTexture0(); TextureObject* _glGetTexture1(); TextureObject* _glGetBoundTexture(); extern GLubyte ACTIVE_TEXTURE; extern GLboolean TEXTURES_ENABLED[]; +extern GLubyte ACTIVE_CLIENT_TEXTURE; GLubyte _glGetActiveTexture(); GLint _glGetTextureInternalFormat(); @@ -422,9 +427,6 @@ GLboolean _glIsColorMaterialEnabled(); GLboolean _glIsNormalizeEnabled(); -extern AttribPointerList ATTRIB_POINTERS; - -extern GLuint ENABLED_VERTEX_ATTRIBUTES; extern GLuint FAST_PATH_ENABLED; GL_FORCE_INLINE GLuint _glIsVertexDataFastPathCompatible() { From a35659ce3d0db0b5bdd69e0dda8329849b577d6f Mon Sep 17 00:00:00 2001 From: UnknownShadow200 Date: Sat, 1 Feb 2025 16:05:07 +1100 Subject: [PATCH 2/4] Fix enable/disable attribute calls not updating read functions --- GL/attributes.c | 72 +++++++++++++++++++++---------------------------- 1 file changed, 30 insertions(+), 42 deletions(-) diff --git a/GL/attributes.c b/GL/attributes.c index 4ed6ca3..13d5c0c 100644 --- a/GL/attributes.c +++ b/GL/attributes.c @@ -31,6 +31,12 @@ GL_FORCE_INLINE GLsizei byte_size(GLenum type) { } } +// Used to avoid checking and updating attribute related state unless necessary +GL_FORCE_INLINE GLboolean _glStateUnchanged(AttribPointer* p, GLint size, GLenum type, GLsizei stride) { + return (p->size == size && p->type == type && p->stride == stride); +} + + static void _readVertexData3f3f(const GLubyte* __restrict__ in, GLubyte* __restrict__ out) { vec3cpy(out, in); } @@ -381,18 +387,24 @@ void APIENTRY glEnableClientState(GLenum cap) { switch(cap) { case GL_VERTEX_ARRAY: ENABLED_VERTEX_ATTRIBUTES |= VERTEX_ENABLED_FLAG; - break; + ATTRIB_POINTERS.vertex_func = calcReadPositionFunc(); + break; case GL_COLOR_ARRAY: ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; - break; + ATTRIB_POINTERS.colour_func = calcReadDiffuseFunc(); + break; case GL_NORMAL_ARRAY: ENABLED_VERTEX_ATTRIBUTES |= NORMAL_ENABLED_FLAG; - break; + ATTRIB_POINTERS.normal_func = calcReadNormalFunc(); + break; case GL_TEXTURE_COORD_ARRAY: (ACTIVE_CLIENT_TEXTURE) ? (ENABLED_VERTEX_ATTRIBUTES |= ST_ENABLED_FLAG): (ENABLED_VERTEX_ATTRIBUTES |= UV_ENABLED_FLAG); - break; + + ATTRIB_POINTERS.uv_func = calcReadUVFunc(); + ATTRIB_POINTERS.st_func = calcReadSTFunc(); + break; default: _glKosThrowError(GL_INVALID_ENUM, __func__); } @@ -409,18 +421,24 @@ void APIENTRY glDisableClientState(GLenum cap) { switch(cap) { case GL_VERTEX_ARRAY: ENABLED_VERTEX_ATTRIBUTES &= ~VERTEX_ENABLED_FLAG; - break; + ATTRIB_POINTERS.vertex_func = calcReadPositionFunc(); + break; case GL_COLOR_ARRAY: ENABLED_VERTEX_ATTRIBUTES &= ~DIFFUSE_ENABLED_FLAG; - break; + ATTRIB_POINTERS.colour_func = calcReadDiffuseFunc(); + break; case GL_NORMAL_ARRAY: ENABLED_VERTEX_ATTRIBUTES &= ~NORMAL_ENABLED_FLAG; - break; + ATTRIB_POINTERS.normal_func = calcReadNormalFunc(); + break; case GL_TEXTURE_COORD_ARRAY: (ACTIVE_CLIENT_TEXTURE) ? (ENABLED_VERTEX_ATTRIBUTES &= ~ST_ENABLED_FLAG): (ENABLED_VERTEX_ATTRIBUTES &= ~UV_ENABLED_FLAG); - break; + + ATTRIB_POINTERS.uv_func = calcReadUVFunc(); + ATTRIB_POINTERS.st_func = calcReadSTFunc(); + break; default: _glKosThrowError(GL_INVALID_ENUM, __func__); } @@ -430,11 +448,6 @@ void APIENTRY glDisableClientState(GLenum cap) { } -// Used to avoid checking and updating attribute related state unless necessary -GL_FORCE_INLINE GLboolean _glStateUnchanged(AttribPointer* p, GLint size, GLenum type, GLsizei stride) { - return (p->size == size && p->type == type && p->stride == stride); -} - void APIENTRY glTexCoordPointer(GLint size, GLenum type, GLsizei stride, const GLvoid * pointer) { TRACE(); @@ -535,33 +548,8 @@ void APIENTRY glNormalPointer(GLenum type, GLsizei stride, const GLvoid * poin void _glInitAttributePointers() { TRACE(); - ATTRIB_POINTERS.vertex.ptr = NULL; - ATTRIB_POINTERS.vertex.stride = 0; - ATTRIB_POINTERS.vertex.type = GL_FLOAT; - ATTRIB_POINTERS.vertex.size = 4; - ATTRIB_POINTERS.vertex_func = calcReadPositionFunc(); - - ATTRIB_POINTERS.colour.ptr = NULL; - ATTRIB_POINTERS.colour.stride = 0; - ATTRIB_POINTERS.colour.type = GL_FLOAT; - ATTRIB_POINTERS.colour.size = 4; - ATTRIB_POINTERS.colour_func = calcReadDiffuseFunc(); - - ATTRIB_POINTERS.uv.ptr = NULL; - ATTRIB_POINTERS.uv.stride = 0; - ATTRIB_POINTERS.uv.type = GL_FLOAT; - ATTRIB_POINTERS.uv.size = 4; - ATTRIB_POINTERS.uv_func = calcReadUVFunc(); - - ATTRIB_POINTERS.st.ptr = NULL; - ATTRIB_POINTERS.st.stride = 0; - ATTRIB_POINTERS.st.type = GL_FLOAT; - ATTRIB_POINTERS.st.size = 4; - ATTRIB_POINTERS.st_func = calcReadSTFunc(); - - ATTRIB_POINTERS.normal.ptr = NULL; - ATTRIB_POINTERS.normal.stride = 0; - ATTRIB_POINTERS.normal.type = GL_FLOAT; - ATTRIB_POINTERS.normal.size = 3; - ATTRIB_POINTERS.normal_func = calcReadNormalFunc(); + glVertexPointer(4, GL_FLOAT, 0, NULL); + glTexCoordPointer(2, GL_FLOAT, 0, NULL); + glColorPointer(4, GL_FLOAT, 0, NULL); + glNormalPointer(GL_FLOAT, 0, NULL); } From 3550c89501ee0157bb685c42e306b54657d02a80 Mon Sep 17 00:00:00 2001 From: UnknownShadow200 Date: Sat, 1 Feb 2025 17:04:42 +1100 Subject: [PATCH 3/4] Make non-textured non-fast drawing a little bit faster --- GL/attributes.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/GL/attributes.c b/GL/attributes.c index 13d5c0c..337d427 100644 --- a/GL/attributes.c +++ b/GL/attributes.c @@ -199,24 +199,25 @@ static void _readVertexData4fRevARGB(const GLubyte* __restrict__ in, GLubyte* __ static void _fillWithNegZVE(const GLubyte* __restrict__ input, GLubyte* __restrict__ out) { _GL_UNUSED(input); - - typedef struct { - float x, y, z; - } V; + typedef struct { float x, y, z; } V; static const V NegZ = {0.0f, 0.0f, -1.0f}; *((V*) out) = NegZ; } -static void _fillWhiteARGB(const GLubyte* __restrict__ input, GLubyte* __restrict__ output) { +static void _fillWhiteARGB(const GLubyte* __restrict__ input, GLubyte* __restrict__ output) { _GL_UNUSED(input); *((uint32_t*) output) = ~0; } static void _fillZero2f(const GLubyte* __restrict__ input, GLubyte* __restrict__ out) { _GL_UNUSED(input); - memset(out, 0, sizeof(float) * 2); + //memset(out, 0, sizeof(float) * 2); + // memset does 8 byte writes - faster to manually write as uint32 + uint32_t* dst = (uint32_t*)out; + dst[0] = 0; + dst[1] = 0; } static void _readVertexData3usARGB(const GLubyte* input, GLubyte* output) { From 5318f11f11b58297d0b56bf1f8040a6ca5be0a49 Mon Sep 17 00:00:00 2001 From: UnknownShadow200 Date: Sat, 1 Feb 2025 18:17:28 +1100 Subject: [PATCH 4/4] Defer recomputing attributes state when possible --- GL/attributes.c | 228 +++++++++++++++++++++++++++---------------- GL/draw.c | 98 +++++++++---------- GL/draw_fastpath.inc | 22 ++--- GL/immediate.c | 64 ++++-------- GL/private.h | 65 ++---------- 5 files changed, 232 insertions(+), 245 deletions(-) diff --git a/GL/attributes.c b/GL/attributes.c index 337d427..607e2f3 100644 --- a/GL/attributes.c +++ b/GL/attributes.c @@ -8,14 +8,9 @@ #include "private.h" #include "platform.h" - -AttribPointerList ATTRIB_POINTERS; -GLuint ENABLED_VERTEX_ATTRIBUTES = 0; - +AttribPointerList ATTRIB_LIST; static const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; -extern inline GLuint _glRecalcFastPath(); - GL_FORCE_INLINE GLsizei byte_size(GLenum type) { switch(type) { case GL_BYTE: return sizeof(GLbyte); @@ -36,6 +31,10 @@ GL_FORCE_INLINE GLboolean _glStateUnchanged(AttribPointer* p, GLint size, GLenum return (p->size == size && p->type == type && p->stride == stride); } +GLuint* _glGetEnabledAttributes() { + return &ATTRIB_LIST.enabled; +} + static void _readVertexData3f3f(const GLubyte* __restrict__ in, GLubyte* __restrict__ out) { vec3cpy(out, in); @@ -257,64 +256,64 @@ static void _readVertexData4uiRevARGB(const GLubyte* input, GLubyte* output) { } static ReadAttributeFunc calcReadDiffuseFunc() { - if((ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) != DIFFUSE_ENABLED_FLAG) { + if((ATTRIB_LIST.enabled & DIFFUSE_ENABLED_FLAG) != DIFFUSE_ENABLED_FLAG) { /* Just fill the whole thing white if the attribute is disabled */ return _fillWhiteARGB; } - switch(ATTRIB_POINTERS.colour.type) { + switch(ATTRIB_LIST.colour.type) { default: case GL_DOUBLE: case GL_FLOAT: - return (ATTRIB_POINTERS.colour.size == 3) ? _readVertexData3fARGB: - (ATTRIB_POINTERS.colour.size == 4) ? _readVertexData4fARGB: + return (ATTRIB_LIST.colour.size == 3) ? _readVertexData3fARGB: + (ATTRIB_LIST.colour.size == 4) ? _readVertexData4fARGB: _readVertexData4fRevARGB; case GL_BYTE: case GL_UNSIGNED_BYTE: - return (ATTRIB_POINTERS.colour.size == 3) ? _readVertexData3ubARGB: - (ATTRIB_POINTERS.colour.size == 4) ? _readVertexData4ubARGB: + return (ATTRIB_LIST.colour.size == 3) ? _readVertexData3ubARGB: + (ATTRIB_LIST.colour.size == 4) ? _readVertexData4ubARGB: _readVertexData4ubRevARGB; case GL_SHORT: case GL_UNSIGNED_SHORT: - return (ATTRIB_POINTERS.colour.size == 3) ? _readVertexData3usARGB: - (ATTRIB_POINTERS.colour.size == 4) ? _readVertexData4usARGB: + return (ATTRIB_LIST.colour.size == 3) ? _readVertexData3usARGB: + (ATTRIB_LIST.colour.size == 4) ? _readVertexData4usARGB: _readVertexData4usRevARGB; case GL_INT: case GL_UNSIGNED_INT: - return (ATTRIB_POINTERS.colour.size == 3) ? _readVertexData3uiARGB: - (ATTRIB_POINTERS.colour.size == 4) ? _readVertexData4uiARGB: + return (ATTRIB_LIST.colour.size == 3) ? _readVertexData3uiARGB: + (ATTRIB_LIST.colour.size == 4) ? _readVertexData4uiARGB: _readVertexData4uiRevARGB; } } static ReadAttributeFunc calcReadPositionFunc() { - switch(ATTRIB_POINTERS.vertex.type) { + switch(ATTRIB_LIST.vertex.type) { default: case GL_DOUBLE: case GL_FLOAT: - return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3f3f: + return (ATTRIB_LIST.vertex.size == 3) ? _readVertexData3f3f: _readVertexData2f3f; case GL_BYTE: case GL_UNSIGNED_BYTE: - return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3ub3f: + return (ATTRIB_LIST.vertex.size == 3) ? _readVertexData3ub3f: _readVertexData2ub3f; case GL_SHORT: case GL_UNSIGNED_SHORT: - return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3us3f: + return (ATTRIB_LIST.vertex.size == 3) ? _readVertexData3us3f: _readVertexData2us3f; case GL_INT: case GL_UNSIGNED_INT: - return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3ui3f: + return (ATTRIB_LIST.vertex.size == 3) ? _readVertexData3ui3f: _readVertexData2ui3f; } } static ReadAttributeFunc calcReadUVFunc() { - if((ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) != UV_ENABLED_FLAG) { + if((ATTRIB_LIST.enabled & UV_ENABLED_FLAG) != UV_ENABLED_FLAG) { return _fillZero2f; } - switch(ATTRIB_POINTERS.uv.type) { + switch(ATTRIB_LIST.uv.type) { default: case GL_DOUBLE: case GL_FLOAT: @@ -332,11 +331,11 @@ static ReadAttributeFunc calcReadUVFunc() { } static ReadAttributeFunc calcReadSTFunc() { - if((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) != ST_ENABLED_FLAG) { + if((ATTRIB_LIST.enabled & ST_ENABLED_FLAG) != ST_ENABLED_FLAG) { return _fillZero2f; } - switch(ATTRIB_POINTERS.st.type) { + switch(ATTRIB_LIST.st.type) { default: case GL_DOUBLE: case GL_FLOAT: @@ -354,11 +353,11 @@ static ReadAttributeFunc calcReadSTFunc() { } static ReadAttributeFunc calcReadNormalFunc() { - if((ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) != NORMAL_ENABLED_FLAG) { + if((ATTRIB_LIST.enabled & NORMAL_ENABLED_FLAG) != NORMAL_ENABLED_FLAG) { return _fillWithNegZVE; } - switch(ATTRIB_POINTERS.normal.type) { + switch(ATTRIB_LIST.normal.type) { default: case GL_DOUBLE: case GL_FLOAT: @@ -387,33 +386,29 @@ void APIENTRY glEnableClientState(GLenum cap) { switch(cap) { case GL_VERTEX_ARRAY: - ENABLED_VERTEX_ATTRIBUTES |= VERTEX_ENABLED_FLAG; - ATTRIB_POINTERS.vertex_func = calcReadPositionFunc(); + ATTRIB_LIST.enabled |= VERTEX_ENABLED_FLAG; + ATTRIB_LIST.dirty |= VERTEX_ENABLED_FLAG; break; case GL_COLOR_ARRAY: - ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; - ATTRIB_POINTERS.colour_func = calcReadDiffuseFunc(); + ATTRIB_LIST.enabled |= DIFFUSE_ENABLED_FLAG; + ATTRIB_LIST.dirty |= DIFFUSE_ENABLED_FLAG; break; case GL_NORMAL_ARRAY: - ENABLED_VERTEX_ATTRIBUTES |= NORMAL_ENABLED_FLAG; - ATTRIB_POINTERS.normal_func = calcReadNormalFunc(); + ATTRIB_LIST.enabled |= NORMAL_ENABLED_FLAG; + ATTRIB_LIST.dirty |= NORMAL_ENABLED_FLAG; break; case GL_TEXTURE_COORD_ARRAY: (ACTIVE_CLIENT_TEXTURE) ? - (ENABLED_VERTEX_ATTRIBUTES |= ST_ENABLED_FLAG): - (ENABLED_VERTEX_ATTRIBUTES |= UV_ENABLED_FLAG); + (ATTRIB_LIST.enabled |= ST_ENABLED_FLAG): + (ATTRIB_LIST.enabled |= UV_ENABLED_FLAG); - ATTRIB_POINTERS.uv_func = calcReadUVFunc(); - ATTRIB_POINTERS.st_func = calcReadSTFunc(); + (ACTIVE_CLIENT_TEXTURE) ? + (ATTRIB_LIST.dirty |= ST_ENABLED_FLAG): + (ATTRIB_LIST.dirty |= UV_ENABLED_FLAG); break; default: _glKosThrowError(GL_INVALID_ENUM, __func__); } - - /* It's possible that we called glVertexPointer and friends before - * calling glEnableClientState, so we should recheck to make sure - * everything is in the right format with this new information */ - _glRecalcFastPath(); } void APIENTRY glDisableClientState(GLenum cap) { @@ -421,31 +416,29 @@ void APIENTRY glDisableClientState(GLenum cap) { switch(cap) { case GL_VERTEX_ARRAY: - ENABLED_VERTEX_ATTRIBUTES &= ~VERTEX_ENABLED_FLAG; - ATTRIB_POINTERS.vertex_func = calcReadPositionFunc(); + ATTRIB_LIST.enabled &= ~VERTEX_ENABLED_FLAG; + ATTRIB_LIST.dirty |= VERTEX_ENABLED_FLAG; break; case GL_COLOR_ARRAY: - ENABLED_VERTEX_ATTRIBUTES &= ~DIFFUSE_ENABLED_FLAG; - ATTRIB_POINTERS.colour_func = calcReadDiffuseFunc(); + ATTRIB_LIST.enabled &= ~DIFFUSE_ENABLED_FLAG; + ATTRIB_LIST.dirty |= DIFFUSE_ENABLED_FLAG; break; case GL_NORMAL_ARRAY: - ENABLED_VERTEX_ATTRIBUTES &= ~NORMAL_ENABLED_FLAG; - ATTRIB_POINTERS.normal_func = calcReadNormalFunc(); + ATTRIB_LIST.enabled &= ~NORMAL_ENABLED_FLAG; + ATTRIB_LIST.dirty |= NORMAL_ENABLED_FLAG; break; case GL_TEXTURE_COORD_ARRAY: (ACTIVE_CLIENT_TEXTURE) ? - (ENABLED_VERTEX_ATTRIBUTES &= ~ST_ENABLED_FLAG): - (ENABLED_VERTEX_ATTRIBUTES &= ~UV_ENABLED_FLAG); + (ATTRIB_LIST.enabled &= ~ST_ENABLED_FLAG): + (ATTRIB_LIST.enabled &= ~UV_ENABLED_FLAG); - ATTRIB_POINTERS.uv_func = calcReadUVFunc(); - ATTRIB_POINTERS.st_func = calcReadSTFunc(); + (ACTIVE_CLIENT_TEXTURE) ? + (ATTRIB_LIST.dirty |= ST_ENABLED_FLAG): + (ATTRIB_LIST.dirty |= UV_ENABLED_FLAG); break; default: _glKosThrowError(GL_INVALID_ENUM, __func__); } - - /* State changed, recalculate */ - _glRecalcFastPath(); } @@ -453,7 +446,7 @@ void APIENTRY glTexCoordPointer(GLint size, GLenum type, GLsizei stride, const G TRACE(); stride = (stride) ? stride : size * byte_size(type); - AttribPointer* tointer = (ACTIVE_CLIENT_TEXTURE == 0) ? &ATTRIB_POINTERS.uv : &ATTRIB_POINTERS.st; + AttribPointer* tointer = (ACTIVE_CLIENT_TEXTURE == 0) ? &ATTRIB_LIST.uv : &ATTRIB_LIST.st; tointer->ptr = pointer; if(_glStateUnchanged(tointer, size, type, stride)) return; @@ -467,51 +460,49 @@ void APIENTRY glTexCoordPointer(GLint size, GLenum type, GLsizei stride, const G tointer->type = type; tointer->size = size; - ATTRIB_POINTERS.uv_func = calcReadUVFunc(); - ATTRIB_POINTERS.st_func = calcReadSTFunc(); - _glRecalcFastPath(); + (ACTIVE_CLIENT_TEXTURE) ? + (ATTRIB_LIST.dirty |= ST_ENABLED_FLAG): + (ATTRIB_LIST.dirty |= UV_ENABLED_FLAG); } void APIENTRY glVertexPointer(GLint size, GLenum type, GLsizei stride, const GLvoid * pointer) { TRACE(); stride = (stride) ? stride : (size * byte_size(type)); - ATTRIB_POINTERS.vertex.ptr = pointer; + ATTRIB_LIST.vertex.ptr = pointer; - if(_glStateUnchanged(&ATTRIB_POINTERS.vertex, size, type, stride)) return; + if(_glStateUnchanged(&ATTRIB_LIST.vertex, size, type, stride)) return; if(size < 2 || size > 4) { _glKosThrowError(GL_INVALID_VALUE, __func__); return; } - ATTRIB_POINTERS.vertex.stride = stride; - ATTRIB_POINTERS.vertex.type = type; - ATTRIB_POINTERS.vertex.size = size; - ATTRIB_POINTERS.vertex_func = calcReadPositionFunc(); + ATTRIB_LIST.vertex.stride = stride; + ATTRIB_LIST.vertex.type = type; + ATTRIB_LIST.vertex.size = size; - _glRecalcFastPath(); + ATTRIB_LIST.dirty |= VERTEX_ENABLED_FLAG; } void APIENTRY glColorPointer(GLint size, GLenum type, GLsizei stride, const GLvoid * pointer) { TRACE(); stride = (stride) ? stride : ((size == GL_BGRA) ? 4 : size) * byte_size(type); - ATTRIB_POINTERS.colour.ptr = pointer; + ATTRIB_LIST.colour.ptr = pointer; - if(_glStateUnchanged(&ATTRIB_POINTERS.colour, size, type, stride)) return; + if(_glStateUnchanged(&ATTRIB_LIST.colour, size, type, stride)) return; if(size != 3 && size != 4 && size != GL_BGRA) { _glKosThrowError(GL_INVALID_VALUE, __func__); return; } - ATTRIB_POINTERS.colour.type = type; - ATTRIB_POINTERS.colour.size = size; - ATTRIB_POINTERS.colour.stride = stride; - ATTRIB_POINTERS.colour_func = calcReadDiffuseFunc(); + ATTRIB_LIST.colour.type = type; + ATTRIB_LIST.colour.size = size; + ATTRIB_LIST.colour.stride = stride; - _glRecalcFastPath(); + ATTRIB_LIST.dirty |= DIFFUSE_ENABLED_FLAG; } void APIENTRY glNormalPointer(GLenum type, GLsizei stride, const GLvoid * pointer) { @@ -528,29 +519,102 @@ void APIENTRY glNormalPointer(GLenum type, GLsizei stride, const GLvoid * poin 0 }; - stride = (stride) ? stride : ATTRIB_POINTERS.normal.size * byte_size(type); - ATTRIB_POINTERS.normal.ptr = pointer; + stride = (stride) ? stride : ATTRIB_LIST.normal.size * byte_size(type); + ATTRIB_LIST.normal.ptr = pointer; - if(_glStateUnchanged(&ATTRIB_POINTERS.normal, 3, type, stride)) return; + if(_glStateUnchanged(&ATTRIB_LIST.normal, 3, type, stride)) return; if(_glCheckValidEnum(type, validTypes, __func__) != 0) { return; } - ATTRIB_POINTERS.normal.size = (type == GL_UNSIGNED_INT_2_10_10_10_REV) ? 1 : 3; - ATTRIB_POINTERS.normal.stride = stride; - ATTRIB_POINTERS.normal.type = type; - ATTRIB_POINTERS.normal_func = calcReadNormalFunc(); + ATTRIB_LIST.normal.size = (type == GL_UNSIGNED_INT_2_10_10_10_REV) ? 1 : 3; + ATTRIB_LIST.normal.stride = stride; + ATTRIB_LIST.normal.type = type; - _glRecalcFastPath(); + ATTRIB_LIST.dirty |= NORMAL_ENABLED_FLAG; } void _glInitAttributePointers() { TRACE(); + ATTRIB_LIST.dirty = ~0; // all attributes dirty - glVertexPointer(4, GL_FLOAT, 0, NULL); + glVertexPointer(3, GL_FLOAT, 0, NULL); glTexCoordPointer(2, GL_FLOAT, 0, NULL); glColorPointer(4, GL_FLOAT, 0, NULL); glNormalPointer(GL_FLOAT, 0, NULL); } + +GL_FORCE_INLINE GLuint _glIsVertexDataFastPathCompatible() { + /* The fast path is enabled when all enabled elements of the vertex + * match the output format. This means: + * + * xyz == 3f + * uv == 2f + * rgba == argb4444 + * st == 2f + * normal == 3f + * + * When this happens we do inline straight copies of the enabled data + * and transforms for positions and normals happen while copying. + */ + + if((ATTRIB_LIST.enabled & VERTEX_ENABLED_FLAG)) { + if(ATTRIB_LIST.vertex.size != 3 || ATTRIB_LIST.vertex.type != GL_FLOAT) { + return GL_FALSE; + } + } + + if((ATTRIB_LIST.enabled & UV_ENABLED_FLAG)) { + if(ATTRIB_LIST.uv.size != 2 || ATTRIB_LIST.uv.type != GL_FLOAT) { + return GL_FALSE; + } + } + + if((ATTRIB_LIST.enabled & DIFFUSE_ENABLED_FLAG)) { + /* FIXME: Shouldn't this be a reversed format? */ + if(ATTRIB_LIST.colour.size != GL_BGRA || ATTRIB_LIST.colour.type != GL_UNSIGNED_BYTE) { + return GL_FALSE; + } + } + + if((ATTRIB_LIST.enabled & ST_ENABLED_FLAG)) { + if(ATTRIB_LIST.st.size != 2 || ATTRIB_LIST.st.type != GL_FLOAT) { + return GL_FALSE; + } + } + + if((ATTRIB_LIST.enabled & NORMAL_ENABLED_FLAG)) { + if(ATTRIB_LIST.normal.size != 3 || ATTRIB_LIST.normal.type != GL_FLOAT) { + return GL_FALSE; + } + } + + return GL_TRUE; +} + +void _glUpdateAttributes() { + if(ATTRIB_LIST.dirty & VERTEX_ENABLED_FLAG) { + ATTRIB_LIST.vertex_func = calcReadPositionFunc(); + } + + if(ATTRIB_LIST.dirty & UV_ENABLED_FLAG) { + ATTRIB_LIST.uv_func = calcReadUVFunc(); + } + + if(ATTRIB_LIST.dirty & DIFFUSE_ENABLED_FLAG) { + ATTRIB_LIST.colour_func = calcReadDiffuseFunc(); + } + + if(ATTRIB_LIST.dirty & ST_ENABLED_FLAG) { + ATTRIB_LIST.st_func = calcReadSTFunc(); + } + + if(ATTRIB_LIST.dirty & NORMAL_ENABLED_FLAG) { + ATTRIB_LIST.normal_func = calcReadNormalFunc(); + } + + ATTRIB_LIST.fast_path = _glIsVertexDataFastPathCompatible(); + ATTRIB_LIST.dirty = 0; +} diff --git a/GL/draw.c b/GL/draw.c index 514d03c..98b5cfd 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -8,7 +8,6 @@ #include "private.h" #include "platform.h" -GLuint FAST_PATH_ENABLED = GL_FALSE; GLubyte ACTIVE_CLIENT_TEXTURE; extern GLboolean AUTOSORT_ENABLED; @@ -286,9 +285,9 @@ static GL_NO_INLINE void genLineLoop(Vertex* output, GLuint count) { } static void _readPositionData(const GLuint first, const GLuint count, Vertex* it) { - const ReadAttributeFunc func = ATTRIB_POINTERS.vertex_func; - const GLsizei vstride = ATTRIB_POINTERS.vertex.stride; - const GLubyte* vptr = ((GLubyte*) ATTRIB_POINTERS.vertex.ptr + (first * vstride)); + const ReadAttributeFunc func = ATTRIB_LIST.vertex_func; + const GLsizei vstride = ATTRIB_LIST.vertex.stride; + const GLubyte* vptr = ((GLubyte*) ATTRIB_LIST.vertex.ptr + (first * vstride)); float pos[3], w = 1.0f; @@ -304,9 +303,9 @@ static void _readPositionData(const GLuint first, const GLuint count, Vertex* it } static void _readUVData(const GLuint first, const GLuint count, Vertex* it) { - const ReadAttributeFunc func = ATTRIB_POINTERS.uv_func; - const GLsizei uvstride = ATTRIB_POINTERS.uv.stride; - const GLubyte* uvptr = ((GLubyte*) ATTRIB_POINTERS.uv.ptr + (first * uvstride)); + const ReadAttributeFunc func = ATTRIB_LIST.uv_func; + const GLsizei uvstride = ATTRIB_LIST.uv.stride; + const GLubyte* uvptr = ((GLubyte*) ATTRIB_LIST.uv.ptr + (first * uvstride)); ITERATE(count) { PREFETCH(uvptr + uvstride); @@ -318,9 +317,9 @@ static void _readUVData(const GLuint first, const GLuint count, Vertex* it) { } static void _readSTData(const GLuint first, const GLuint count, VertexExtra* it) { - const ReadAttributeFunc func = ATTRIB_POINTERS.st_func; - const GLsizei ststride = ATTRIB_POINTERS.st.stride; - const GLubyte* stptr = ((GLubyte*) ATTRIB_POINTERS.st.ptr + (first * ststride)); + const ReadAttributeFunc func = ATTRIB_LIST.st_func; + const GLsizei ststride = ATTRIB_LIST.st.stride; + const GLubyte* stptr = ((GLubyte*) ATTRIB_LIST.st.ptr + (first * ststride)); ITERATE(count) { PREFETCH(stptr + ststride); @@ -331,9 +330,9 @@ static void _readSTData(const GLuint first, const GLuint count, VertexExtra* it) } static void _readNormalData(const GLuint first, const GLuint count, VertexExtra* it) { - const ReadAttributeFunc func = ATTRIB_POINTERS.normal_func; - const GLsizei nstride = ATTRIB_POINTERS.normal.stride; - const GLubyte* nptr = ((GLubyte*) ATTRIB_POINTERS.normal.ptr + (first * nstride)); + const ReadAttributeFunc func = ATTRIB_LIST.normal_func; + const GLsizei nstride = ATTRIB_LIST.normal.stride; + const GLubyte* nptr = ((GLubyte*) ATTRIB_LIST.normal.ptr + (first * nstride)); ITERATE(count) { func(nptr, (GLubyte*) it->nxyz); @@ -356,9 +355,9 @@ static void _readNormalData(const GLuint first, const GLuint count, VertexExtra* } static void _readDiffuseData(const GLuint first, const GLuint count, Vertex* it) { - const ReadAttributeFunc func = ATTRIB_POINTERS.colour_func; - const GLuint cstride = ATTRIB_POINTERS.colour.stride; - const GLubyte* cptr = ((GLubyte*) ATTRIB_POINTERS.colour.ptr) + (first * cstride); + const ReadAttributeFunc func = ATTRIB_LIST.colour_func; + const GLuint cstride = ATTRIB_LIST.colour.stride; + const GLubyte* cptr = ((GLubyte*) ATTRIB_LIST.colour.ptr) + (first * cstride); ITERATE(count) { PREFETCH(cptr + cstride); @@ -388,29 +387,29 @@ static void generateElements( uint32_t i = first; uint32_t idx = 0; - const ReadAttributeFunc pos_func = ATTRIB_POINTERS.vertex_func; - const GLsizei vstride = ATTRIB_POINTERS.vertex.stride; + const ReadAttributeFunc pos_func = ATTRIB_LIST.vertex_func; + const GLsizei vstride = ATTRIB_LIST.vertex.stride; - const ReadAttributeFunc uv_func = ATTRIB_POINTERS.uv_func; - const GLuint uvstride = ATTRIB_POINTERS.uv.stride; + const ReadAttributeFunc uv_func = ATTRIB_LIST.uv_func; + const GLuint uvstride = ATTRIB_LIST.uv.stride; - const ReadAttributeFunc st_func = ATTRIB_POINTERS.st_func; - const GLuint ststride = ATTRIB_POINTERS.st.stride; + const ReadAttributeFunc st_func = ATTRIB_LIST.st_func; + const GLuint ststride = ATTRIB_LIST.st.stride; - const ReadAttributeFunc diffuse_func = ATTRIB_POINTERS.colour_func; - const GLuint dstride = ATTRIB_POINTERS.colour.stride; + const ReadAttributeFunc diffuse_func = ATTRIB_LIST.colour_func; + const GLuint dstride = ATTRIB_LIST.colour.stride; - const ReadAttributeFunc normal_func = ATTRIB_POINTERS.normal_func; - const GLuint nstride = ATTRIB_POINTERS.normal.stride; + const ReadAttributeFunc normal_func = ATTRIB_LIST.normal_func; + const GLuint nstride = ATTRIB_LIST.normal.stride; for(; i < first + count; ++i) { idx = IndexFunc(indices + (i * istride)); - xyz = (GLubyte*) ATTRIB_POINTERS.vertex.ptr + (idx * vstride); - uv = (GLubyte*) ATTRIB_POINTERS.uv.ptr + (idx * uvstride); - bgra = (GLubyte*) ATTRIB_POINTERS.colour.ptr + (idx * dstride); - st = (GLubyte*) ATTRIB_POINTERS.st.ptr + (idx * ststride); - nxyz = (GLubyte*) ATTRIB_POINTERS.normal.ptr + (idx * nstride); + xyz = (GLubyte*) ATTRIB_LIST.vertex.ptr + (idx * vstride); + uv = (GLubyte*) ATTRIB_LIST.uv.ptr + (idx * uvstride); + bgra = (GLubyte*) ATTRIB_LIST.colour.ptr + (idx * dstride); + st = (GLubyte*) ATTRIB_LIST.st.ptr + (idx * ststride); + nxyz = (GLubyte*) ATTRIB_LIST.normal.ptr + (idx * nstride); pos_func(xyz, (GLubyte*) pos); TransformVertex((const float*) pos, &w, output->xyz, &output->w); @@ -442,21 +441,21 @@ static void generateElementsFastPath( Vertex* start = _glSubmissionTargetStart(target); - const GLuint vstride = ATTRIB_POINTERS.vertex.stride; - const GLuint uvstride = ATTRIB_POINTERS.uv.stride; - const GLuint ststride = ATTRIB_POINTERS.st.stride; - const GLuint dstride = ATTRIB_POINTERS.colour.stride; - const GLuint nstride = ATTRIB_POINTERS.normal.stride; + const GLuint vstride = ATTRIB_LIST.vertex.stride; + const GLuint uvstride = ATTRIB_LIST.uv.stride; + const GLuint ststride = ATTRIB_LIST.st.stride; + const GLuint dstride = ATTRIB_LIST.colour.stride; + const GLuint nstride = ATTRIB_LIST.normal.stride; const GLsizei istride = index_size(type); const IndexParseFunc IndexFunc = _calcParseIndexFunc(type); /* Copy the pos, uv and color directly in one go */ - const GLubyte* pos = (ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG) ? ATTRIB_POINTERS.vertex.ptr : NULL; - const GLubyte* uv = (ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) ? ATTRIB_POINTERS.uv.ptr : NULL; - const GLubyte* col = (ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) ? ATTRIB_POINTERS.colour.ptr : NULL; - const GLubyte* st = (ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) ? ATTRIB_POINTERS.st.ptr : NULL; - const GLubyte* n = (ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) ? ATTRIB_POINTERS.normal.ptr : NULL; + const GLubyte* pos = (ATTRIB_LIST.enabled & VERTEX_ENABLED_FLAG) ? ATTRIB_LIST.vertex.ptr : NULL; + const GLubyte* uv = (ATTRIB_LIST.enabled & UV_ENABLED_FLAG) ? ATTRIB_LIST.uv.ptr : NULL; + const GLubyte* col = (ATTRIB_LIST.enabled & DIFFUSE_ENABLED_FLAG) ? ATTRIB_LIST.colour.ptr : NULL; + const GLubyte* st = (ATTRIB_LIST.enabled & ST_ENABLED_FLAG) ? ATTRIB_LIST.st.ptr : NULL; + const GLubyte* n = (ATTRIB_LIST.enabled & NORMAL_ENABLED_FLAG) ? ATTRIB_LIST.normal.ptr : NULL; VertexExtra* ve = aligned_vector_at(target->extras, 0); Vertex* it = start; @@ -472,32 +471,32 @@ static void generateElementsFastPath( it->flags = GPU_CMD_VERTEX; - pos = (GLubyte*) ATTRIB_POINTERS.vertex.ptr + (idx * vstride); + pos = (GLubyte*) ATTRIB_LIST.vertex.ptr + (idx * vstride); TransformVertex((const float*) pos, &w, it->xyz, &it->w); if(uv) { - uv = (GLubyte*) ATTRIB_POINTERS.uv.ptr + (idx * uvstride); + uv = (GLubyte*) ATTRIB_LIST.uv.ptr + (idx * uvstride); MEMCPY4(it->uv, uv, sizeof(float) * 2); } else { *((Float2*) it->uv) = F2ZERO; } if(col) { - col = (GLubyte*) ATTRIB_POINTERS.colour.ptr + (idx * dstride); + col = (GLubyte*) ATTRIB_LIST.colour.ptr + (idx * dstride); MEMCPY4(it->bgra, col, sizeof(uint32_t)); } else { *((uint32_t*) it->bgra) = ~0; } if(st) { - st = (GLubyte*) ATTRIB_POINTERS.st.ptr + (idx * ststride); + st = (GLubyte*) ATTRIB_LIST.st.ptr + (idx * ststride); MEMCPY4(ve->st, st, sizeof(float) * 2); } else { *((Float2*) ve->st) = F2ZERO; } if(n) { - n = (GLubyte*) ATTRIB_POINTERS.normal.ptr + (idx * nstride); + n = (GLubyte*) ATTRIB_LIST.normal.ptr + (idx * nstride); MEMCPY4(ve->nxyz, n, sizeof(float) * 3); } else { *((Float3*) ve->nxyz) = F3Z; @@ -558,7 +557,7 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei /* Read from the client buffers and generate an array of ClipVertices */ TRACE(); - if(FAST_PATH_ENABLED) { + if(ATTRIB_LIST.fast_path) { if(indices) { generateElementsFastPath(target, first, count, indices, type); } else { @@ -834,7 +833,8 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL TRACE(); /* Do nothing if vertices aren't enabled */ - if(!(ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG)) return; + if(!(ATTRIB_LIST.enabled & VERTEX_ENABLED_FLAG)) return; + if(ATTRIB_LIST.dirty) _glUpdateAttributes(); /* No vertices? Do nothing */ if(!count) return; @@ -924,7 +924,7 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL // TextureObject* texture1 = _glGetTexture1(); // /* Multitexture implicitly disabled */ - // if(!texture1 || ((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) != ST_ENABLED_FLAG)) { + // if(!texture1 || ((ATTRIB_LIST.enabled & ST_ENABLED_FLAG) != ST_ENABLED_FLAG)) { // /* Multitexture actively disabled */ // return; // } diff --git a/GL/draw_fastpath.inc b/GL/draw_fastpath.inc index 8ea3514..b26e6a9 100644 --- a/GL/draw_fastpath.inc +++ b/GL/draw_fastpath.inc @@ -6,7 +6,7 @@ MAKE_FUNC(POLYMODE) { static const float w = 1.0f; - if(!(ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG)) { + if(!(ATTRIB_LIST.enabled & VERTEX_ENABLED_FLAG)) { /* If we don't have vertices, do nothing */ return; } @@ -29,8 +29,8 @@ MAKE_FUNC(POLYMODE) const int_fast32_t loop = ((min + BATCH_SIZE) > count) ? count - min : BATCH_SIZE; const int offset = (first + min); - stride = ATTRIB_POINTERS.uv.stride; - ptr = (ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) ? ATTRIB_POINTERS.uv.ptr + ((first + min) * stride) : NULL; + stride = ATTRIB_LIST.uv.stride; + ptr = (ATTRIB_LIST.enabled & UV_ENABLED_FLAG) ? ATTRIB_LIST.uv.ptr + ((first + min) * stride) : NULL; it = (Vertex*) start; if(ptr) { @@ -48,8 +48,8 @@ MAKE_FUNC(POLYMODE) } } - stride = ATTRIB_POINTERS.colour.stride; - ptr = (ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) ? ATTRIB_POINTERS.colour.ptr + (offset * stride) : NULL; + stride = ATTRIB_LIST.colour.stride; + ptr = (ATTRIB_LIST.enabled & DIFFUSE_ENABLED_FLAG) ? ATTRIB_LIST.colour.ptr + (offset * stride) : NULL; it = (Vertex*) start; if(ptr) { @@ -68,8 +68,8 @@ MAKE_FUNC(POLYMODE) } } - stride = ATTRIB_POINTERS.vertex.stride; - ptr = ATTRIB_POINTERS.vertex.ptr + (offset * stride); + stride = ATTRIB_LIST.vertex.stride; + ptr = ATTRIB_LIST.vertex.ptr + (offset * stride); it = (Vertex*) start; PREFETCH(ptr); @@ -82,8 +82,8 @@ MAKE_FUNC(POLYMODE) start = aligned_vector_at(target->extras, min); - stride = ATTRIB_POINTERS.st.stride; - ptr = (ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) ? ATTRIB_POINTERS.st.ptr + (offset * stride) : NULL; + stride = ATTRIB_LIST.st.stride; + ptr = (ATTRIB_LIST.enabled & ST_ENABLED_FLAG) ? ATTRIB_LIST.st.ptr + (offset * stride) : NULL; ve = (VertexExtra*) start; if(ptr) { @@ -102,8 +102,8 @@ MAKE_FUNC(POLYMODE) } } - stride = ATTRIB_POINTERS.normal.stride; - ptr = (ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) ? ATTRIB_POINTERS.normal.ptr + (offset * stride) : NULL; + stride = ATTRIB_LIST.normal.stride; + ptr = (ATTRIB_LIST.enabled & NORMAL_ENABLED_FLAG) ? ATTRIB_LIST.normal.ptr + (offset * stride) : NULL; ve = (VertexExtra*) start; if(ptr) { diff --git a/GL/immediate.c b/GL/immediate.c index df17635..971d92d 100644 --- a/GL/immediate.c +++ b/GL/immediate.c @@ -12,8 +12,6 @@ #include "private.h" -extern inline GLuint _glRecalcFastPath(); - GLboolean IMMEDIATE_MODE_ACTIVE = GL_FALSE; static GLenum ACTIVE_POLYGON_MODE = GL_TRIANGLES; @@ -49,6 +47,7 @@ typedef struct __attribute__((aligned(32))) { void _glInitImmediateMode(GLuint initial_size) { aligned_vector_init(&VERTICES, sizeof(IMVertex)); aligned_vector_reserve(&VERTICES, initial_size); + IM_ATTRIBS.fast_path = GL_TRUE; IM_ATTRIBS.vertex.ptr = aligned_vector_front(&VERTICES); IM_ATTRIBS.vertex.size = 3; @@ -87,7 +86,7 @@ void APIENTRY glBegin(GLenum mode) { } void APIENTRY glColor4f(GLfloat r, GLfloat g, GLfloat b, GLfloat a) { - IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; + IM_ATTRIBS.enabled |= DIFFUSE_ENABLED_FLAG; COLOR[A8IDX] = (GLubyte)(a * 255.0f); COLOR[R8IDX] = (GLubyte)(r * 255.0f); @@ -96,7 +95,7 @@ void APIENTRY glColor4f(GLfloat r, GLfloat g, GLfloat b, GLfloat a) { } void APIENTRY glColor4ub(GLubyte r, GLubyte g, GLubyte b, GLubyte a) { - IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; + IM_ATTRIBS.enabled |= DIFFUSE_ENABLED_FLAG; COLOR[A8IDX] = a; COLOR[R8IDX] = r; @@ -105,7 +104,7 @@ void APIENTRY glColor4ub(GLubyte r, GLubyte g, GLubyte b, GLubyte a) { } void APIENTRY glColor4ubv(const GLubyte *v) { - IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; + IM_ATTRIBS.enabled |= DIFFUSE_ENABLED_FLAG; COLOR[A8IDX] = v[3]; COLOR[R8IDX] = v[0]; @@ -114,7 +113,7 @@ void APIENTRY glColor4ubv(const GLubyte *v) { } void APIENTRY glColor4fv(const GLfloat* v) { - IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; + IM_ATTRIBS.enabled |= DIFFUSE_ENABLED_FLAG; COLOR[B8IDX] = (GLubyte)(v[2] * 255); COLOR[G8IDX] = (GLubyte)(v[1] * 255); @@ -123,7 +122,7 @@ void APIENTRY glColor4fv(const GLfloat* v) { } void APIENTRY glColor3f(GLfloat r, GLfloat g, GLfloat b) { - IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; + IM_ATTRIBS.enabled |= DIFFUSE_ENABLED_FLAG; COLOR[B8IDX] = (GLubyte)(b * 255.0f); COLOR[G8IDX] = (GLubyte)(g * 255.0f); @@ -132,7 +131,7 @@ void APIENTRY glColor3f(GLfloat r, GLfloat g, GLfloat b) { } void APIENTRY glColor3ub(GLubyte red, GLubyte green, GLubyte blue) { - IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; + IM_ATTRIBS.enabled |= DIFFUSE_ENABLED_FLAG; COLOR[A8IDX] = 255; COLOR[R8IDX] = red; @@ -141,7 +140,7 @@ void APIENTRY glColor3ub(GLubyte red, GLubyte green, GLubyte blue) { } void APIENTRY glColor3ubv(const GLubyte *v) { - IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; + IM_ATTRIBS.enabled |= DIFFUSE_ENABLED_FLAG; COLOR[A8IDX] = 255; COLOR[R8IDX] = v[0]; @@ -150,7 +149,7 @@ void APIENTRY glColor3ubv(const GLubyte *v) { } void APIENTRY glColor3fv(const GLfloat* v) { - IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; + IM_ATTRIBS.enabled |= DIFFUSE_ENABLED_FLAG; COLOR[A8IDX] = 255; COLOR[R8IDX] = (GLubyte)(v[0] * 255); @@ -167,7 +166,7 @@ typedef union punned { } punned_t; void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) { - IM_ENABLED_VERTEX_ATTRIBUTES |= VERTEX_ENABLED_FLAG; + IM_ATTRIBS.enabled |= VERTEX_ENABLED_FLAG; IMVertex* vert = aligned_vector_extend(&VERTICES, 1); @@ -208,11 +207,11 @@ void APIENTRY glVertex4fv(const GLfloat* v) { void APIENTRY glMultiTexCoord2fARB(GLenum target, GLfloat s, GLfloat t) { if(target == GL_TEXTURE0) { - IM_ENABLED_VERTEX_ATTRIBUTES |= UV_ENABLED_FLAG; + IM_ATTRIBS.enabled |= UV_ENABLED_FLAG; UV_COORD[0] = s; UV_COORD[1] = t; } else if(target == GL_TEXTURE1) { - IM_ENABLED_VERTEX_ATTRIBUTES |= ST_ENABLED_FLAG; + IM_ATTRIBS.enabled |= ST_ENABLED_FLAG; ST_COORD[0] = s; ST_COORD[1] = t; } else { @@ -222,7 +221,7 @@ void APIENTRY glMultiTexCoord2fARB(GLenum target, GLfloat s, GLfloat t) { } void APIENTRY glTexCoord1f(GLfloat u) { - IM_ENABLED_VERTEX_ATTRIBUTES |= UV_ENABLED_FLAG; + IM_ATTRIBS.enabled |= UV_ENABLED_FLAG; UV_COORD[0] = u; UV_COORD[1] = 0.0f; } @@ -232,7 +231,7 @@ void APIENTRY glTexCoord1fv(const GLfloat* v) { } void APIENTRY glTexCoord2f(GLfloat u, GLfloat v) { - IM_ENABLED_VERTEX_ATTRIBUTES |= UV_ENABLED_FLAG; + IM_ATTRIBS.enabled |= UV_ENABLED_FLAG; UV_COORD[0] = u; UV_COORD[1] = v; } @@ -242,7 +241,7 @@ void APIENTRY glTexCoord2fv(const GLfloat* v) { } void APIENTRY glNormal3f(GLfloat x, GLfloat y, GLfloat z) { - IM_ENABLED_VERTEX_ATTRIBUTES |= NORMAL_ENABLED_FLAG; + IM_ATTRIBS.enabled |= NORMAL_ENABLED_FLAG; NORMAL[0] = x; NORMAL[1] = y; NORMAL[2] = z; @@ -262,38 +261,15 @@ void APIENTRY glEnd() { IM_ATTRIBS.colour.ptr = IM_ATTRIBS.st.ptr + 8; IM_ATTRIBS.normal.ptr = IM_ATTRIBS.colour.ptr + 4; - GLuint* attrs = &ENABLED_VERTEX_ATTRIBUTES; - - /* Redirect attrib pointers */ - AttribPointerList stashed_attrib_pointers = ATTRIB_POINTERS; - ATTRIB_POINTERS = IM_ATTRIBS; - - GLuint prevAttrs = *attrs; - - *attrs = IM_ENABLED_VERTEX_ATTRIBUTES; - - /* Store the fast path enabled setting so we can restore it - * after drawing */ - const GLboolean fp_was_enabled = FAST_PATH_ENABLED; - -#ifndef NDEBUG - // Immediate mode should always activate the fast path - GLuint fastPathEnabled = _glRecalcFastPath(); - gl_assert(fastPathEnabled); -#else - /* If we're not debugging, set to true - we assume we haven't broken it! */ - FAST_PATH_ENABLED = GL_TRUE; -#endif - + /* Redirect attrib state */ + AttribPointerList stashed_state = ATTRIB_LIST; + ATTRIB_LIST = IM_ATTRIBS; + glDrawArrays(ACTIVE_POLYGON_MODE, 0, aligned_vector_header(&VERTICES)->size); - ATTRIB_POINTERS = stashed_attrib_pointers; - - *attrs = prevAttrs; + ATTRIB_LIST = stashed_state; aligned_vector_clear(&VERTICES); - - FAST_PATH_ENABLED = fp_was_enabled; } void APIENTRY glRectf(GLfloat x1, GLfloat y1, GLfloat x2, GLfloat y2) { diff --git a/GL/private.h b/GL/private.h index bf13e0b..981bff0 100644 --- a/GL/private.h +++ b/GL/private.h @@ -355,6 +355,10 @@ typedef struct { AttribPointer st; // 64 AttribPointer normal; // 80 + GLuint enabled; // list of currently enabled/used attributes + GLuint dirty; // list of attributes that need state recalculating + GLboolean fast_path; + ReadAttributeFunc vertex_func; ReadAttributeFunc colour_func; ReadAttributeFunc uv_func; @@ -362,12 +366,12 @@ typedef struct { ReadAttributeFunc normal_func; } AttribPointerList; -extern GLuint ENABLED_VERTEX_ATTRIBUTES; -extern AttribPointerList ATTRIB_POINTERS; +extern AttribPointerList ATTRIB_LIST; GLboolean _glCheckValidEnum(GLint param, GLint* values, const char* func); GLuint* _glGetEnabledAttributes(); +GL_NO_INLINE void _glUpdateAttributes(); GLenum _glGetShadeModel(); TextureObject* _glGetTexture0(); @@ -427,63 +431,6 @@ GLboolean _glIsColorMaterialEnabled(); GLboolean _glIsNormalizeEnabled(); -extern GLuint FAST_PATH_ENABLED; - -GL_FORCE_INLINE GLuint _glIsVertexDataFastPathCompatible() { - /* The fast path is enabled when all enabled elements of the vertex - * match the output format. This means: - * - * xyz == 3f - * uv == 2f - * rgba == argb4444 - * st == 2f - * normal == 3f - * - * When this happens we do inline straight copies of the enabled data - * and transforms for positions and normals happen while copying. - */ - - - - if((ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG)) { - if(ATTRIB_POINTERS.vertex.size != 3 || ATTRIB_POINTERS.vertex.type != GL_FLOAT) { - return GL_FALSE; - } - } - - if((ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG)) { - if(ATTRIB_POINTERS.uv.size != 2 || ATTRIB_POINTERS.uv.type != GL_FLOAT) { - return GL_FALSE; - } - } - - if((ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG)) { - /* FIXME: Shouldn't this be a reversed format? */ - if(ATTRIB_POINTERS.colour.size != GL_BGRA || ATTRIB_POINTERS.colour.type != GL_UNSIGNED_BYTE) { - return GL_FALSE; - } - } - - if((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG)) { - if(ATTRIB_POINTERS.st.size != 2 || ATTRIB_POINTERS.st.type != GL_FLOAT) { - return GL_FALSE; - } - } - - if((ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG)) { - if(ATTRIB_POINTERS.normal.size != 3 || ATTRIB_POINTERS.normal.type != GL_FLOAT) { - return GL_FALSE; - } - } - - return GL_TRUE; -} - -GL_FORCE_INLINE GLuint _glRecalcFastPath() { - FAST_PATH_ENABLED = _glIsVertexDataFastPathCompatible(); - return FAST_PATH_ENABLED; -} - extern GLboolean IMMEDIATE_MODE_ACTIVE; extern GLenum LAST_ERROR;