From 2547459ef3f0be5bed1874d45d867c4af7d820a4 Mon Sep 17 00:00:00 2001
From: Luke Benstead
Date: Wed, 21 Apr 2021 09:11:51 +0100
Subject: [PATCH] Refactor fast path

---
Review notes (text between the "---" line and the diffstat is not recorded in
the commit message):

 * generateArraysFastPath(): the packed colour stride was computed from
   DIFFUSE_POINTER.size, which on this path holds the GL_BGRA enum (0x80E1)
   rather than a component count. It is now derived from four components.
 * generateArraysFastPath(): _glIsVertexDataFastPathCompatible() accepts
   vertex data in which some arrays are disabled, but the copy loop read every
   array unconditionally. Disabled arrays are no longer read; the element is
   given a default value instead.
 * Comments corrected: the colour format is four unsigned bytes in GL_BGRA
   order (not "argb4444"), and the loop copies without transforming.
 * The blob ids on the "index" line are those of the original export.

 GL/draw.c | 164 +++++++++++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 123 insertions(+), 41 deletions(-)

diff --git a/GL/draw.c b/GL/draw.c
index 780197d..7350324 100644
--- a/GL/draw.c
+++ b/GL/draw.c
@@ -53,39 +53,49 @@ void _glInitAttributePointers() {
 }
 
 GL_FORCE_INLINE GLboolean _glIsVertexDataFastPathCompatible() {
-    /*
-     * We provide a "fast path" if vertex data is provided in
-     * exactly the right format that matches what the PVR can handle.
-     * This function returns true if all the requirements are met.
+    /* The fast path is enabled when all enabled elements of the vertex
+     * match the output format. This means:
+     *
+     * xyz == 3f
+     * uv == 2f
+     * bgra == 4ub (GL_BGRA order)
+     * st == 2f
+     * normal == 3f
+     *
+     * When this happens the enabled data is copied straight into the
+     * submission target with no per-element format conversion.
      */
 
-    /*
-     * At least these attributes need to be enabled, because we're not going to do any checking
-     * in the loop
-     */
-    if((ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG) != VERTEX_ENABLED_FLAG) return GL_FALSE;
-    if((ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) != UV_ENABLED_FLAG) return GL_FALSE;
-    if((ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) != DIFFUSE_ENABLED_FLAG) return GL_FALSE;
+    if((ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG)) {
+        if(VERTEX_POINTER.size != 3 || VERTEX_POINTER.type != GL_FLOAT) {
+            return GL_FALSE;
+        }
+    }
 
-    // All 3 attribute types must have a stride of 32
-    if(VERTEX_POINTER.stride != 32) return GL_FALSE;
-    if(UV_POINTER.stride != 32) return GL_FALSE;
-    if(DIFFUSE_POINTER.stride != 32) return GL_FALSE;
+    if((ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG)) {
+        if(UV_POINTER.size != 2 || UV_POINTER.type != GL_FLOAT) {
+            return GL_FALSE;
+        }
+    }
 
-    // UV must follow vertex, diffuse must follow UV
-    if((UV_POINTER.ptr - VERTEX_POINTER.ptr) != sizeof(GLfloat) * 3) return GL_FALSE;
-    if((DIFFUSE_POINTER.ptr - UV_POINTER.ptr) != sizeof(GLfloat) * 2) return GL_FALSE;
+    if((ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG)) {
+        /* FIXME: Shouldn't this be a reversed format? */
+        if(DIFFUSE_POINTER.size != GL_BGRA || DIFFUSE_POINTER.type != GL_UNSIGNED_BYTE) {
+            return GL_FALSE;
+        }
+    }
 
-    if(VERTEX_POINTER.type != GL_FLOAT) return GL_FALSE;
-    if(VERTEX_POINTER.size != 3) return GL_FALSE;
+    if((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG)) {
+        if(ST_POINTER.size != 2 || ST_POINTER.type != GL_FLOAT) {
+            return GL_FALSE;
+        }
+    }
 
-    if(UV_POINTER.type != GL_FLOAT) return GL_FALSE;
-    if(UV_POINTER.size != 2) return GL_FALSE;
-
-    if(DIFFUSE_POINTER.type != GL_UNSIGNED_BYTE) return GL_FALSE;
-
-    /* BGRA is the required color order */
-    if(DIFFUSE_POINTER.size != GL_BGRA) return GL_FALSE;
+    if((ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG)) {
+        if(NORMAL_POINTER.size != 3 || NORMAL_POINTER.type != GL_FLOAT) {
+            return GL_FALSE;
+        }
+    }
 
     return GL_TRUE;
 }
@@ -767,27 +777,99 @@ static void generateElements(
     }
 }
 
-static const uint32_t FAST_PATH_BYTE_SIZE = (sizeof(GLfloat) * 3) + (sizeof(GLfloat) * 2) + (sizeof(GLubyte) * 4);
-
 static void generateArraysFastPath(SubmissionTarget* target, const GLsizei first, const GLuint count) {
     Vertex* start = _glSubmissionTargetStart(target);
+
+    const GLuint vstride = (VERTEX_POINTER.stride) ?
+        VERTEX_POINTER.stride : VERTEX_POINTER.size * byte_size(VERTEX_POINTER.type);
+
+    const GLuint uvstride = (UV_POINTER.stride) ?
+        UV_POINTER.stride : UV_POINTER.size * byte_size(UV_POINTER.type);
+
+    const GLuint ststride = (ST_POINTER.stride) ?
+        ST_POINTER.stride : ST_POINTER.size * byte_size(ST_POINTER.type);
+
+    /* On this path an enabled colour array always has size == GL_BGRA, which
+     * is an enum rather than a component count; a packed colour has four. */
+    const GLuint dcomponents = (DIFFUSE_POINTER.size == GL_BGRA) ? 4 : DIFFUSE_POINTER.size;
+    const GLuint dstride = (DIFFUSE_POINTER.stride) ?
+        DIFFUSE_POINTER.stride : dcomponents * byte_size(DIFFUSE_POINTER.type);
+
+    const GLuint nstride = (NORMAL_POINTER.stride) ?
+        NORMAL_POINTER.stride : NORMAL_POINTER.size * byte_size(NORMAL_POINTER.type);
+
-    /* Copy the pos, uv and color directly in one go */
-    const GLubyte* pos = VERTEX_POINTER.ptr;
+    /* One read cursor per attribute, starting at element `first`. A NULL
+     * cursor marks an array that is not enabled and must never be read. */
+    const GLubyte* pos = (ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG) ?
+        VERTEX_POINTER.ptr + (first * vstride) : NULL;
+    const GLubyte* uv = (ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) ?
+        UV_POINTER.ptr + (first * uvstride) : NULL;
+    const GLubyte* col = (ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) ?
+        DIFFUSE_POINTER.ptr + (first * dstride) : NULL;
+    const GLubyte* st = (ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) ?
+        ST_POINTER.ptr + (first * ststride) : NULL;
+    const GLubyte* n = (ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) ?
+        NORMAL_POINTER.ptr + (first * nstride) : NULL;
+
+    typedef struct {
+        float x, y, z;
+    } V3;
+
+    typedef struct {
+        float u, v;
+    } V2;
+
+    /* Values written for arrays that are not enabled. NOTE(review): these are
+     * the OpenGL initial current values (white, +Z normal, zero texcoords);
+     * confirm they match what generateArrays() emits for a disabled array. */
+    const V3 zero3 = {0.0f, 0.0f, 0.0f};
+    const V3 unitz = {0.0f, 0.0f, 1.0f};
+    const V2 zero2 = {0.0f, 0.0f};
+    const uint32_t white = 0xFFFFFFFF;
+
+    VertexExtra* ve = aligned_vector_at(target->extras, 0);
     Vertex* it = start;
     ITERATE(count) {
         it->flags = GPU_CMD_VERTEX;
-        MEMCPY4(it->xyz, pos, FAST_PATH_BYTE_SIZE);
+
+        if(pos) {
+            *((V3*) it->xyz) = *((const V3*) pos);
+            pos += vstride;
+        } else {
+            *((V3*) it->xyz) = zero3;
+        }
+
+        if(uv) {
+            *((V2*) it->uv) = *((const V2*) uv);
+            uv += uvstride;
+        } else {
+            *((V2*) it->uv) = zero2;
+        }
+
+        if(col) {
+            *((uint32_t*) it->bgra) = *((const uint32_t*) col);
+            col += dstride;
+        } else {
+            *((uint32_t*) it->bgra) = white;
+        }
+
+        if(st) {
+            *((V2*) ve->st) = *((const V2*) st);
+            st += ststride;
+        } else {
+            *((V2*) ve->st) = zero2;
+        }
+
+        if(n) {
+            *((V3*) ve->nxyz) = *((const V3*) n);
+            n += nstride;
+        } else {
+            *((V3*) ve->nxyz) = unitz;
+        }
+
         it++;
-        pos += VERTEX_POINTER.stride;
+        ve++;
     }
-
-    VertexExtra* ve = aligned_vector_at(target->extras, 0);
-
-    ReadNormalFunc nfunc = calcReadNormalFunc();
-    ReadUVFunc stfunc = calcReadSTFunc();
-
-    _readNormalData(nfunc, first, count, ve);
-    _readSTData(stfunc, first, count, ve);
 }
 
 static void generateArrays(SubmissionTarget* target, const GLsizei first, const GLuint count) {