Refactor fast path

This commit is contained in:
Luke Benstead 2021-04-21 09:11:51 +01:00
parent bfca6fd8b6
commit 2547459ef3

124
GL/draw.c
View File

@ -53,39 +53,49 @@ void _glInitAttributePointers() {
}
/**
 * Reports whether the currently enabled client vertex arrays can be
 * submitted through the fast path.
 *
 * The fast path is available when every *enabled* element of the vertex
 * already matches the output format, so it can be copied without any
 * per-component conversion:
 *
 *   position (VERTEX_POINTER)  == 3 x GL_FLOAT
 *   uv       (UV_POINTER)      == 2 x GL_FLOAT
 *   colour   (DIFFUSE_POINTER) == GL_BGRA x GL_UNSIGNED_BYTE (4 bytes)
 *   st       (ST_POINTER)      == 2 x GL_FLOAT
 *   normal   (NORMAL_POINTER)  == 3 x GL_FLOAT
 *
 * Strides and the relative placement of the arrays are deliberately not
 * constrained here; only component count and type are checked.
 *
 * NOTE(review): an array whose enable flag is clear is not inspected at all,
 * so this can return GL_TRUE while some arrays are disabled. Confirm that the
 * fast-path copy loop tolerates disabled arrays.
 *
 * @return GL_TRUE if every enabled array has the expected size and type,
 *         GL_FALSE as soon as one of them does not.
 */
GL_FORCE_INLINE GLboolean _glIsVertexDataFastPathCompatible() {
    if((ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG)) {
        if(VERTEX_POINTER.size != 3 || VERTEX_POINTER.type != GL_FLOAT) {
            return GL_FALSE;
        }
    }

    if((ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG)) {
        if(UV_POINTER.size != 2 || UV_POINTER.type != GL_FLOAT) {
            return GL_FALSE;
        }
    }

    if((ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG)) {
        /* For colour arrays `size` holds the GL_BGRA enum rather than a
         * component count. */
        /* FIXME: Shouldn't this be a reversed format? */
        if(DIFFUSE_POINTER.size != GL_BGRA || DIFFUSE_POINTER.type != GL_UNSIGNED_BYTE) {
            return GL_FALSE;
        }
    }

    if((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG)) {
        if(ST_POINTER.size != 2 || ST_POINTER.type != GL_FLOAT) {
            return GL_FALSE;
        }
    }

    if((ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG)) {
        if(NORMAL_POINTER.size != 3 || NORMAL_POINTER.type != GL_FLOAT) {
            return GL_FALSE;
        }
    }

    return GL_TRUE;
}
@ -767,27 +777,61 @@ static void generateElements(
}
}
/* Byte count of one packed position + UV + colour record:
 * 3 GLfloat (xyz) + 2 GLfloat (uv) + 4 GLubyte (colour). */
static const uint32_t FAST_PATH_BYTE_SIZE = (sizeof(GLfloat) * 3) + (sizeof(GLfloat) * 2) + (sizeof(GLubyte) * 4);
/**
 * Emits `count` vertices into `target` by copying each attribute straight
 * out of the client arrays, beginning at array element `first`.
 *
 * Position, UV and colour go into the Vertex run that starts at
 * _glSubmissionTargetStart(target); ST and normal go into the VertexExtra
 * entries starting at index 0 of target->extras. No format conversion is
 * done, so the arrays must already be in the output format.
 *
 * NOTE(review): all five arrays are read unconditionally. Confirm that the
 * caller only takes this path when every array points at readable data, or
 * add per-attribute guards here.
 *
 * @param target  submission target whose vertices and extras are written
 * @param first   index of the first element to read from each array
 * @param count   number of vertices to emit
 */
static void generateArraysFastPath(SubmissionTarget* target, const GLsizei first, const GLuint count) {
    typedef struct {
        float x, y, z;
    } V3;

    typedef struct {
        float u, v;
    } V2;

    Vertex* it = _glSubmissionTargetStart(target);

    /* A stride of zero falls back to the size of one element of that array. */
    const GLuint vstride = (VERTEX_POINTER.stride) ?
        VERTEX_POINTER.stride : VERTEX_POINTER.size * byte_size(VERTEX_POINTER.type);

    const GLuint uvstride = (UV_POINTER.stride) ?
        UV_POINTER.stride : UV_POINTER.size * byte_size(UV_POINTER.type);

    const GLuint ststride = (ST_POINTER.stride) ?
        ST_POINTER.stride : ST_POINTER.size * byte_size(ST_POINTER.type);

    /* The colour array's `size` may hold the GL_BGRA enum instead of a
     * component count; BGRA always means four components, so map it before
     * deriving the packed stride. */
    const GLuint dcomponents = (DIFFUSE_POINTER.size == GL_BGRA) ?
        4 : DIFFUSE_POINTER.size;

    const GLuint dstride = (DIFFUSE_POINTER.stride) ?
        DIFFUSE_POINTER.stride : dcomponents * byte_size(DIFFUSE_POINTER.type);

    const GLuint nstride = (NORMAL_POINTER.stride) ?
        NORMAL_POINTER.stride : NORMAL_POINTER.size * byte_size(NORMAL_POINTER.type);

    /* Read cursors, one per array, positioned on element `first`. */
    const GLubyte* pos = VERTEX_POINTER.ptr + (first * vstride);
    const GLubyte* uv = UV_POINTER.ptr + (first * uvstride);
    const GLubyte* col = DIFFUSE_POINTER.ptr + (first * dstride);
    const GLubyte* st = ST_POINTER.ptr + (first * ststride);
    const GLubyte* n = NORMAL_POINTER.ptr + (first * nstride);

    VertexExtra* ve = aligned_vector_at(target->extras, 0);

    ITERATE(count) {
        it->flags = GPU_CMD_VERTEX;

        /* Each attribute is moved as a single fixed-size chunk. */
        *((V3*) it->xyz) = *((const V3*) pos);
        *((V2*) it->uv) = *((const V2*) uv);
        *((uint32_t*) it->bgra) = *((const uint32_t*) col);
        *((V2*) ve->st) = *((const V2*) st);
        *((V3*) ve->nxyz) = *((const V3*) n);

        it++;
        ve++;

        /* Every cursor advances exactly once per vertex. */
        pos += vstride;
        uv += uvstride;
        col += dstride;
        st += ststride;
        n += nstride;
    }
}
static void generateArrays(SubmissionTarget* target, const GLsizei first, const GLuint count) {