Add fast path for glDrawElements

This commit is contained in:
Luke Benstead 2021-04-22 17:48:15 +01:00
parent bb53dad90d
commit 28c14fdb0e
3 changed files with 119 additions and 7 deletions

120
GL/draw.c
View File

@ -777,6 +777,99 @@ static void generateElements(
}
}
/* Three consecutive floats, so an xyz triple can be assigned with a single
 * struct copy instead of three separate stores. */
typedef struct {
    float x;
    float y;
    float z;
} Float3;
/* Two consecutive floats, so a texture-coordinate pair can be assigned with a
 * single struct copy. */
typedef struct {
    float u;
    float v;
} Float2;
static const Float3 F3Z = {0.0f, 0.0f, 1.0f};
static const Float3 F3ZERO = {0.0f, 0.0f, 0.0f};
static const Float2 F2ZERO = {0.0f, 0.0f};
/*
 * Fast-path vertex generation for indexed draws.
 *
 * For each of the `count` indices starting at position `first` in `indices`,
 * the index is decoded with the parser matching `type` and used to locate the
 * vertex in every enabled client array. One Vertex (written sequentially from
 * _glSubmissionTargetStart(target)) and one VertexExtra (written sequentially
 * from element 0 of target->extras) are produced per index.
 *
 * Enabled attributes are copied as raw data: positions go through
 * TransformVertex() with w = 1, uv and st copy two floats, colour copies one
 * 32-bit word and normals copy three floats. Disabled attributes receive
 * defaults: zero position / uv / st, all-ones colour and a (0, 0, 1) normal.
 *
 * NOTE(review): nothing here checks that the target has room for `count`
 * entries or that the client arrays really use the copied layouts -
 * presumably the caller guarantees both before taking this path; confirm.
 */
static void generateElementsFastPath(
        SubmissionTarget* target, const GLsizei first, const GLuint count,
        const GLubyte* indices, const GLenum type) {

    Vertex* out = _glSubmissionTargetStart(target);

    /* Byte distance between consecutive elements of each client array: an
     * explicit stride wins, otherwise the array is treated as tightly packed. */
    const GLuint pos_step = (VERTEX_POINTER.stride == 0) ?
        VERTEX_POINTER.size * byte_size(VERTEX_POINTER.type) : VERTEX_POINTER.stride;

    const GLuint uv_step = (UV_POINTER.stride == 0) ?
        UV_POINTER.size * byte_size(UV_POINTER.type) : UV_POINTER.stride;

    const GLuint st_step = (ST_POINTER.stride == 0) ?
        ST_POINTER.size * byte_size(ST_POINTER.type) : ST_POINTER.stride;

    const GLuint colour_step = (DIFFUSE_POINTER.stride == 0) ?
        DIFFUSE_POINTER.size * byte_size(DIFFUSE_POINTER.type) : DIFFUSE_POINTER.stride;

    const GLuint normal_step = (NORMAL_POINTER.stride == 0) ?
        NORMAL_POINTER.size * byte_size(NORMAL_POINTER.type) : NORMAL_POINTER.stride;

    /* Width of one index in bytes, and the reader that decodes it. */
    const GLsizei index_width = byte_size(type);
    const IndexParseFunc read_index = _calcParseIndexFunc(type);

    /* Base address of each attribute array, or NULL when that attribute is
     * disabled (a NULL base selects the default value in the loop below). */
    const GLubyte* const pos_base =
        (ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG) ? VERTEX_POINTER.ptr : NULL;
    const GLubyte* const uv_base =
        (ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) ? UV_POINTER.ptr : NULL;
    const GLubyte* const colour_base =
        (ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) ? DIFFUSE_POINTER.ptr : NULL;
    const GLubyte* const st_base =
        (ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) ? ST_POINTER.ptr : NULL;
    const GLubyte* const normal_base =
        (ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) ? NORMAL_POINTER.ptr : NULL;

    VertexExtra* extra = aligned_vector_at(target->extras, 0);

    const float unit_w = 1.0f;
    const GLuint end = first + count;

    for(GLuint i = first; i < end; ++i) {
        const GLuint idx = read_index(indices + (i * index_width));

        out->flags = GPU_CMD_VERTEX;

        if(pos_base) {
            const GLubyte* src = pos_base + (idx * pos_step);
            TransformVertex((const float*) src, &unit_w, out->xyz, &out->w);
        } else {
            /* Only xyz is defaulted here; out->w is left untouched. */
            *((Float3*) out->xyz) = F3ZERO;
        }

        if(uv_base) {
            const GLubyte* src = uv_base + (idx * uv_step);
            MEMCPY4(out->uv, src, sizeof(float) * 2);
        } else {
            *((Float2*) out->uv) = F2ZERO;
        }

        if(colour_base) {
            const GLubyte* src = colour_base + (idx * colour_step);
            MEMCPY4(out->bgra, src, sizeof(uint32_t));
        } else {
            /* All four colour bytes set to 0xFF. */
            *((uint32_t*) out->bgra) = ~0;
        }

        if(st_base) {
            const GLubyte* src = st_base + (idx * st_step);
            MEMCPY4(extra->st, src, sizeof(float) * 2);
        } else {
            *((Float2*) extra->st) = F2ZERO;
        }

        if(normal_base) {
            const GLubyte* src = normal_base + (idx * normal_step);
            MEMCPY4(extra->nxyz, src, sizeof(float) * 3);
        } else {
            *((Float3*) extra->nxyz) = F3Z;
        }

        ++out;
        ++extra;
    }
}
static void generateArraysFastPath(SubmissionTarget* target, const GLsizei first, const GLuint count) {
Vertex* start = _glSubmissionTargetStart(target);
@ -816,26 +909,37 @@ static void generateArraysFastPath(SubmissionTarget* target, const GLsizei first
if(pos) {
TransformVertex((const float*) pos, &w, it->xyz, &it->w);
pos += vstride;
} else {
*((Float3*) it->xyz) = F3ZERO;
}
if(uv) {
MEMCPY4(it->uv, uv, sizeof(float) * 2);
uv += uvstride;
} else {
*((Float2*) it->uv) = F2ZERO;
}
if(col) {
MEMCPY4(it->bgra, col, sizeof(uint32_t));
col += dstride;
} else {
*((uint32_t*) it->bgra) = ~0;
}
if(st) {
MEMCPY4(ve->st, st, sizeof(float) * 2);
st += ststride;
} else {
*((Float2*) ve->st) = F2ZERO;
}
if(n) {
MEMCPY4(ve->nxyz, n, sizeof(float) * 3);
n += nstride;
} else {
*((Float3*) ve->nxyz) = F3Z;
}
it++;
@ -865,12 +969,18 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei
/* Read from the client buffers and generate an array of ClipVertices */
TRACE();
if(indices) {
generateElements(target, first, count, indices, type);
} else if(FAST_PATH_ENABLED) {
generateArraysFastPath(target, first, count);
if(FAST_PATH_ENABLED) {
if(indices) {
generateElementsFastPath(target, first, count, indices, type);
} else {
generateArraysFastPath(target, first, count);
}
} else {
generateArrays(target, first, count);
if(indices) {
generateElements(target, first, count, indices, type);
} else {
generateArrays(target, first, count);
}
}
Vertex* it = _glSubmissionTargetStart(target);

View File

@ -88,8 +88,6 @@ void APIENTRY glKosInit() {
void APIENTRY glKosSwapBuffers() {
static int frame_count = 0;
TRACE();
SceneBegin();

View File

@ -16,9 +16,13 @@ typedef struct {
#define ALIGNED_VECTOR_CHUNK_SIZE 256u
/* AV_FORCE_INLINE is the declaration prefix for inline helpers. C++ builds
 * get a plain `static inline`; C builds additionally attach compiler
 * attributes (GCC-style __attribute__ syntax). */
#ifdef __cplusplus
#define AV_FORCE_INLINE static inline
#else
/* inline + no_instrument_function */
#define AV_NO_INSTRUMENT inline __attribute__((no_instrument_function))
/* the above + always_inline */
#define AV_INLINE_DEBUG AV_NO_INSTRUMENT __attribute__((always_inline))
/* static + inline + no_instrument_function + always_inline */
#define AV_FORCE_INLINE static AV_INLINE_DEBUG
#endif
void aligned_vector_init(AlignedVector* vector, unsigned int element_size);
void* aligned_vector_reserve(AlignedVector* vector, unsigned int element_count);