Add fast path for glDrawElements
This commit is contained in:
parent
bb53dad90d
commit
28c14fdb0e
120
GL/draw.c
120
GL/draw.c
|
@ -777,6 +777,99 @@ static void generateElements(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Three float members (x, y, z). Used in this file to assign a whole
 * three-component value with a single struct assignment. */
typedef struct {
|
||||||
|
float x, y, z;
|
||||||
|
} Float3;
|
||||||
|
|
||||||
|
/* Two float members (u, v). Used in this file to assign a whole
 * two-component value with a single struct assignment. */
typedef struct {
|
||||||
|
float u, v;
|
||||||
|
} Float2;
|
||||||
|
|
||||||
|
/* Fallback values written by the fast paths when a client array is disabled. */
/* (0, 0, 1): stored as the normal when no normal array is enabled. */
static const Float3 F3Z = {0.0f, 0.0f, 1.0f};
|
||||||
|
/* (0, 0, 0): stored as the position when no vertex array is enabled. */
static const Float3 F3ZERO = {0.0f, 0.0f, 0.0f};
|
||||||
|
/* (0, 0): stored as uv / st when the matching texcoord array is disabled. */
static const Float2 F2ZERO = {0.0f, 0.0f};
|
||||||
|
|
||||||
|
/* Fast-path vertex generation for indexed draws.
 *
 * For each i in [first, first + count) an index is read from `indices`
 * (element width byte_size(type), decoded by the function returned from
 * _calcParseIndexFunc(type)) and used to fetch position, uv, colour, st and
 * normal from the client array pointers. Results are written to the Vertex
 * entries starting at _glSubmissionTargetStart(target) and to the VertexExtra
 * entries starting at element 0 of target->extras. Attributes whose enable
 * flag is clear in ENABLED_VERTEX_ATTRIBUTES receive fixed defaults instead.
 *
 * target  - submission target receiving the generated vertices and extras
 * first   - position of the first index to read from `indices`
 * count   - number of indices (and therefore output vertices) to process
 * indices - raw index buffer
 * type    - index element type; determines element width and decoder
 *
 * NOTE(review): the copies below use fixed sizes (2 floats, 4 bytes, 3 floats)
 * regardless of each pointer's size/type fields - presumably callers only
 * select this path when the client formats match; confirm.
 */
static void generateElementsFastPath(
|
||||||
|
SubmissionTarget* target, const GLsizei first, const GLuint count,
|
||||||
|
const GLubyte* indices, const GLenum type) {
|
||||||
|
|
||||||
|
Vertex* start = _glSubmissionTargetStart(target);
|
||||||
|
|
||||||
|
/* Per-attribute byte strides: an explicit non-zero stride wins, otherwise
 * the stride is component count * component byte size. */
const GLuint vstride = (VERTEX_POINTER.stride) ?
|
||||||
|
VERTEX_POINTER.stride : VERTEX_POINTER.size * byte_size(VERTEX_POINTER.type);
|
||||||
|
|
||||||
|
const GLuint uvstride = (UV_POINTER.stride) ?
|
||||||
|
UV_POINTER.stride : UV_POINTER.size * byte_size(UV_POINTER.type);
|
||||||
|
|
||||||
|
const GLuint ststride = (ST_POINTER.stride) ?
|
||||||
|
ST_POINTER.stride : ST_POINTER.size * byte_size(ST_POINTER.type);
|
||||||
|
|
||||||
|
const GLuint dstride = (DIFFUSE_POINTER.stride) ?
|
||||||
|
DIFFUSE_POINTER.stride : DIFFUSE_POINTER.size * byte_size(DIFFUSE_POINTER.type);
|
||||||
|
|
||||||
|
const GLuint nstride = (NORMAL_POINTER.stride) ?
|
||||||
|
NORMAL_POINTER.stride : NORMAL_POINTER.size * byte_size(NORMAL_POINTER.type);
|
||||||
|
|
||||||
|
/* Width in bytes of one index element, and the decoder for that type. */
const GLsizei istride = byte_size(type);
|
||||||
|
const IndexParseFunc IndexFunc = _calcParseIndexFunc(type);
|
||||||
|
|
||||||
|
/* Copy the pos, uv and color directly in one go */
|
||||||
|
/* Each pointer is NULL when its attribute is disabled. Inside the loop the
 * NULL test selects the default branch; enabled pointers are recomputed
 * from the array base for every index. */
const GLubyte* pos = (ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG) ? VERTEX_POINTER.ptr : NULL;
|
||||||
|
const GLubyte* uv = (ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) ? UV_POINTER.ptr : NULL;
|
||||||
|
const GLubyte* col = (ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) ? DIFFUSE_POINTER.ptr : NULL;
|
||||||
|
const GLubyte* st = (ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) ? ST_POINTER.ptr : NULL;
|
||||||
|
const GLubyte* n = (ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) ? NORMAL_POINTER.ptr : NULL;
|
||||||
|
|
||||||
|
/* Output cursors: `ve` walks the extras from element 0, `it` walks the
 * vertices from the submission start; both advance once per index. */
VertexExtra* ve = aligned_vector_at(target->extras, 0);
|
||||||
|
Vertex* it = start;
|
||||||
|
|
||||||
|
/* Constant w value passed to TransformVertex for every vertex. */
const float w = 1.0f;
|
||||||
|
|
||||||
|
for(GLuint i = first; i < first + count; ++i) {
|
||||||
|
/* Decode the i-th index; it selects the element in every enabled array. */
GLuint idx = IndexFunc(indices + (i * istride));
|
||||||
|
|
||||||
|
it->flags = GPU_CMD_VERTEX;
|
||||||
|
|
||||||
|
if(pos) {
|
||||||
|
pos = (GLubyte*) VERTEX_POINTER.ptr + (idx * vstride);
|
||||||
|
TransformVertex((const float*) pos, &w, it->xyz, &it->w);
|
||||||
|
} else {
|
||||||
|
/* NOTE(review): only xyz is written here; it->w is left untouched on
 * this branch - confirm that is intended. */
*((Float3*) it->xyz) = F3ZERO;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(uv) {
|
||||||
|
uv = (GLubyte*) UV_POINTER.ptr + (idx * uvstride);
|
||||||
|
MEMCPY4(it->uv, uv, sizeof(float) * 2);
|
||||||
|
} else {
|
||||||
|
*((Float2*) it->uv) = F2ZERO;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(col) {
|
||||||
|
col = (GLubyte*) DIFFUSE_POINTER.ptr + (idx * dstride);
|
||||||
|
MEMCPY4(it->bgra, col, sizeof(uint32_t));
|
||||||
|
} else {
|
||||||
|
/* No colour array: store a 32-bit value with every bit set. */
*((uint32_t*) it->bgra) = ~0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(st) {
|
||||||
|
st = (GLubyte*) ST_POINTER.ptr + (idx * ststride);
|
||||||
|
MEMCPY4(ve->st, st, sizeof(float) * 2);
|
||||||
|
} else {
|
||||||
|
*((Float2*) ve->st) = F2ZERO;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(n) {
|
||||||
|
n = (GLubyte*) NORMAL_POINTER.ptr + (idx * nstride);
|
||||||
|
MEMCPY4(ve->nxyz, n, sizeof(float) * 3);
|
||||||
|
} else {
|
||||||
|
/* No normal array: fall back to (0, 0, 1). */
*((Float3*) ve->nxyz) = F3Z;
|
||||||
|
}
|
||||||
|
|
||||||
|
it++;
|
||||||
|
ve++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void generateArraysFastPath(SubmissionTarget* target, const GLsizei first, const GLuint count) {
|
static void generateArraysFastPath(SubmissionTarget* target, const GLsizei first, const GLuint count) {
|
||||||
Vertex* start = _glSubmissionTargetStart(target);
|
Vertex* start = _glSubmissionTargetStart(target);
|
||||||
|
|
||||||
|
@ -816,26 +909,37 @@ static void generateArraysFastPath(SubmissionTarget* target, const GLsizei first
|
||||||
if(pos) {
|
if(pos) {
|
||||||
TransformVertex((const float*) pos, &w, it->xyz, &it->w);
|
TransformVertex((const float*) pos, &w, it->xyz, &it->w);
|
||||||
pos += vstride;
|
pos += vstride;
|
||||||
|
} else {
|
||||||
|
*((Float3*) it->xyz) = F3ZERO;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if(uv) {
|
if(uv) {
|
||||||
MEMCPY4(it->uv, uv, sizeof(float) * 2);
|
MEMCPY4(it->uv, uv, sizeof(float) * 2);
|
||||||
uv += uvstride;
|
uv += uvstride;
|
||||||
|
} else {
|
||||||
|
*((Float2*) it->uv) = F2ZERO;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(col) {
|
if(col) {
|
||||||
MEMCPY4(it->bgra, col, sizeof(uint32_t));
|
MEMCPY4(it->bgra, col, sizeof(uint32_t));
|
||||||
col += dstride;
|
col += dstride;
|
||||||
|
} else {
|
||||||
|
*((uint32_t*) it->bgra) = ~0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(st) {
|
if(st) {
|
||||||
MEMCPY4(ve->st, st, sizeof(float) * 2);
|
MEMCPY4(ve->st, st, sizeof(float) * 2);
|
||||||
st += ststride;
|
st += ststride;
|
||||||
|
} else {
|
||||||
|
*((Float2*) ve->st) = F2ZERO;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(n) {
|
if(n) {
|
||||||
MEMCPY4(ve->nxyz, n, sizeof(float) * 3);
|
MEMCPY4(ve->nxyz, n, sizeof(float) * 3);
|
||||||
n += nstride;
|
n += nstride;
|
||||||
|
} else {
|
||||||
|
*((Float3*) ve->nxyz) = F3Z;
|
||||||
}
|
}
|
||||||
|
|
||||||
it++;
|
it++;
|
||||||
|
@ -865,12 +969,18 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei
|
||||||
/* Read from the client buffers and generate an array of ClipVertices */
|
/* Read from the client buffers and generate an array of ClipVertices */
|
||||||
TRACE();
|
TRACE();
|
||||||
|
|
||||||
if(indices) {
|
if(FAST_PATH_ENABLED) {
|
||||||
generateElements(target, first, count, indices, type);
|
if(indices) {
|
||||||
} else if(FAST_PATH_ENABLED) {
|
generateElementsFastPath(target, first, count, indices, type);
|
||||||
generateArraysFastPath(target, first, count);
|
} else {
|
||||||
|
generateArraysFastPath(target, first, count);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
generateArrays(target, first, count);
|
if(indices) {
|
||||||
|
generateElements(target, first, count, indices, type);
|
||||||
|
} else {
|
||||||
|
generateArrays(target, first, count);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Vertex* it = _glSubmissionTargetStart(target);
|
Vertex* it = _glSubmissionTargetStart(target);
|
||||||
|
|
|
@ -88,8 +88,6 @@ void APIENTRY glKosInit() {
|
||||||
|
|
||||||
|
|
||||||
void APIENTRY glKosSwapBuffers() {
|
void APIENTRY glKosSwapBuffers() {
|
||||||
static int frame_count = 0;
|
|
||||||
|
|
||||||
TRACE();
|
TRACE();
|
||||||
|
|
||||||
SceneBegin();
|
SceneBegin();
|
||||||
|
|
|
@ -16,9 +16,13 @@ typedef struct {
|
||||||
|
|
||||||
#define ALIGNED_VECTOR_CHUNK_SIZE 256u
|
#define ALIGNED_VECTOR_CHUNK_SIZE 256u
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
#define AV_FORCE_INLINE static inline
|
||||||
|
#else
|
||||||
#define AV_NO_INSTRUMENT inline __attribute__((no_instrument_function))
|
#define AV_NO_INSTRUMENT inline __attribute__((no_instrument_function))
|
||||||
#define AV_INLINE_DEBUG AV_NO_INSTRUMENT __attribute__((always_inline))
|
#define AV_INLINE_DEBUG AV_NO_INSTRUMENT __attribute__((always_inline))
|
||||||
#define AV_FORCE_INLINE static AV_INLINE_DEBUG
|
#define AV_FORCE_INLINE static AV_INLINE_DEBUG
|
||||||
|
#endif
|
||||||
|
|
||||||
void aligned_vector_init(AlignedVector* vector, unsigned int element_size);
|
void aligned_vector_init(AlignedVector* vector, unsigned int element_size);
|
||||||
void* aligned_vector_reserve(AlignedVector* vector, unsigned int element_count);
|
void* aligned_vector_reserve(AlignedVector* vector, unsigned int element_count);
|
||||||
|
|
Loading…
Reference in New Issue
Block a user