More performance work
This commit is contained in:
parent
2547459ef3
commit
26c9a454e4
66
GL/draw.c
66
GL/draw.c
|
@ -797,40 +797,49 @@ static void generateArraysFastPath(SubmissionTarget* target, const GLsizei first
|
||||||
|
|
||||||
|
|
||||||
/* Copy the pos, uv and color directly in one go */
|
/* Copy the pos, uv and color directly in one go */
|
||||||
const GLubyte* pos = VERTEX_POINTER.ptr + (first * vstride);
|
const GLubyte* pos = (ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG) ? VERTEX_POINTER.ptr + (first * vstride) : NULL;
|
||||||
const GLubyte* uv = UV_POINTER.ptr + (first * uvstride);
|
const GLubyte* uv = (ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) ? UV_POINTER.ptr + (first * uvstride) : NULL;
|
||||||
const GLubyte* col = DIFFUSE_POINTER.ptr + (first * dstride);
|
const GLubyte* col = (ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) ? DIFFUSE_POINTER.ptr + (first * dstride) : NULL;
|
||||||
const GLubyte* st = ST_POINTER.ptr + (first * ststride);
|
const GLubyte* st = (ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) ? ST_POINTER.ptr + (first * ststride) : NULL;
|
||||||
const GLubyte* n = NORMAL_POINTER.ptr + (first * nstride);
|
const GLubyte* n = (ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) ? NORMAL_POINTER.ptr + (first * nstride) : NULL;
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
float x, y, z;
|
|
||||||
} V3;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
float u, v;
|
|
||||||
} V2;
|
|
||||||
|
|
||||||
VertexExtra* ve = aligned_vector_at(target->extras, 0);
|
VertexExtra* ve = aligned_vector_at(target->extras, 0);
|
||||||
Vertex* it = start;
|
Vertex* it = start;
|
||||||
ITERATE(count) {
|
|
||||||
|
const float w = 1.0f;
|
||||||
|
|
||||||
|
uint32_t i = count;
|
||||||
|
|
||||||
|
while(i--) {
|
||||||
it->flags = GPU_CMD_VERTEX;
|
it->flags = GPU_CMD_VERTEX;
|
||||||
|
|
||||||
*((V3*) it->xyz) = *((V3*) pos);
|
if(pos) {
|
||||||
*((V2*) it->uv) = *((V2*) uv);
|
TransformVertex((const float*) pos, &w, it->xyz, &it->w);
|
||||||
*((uint32_t*) it->bgra) = *((uint32_t*) col);
|
pos += vstride;
|
||||||
|
}
|
||||||
|
|
||||||
*((V2*) ve->st) = *((V2*) st);
|
if(uv) {
|
||||||
*((V3*) ve->nxyz) = *((V3*) n);
|
MEMCPY4(it->uv, uv, sizeof(float) * 2);
|
||||||
|
uv += uvstride;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(col) {
|
||||||
|
MEMCPY4(it->bgra, col, sizeof(uint32_t));
|
||||||
|
col += dstride;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(st) {
|
||||||
|
MEMCPY4(ve->st, st, sizeof(float) * 2);
|
||||||
|
st += ststride;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(n) {
|
||||||
|
MEMCPY4(ve->nxyz, n, sizeof(float) * 3);
|
||||||
|
n += nstride;
|
||||||
|
}
|
||||||
|
|
||||||
it++;
|
it++;
|
||||||
ve++;
|
ve++;
|
||||||
|
|
||||||
pos += vstride;
|
|
||||||
uv += uvstride;
|
|
||||||
col += dstride;
|
|
||||||
st += ststride;
|
|
||||||
n += nstride;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -890,8 +899,6 @@ static void transform(SubmissionTarget* target) {
|
||||||
/* Perform modelview transform, storing W */
|
/* Perform modelview transform, storing W */
|
||||||
Vertex* vertex = _glSubmissionTargetStart(target);
|
Vertex* vertex = _glSubmissionTargetStart(target);
|
||||||
|
|
||||||
_glApplyRenderMatrix(); /* Apply the Render Matrix Stack */
|
|
||||||
|
|
||||||
TransformVertices(vertex, target->count);
|
TransformVertices(vertex, target->count);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1095,13 +1102,18 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
|
||||||
|
|
||||||
/* Make room for the vertices and header */
|
/* Make room for the vertices and header */
|
||||||
aligned_vector_extend(&target->output->vector, target->count + 1);
|
aligned_vector_extend(&target->output->vector, target->count + 1);
|
||||||
|
|
||||||
|
_glApplyRenderMatrix(); /* Apply the Render Matrix Stack */
|
||||||
|
|
||||||
generate(target, mode, first, count, (GLubyte*) indices, type);
|
generate(target, mode, first, count, (GLubyte*) indices, type);
|
||||||
|
|
||||||
if(doLighting){
|
if(doLighting){
|
||||||
light(target);
|
light(target);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(!FAST_PATH_ENABLED) {
|
||||||
transform(target);
|
transform(target);
|
||||||
|
}
|
||||||
|
|
||||||
if(_glIsClippingEnabled()) {
|
if(_glIsClippingEnabled()) {
|
||||||
#if DEBUG_CLIPPING
|
#if DEBUG_CLIPPING
|
||||||
|
|
|
@ -69,6 +69,25 @@ inline void TransformVec4(float* x) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Transform a single vertex position with the SH4 `ftrv` instruction.
 *
 * xyz  - input position; xyz[0], xyz[1] and xyz[2] are read.
 * w    - input W component; read once into the fr15 register variable.
 * oxyz - output; oxyz[0..2] receive the transformed x, y, z.
 * ow   - output; receives the transformed w.
 *
 * The four components are bound to fr12..fr15 (the fv12 vector register)
 * and multiplied by whatever matrix is currently held in XMTRX.
 * NOTE(review): this function does not load XMTRX itself; presumably the
 * caller has already applied the render matrix - confirm at call sites.
 */
static inline void TransformVertex(const float* xyz, const float* w, float* oxyz, float* ow) {
|
||||||
|
/* Explicit register variables so the operands sit in fv12 (fr12..fr15). */
register float __x __asm__("fr12") = (xyz[0]);
|
||||||
|
register float __y __asm__("fr13") = (xyz[1]);
|
||||||
|
register float __z __asm__("fr14") = (xyz[2]);
|
||||||
|
register float __w __asm__("fr15") = (*w);
|
||||||
|
|
||||||
|
__asm__ __volatile__(
|
||||||
|
/* fldi1 loads the constant 1.0 into fr15.
 * NOTE(review): this overwrites the value just read from *w, so the
 * w argument appears to have no effect on the result - confirm intent. */
"fldi1 fr15\n"
|
||||||
|
/* fv12 = XMTRX * fv12 */
"ftrv xmtrx,fv12\n"
|
||||||
|
/* Outputs: all four components are written by ftrv. */
: "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w)
|
||||||
|
/* Inputs: tied to the matching output operands (same registers). */
: "0" (__x), "1" (__y), "2" (__z), "3" (__w)
|
||||||
|
);
|
||||||
|
|
||||||
|
/* Store the transformed components to the caller-provided outputs. */
oxyz[0] = __x;
|
||||||
|
oxyz[1] = __y;
|
||||||
|
oxyz[2] = __z;
|
||||||
|
*ow = __w;
|
||||||
|
}
|
||||||
|
|
||||||
static inline void TransformVertices(Vertex* vertices, const int count) {
|
static inline void TransformVertices(Vertex* vertices, const int count) {
|
||||||
Vertex* it = vertices;
|
Vertex* it = vertices;
|
||||||
for(int i = 0; i < count; ++i, ++it) {
|
for(int i = 0; i < count; ++i, ++it) {
|
||||||
|
|
Loading…
Reference in New Issue
Block a user