More performance work
This commit is contained in:
parent
2547459ef3
commit
26c9a454e4
68
GL/draw.c
68
GL/draw.c
|
@ -797,40 +797,49 @@ static void generateArraysFastPath(SubmissionTarget* target, const GLsizei first
|
|||
|
||||
|
||||
/* Copy the pos, uv and color directly in one go */
|
||||
const GLubyte* pos = VERTEX_POINTER.ptr + (first * vstride);
|
||||
const GLubyte* uv = UV_POINTER.ptr + (first * uvstride);
|
||||
const GLubyte* col = DIFFUSE_POINTER.ptr + (first * dstride);
|
||||
const GLubyte* st = ST_POINTER.ptr + (first * ststride);
|
||||
const GLubyte* n = NORMAL_POINTER.ptr + (first * nstride);
|
||||
|
||||
typedef struct {
|
||||
float x, y, z;
|
||||
} V3;
|
||||
|
||||
typedef struct {
|
||||
float u, v;
|
||||
} V2;
|
||||
const GLubyte* pos = (ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG) ? VERTEX_POINTER.ptr + (first * vstride) : NULL;
|
||||
const GLubyte* uv = (ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) ? UV_POINTER.ptr + (first * uvstride) : NULL;
|
||||
const GLubyte* col = (ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) ? DIFFUSE_POINTER.ptr + (first * dstride) : NULL;
|
||||
const GLubyte* st = (ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) ? ST_POINTER.ptr + (first * ststride) : NULL;
|
||||
const GLubyte* n = (ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) ? NORMAL_POINTER.ptr + (first * nstride) : NULL;
|
||||
|
||||
VertexExtra* ve = aligned_vector_at(target->extras, 0);
|
||||
Vertex* it = start;
|
||||
ITERATE(count) {
|
||||
|
||||
const float w = 1.0f;
|
||||
|
||||
uint32_t i = count;
|
||||
|
||||
while(i--) {
|
||||
it->flags = GPU_CMD_VERTEX;
|
||||
|
||||
*((V3*) it->xyz) = *((V3*) pos);
|
||||
*((V2*) it->uv) = *((V2*) uv);
|
||||
*((uint32_t*) it->bgra) = *((uint32_t*) col);
|
||||
if(pos) {
|
||||
TransformVertex((const float*) pos, &w, it->xyz, &it->w);
|
||||
pos += vstride;
|
||||
}
|
||||
|
||||
*((V2*) ve->st) = *((V2*) st);
|
||||
*((V3*) ve->nxyz) = *((V3*) n);
|
||||
if(uv) {
|
||||
MEMCPY4(it->uv, uv, sizeof(float) * 2);
|
||||
uv += uvstride;
|
||||
}
|
||||
|
||||
if(col) {
|
||||
MEMCPY4(it->bgra, col, sizeof(uint32_t));
|
||||
col += dstride;
|
||||
}
|
||||
|
||||
if(st) {
|
||||
MEMCPY4(ve->st, st, sizeof(float) * 2);
|
||||
st += ststride;
|
||||
}
|
||||
|
||||
if(n) {
|
||||
MEMCPY4(ve->nxyz, n, sizeof(float) * 3);
|
||||
n += nstride;
|
||||
}
|
||||
|
||||
it++;
|
||||
ve++;
|
||||
|
||||
pos += vstride;
|
||||
uv += uvstride;
|
||||
col += dstride;
|
||||
st += ststride;
|
||||
n += nstride;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -890,8 +899,6 @@ static void transform(SubmissionTarget* target) {
|
|||
/* Perform modelview transform, storing W */
|
||||
Vertex* vertex = _glSubmissionTargetStart(target);
|
||||
|
||||
_glApplyRenderMatrix(); /* Apply the Render Matrix Stack */
|
||||
|
||||
TransformVertices(vertex, target->count);
|
||||
}
|
||||
|
||||
|
@ -1095,13 +1102,18 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
|
|||
|
||||
/* Make room for the vertices and header */
|
||||
aligned_vector_extend(&target->output->vector, target->count + 1);
|
||||
|
||||
_glApplyRenderMatrix(); /* Apply the Render Matrix Stack */
|
||||
|
||||
generate(target, mode, first, count, (GLubyte*) indices, type);
|
||||
|
||||
if(doLighting){
|
||||
light(target);
|
||||
}
|
||||
|
||||
transform(target);
|
||||
if(!FAST_PATH_ENABLED) {
|
||||
transform(target);
|
||||
}
|
||||
|
||||
if(_glIsClippingEnabled()) {
|
||||
#if DEBUG_CLIPPING
|
||||
|
|
|
@ -69,6 +69,25 @@ inline void TransformVec4(float* x) {
|
|||
|
||||
}
|
||||
|
||||
/*
 * Transform a single vertex position using the ftrv instruction against
 * XMTRX and store the transformed components.
 *
 *   xyz  - input position; xyz[0], xyz[1] and xyz[2] are read
 *   w    - input W component; *w is read into the fr15-bound variable
 *   oxyz - output position; oxyz[0], oxyz[1] and oxyz[2] are written
 *   ow   - output W; *ow is written
 *
 * All inputs are read into locals before any output is stored.
 * Presumably XMTRX must already hold the desired matrix when this is
 * called - TODO confirm against the callers.
 */
static inline void TransformVertex(const float* xyz, const float* w, float* oxyz, float* ow) {
|
||||
/* Pin the four components to fr12..fr15, the registers named by "fv12" in the asm below. */
register float __x __asm__("fr12") = (xyz[0]);
|
||||
register float __y __asm__("fr13") = (xyz[1]);
|
||||
register float __z __asm__("fr14") = (xyz[2]);
|
||||
register float __w __asm__("fr15") = (*w);
|
||||
|
||||
__asm__ __volatile__(
|
||||
/* NOTE(review): fldi1 appears to load the constant 1.0 into fr15, which
   would discard the value read from *w before ftrv runs - confirm this
   is intended (the W input would then effectively always be 1.0). */
"fldi1 fr15\n"
|
||||
/* Multiply the fr12..fr15 vector by XMTRX; the result lands in the same registers. */
"ftrv xmtrx,fv12\n"
|
||||
: "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w)
|
||||
: "0" (__x), "1" (__y), "2" (__z), "3" (__w)
|
||||
);
|
||||
|
||||
/* Store the transformed components to the caller-supplied outputs. */
oxyz[0] = __x;
|
||||
oxyz[1] = __y;
|
||||
oxyz[2] = __z;
|
||||
*ow = __w;
|
||||
}
|
||||
|
||||
static inline void TransformVertices(Vertex* vertices, const int count) {
|
||||
Vertex* it = vertices;
|
||||
for(int i = 0; i < count; ++i, ++it) {
|
||||
|
|
Loading…
Reference in New Issue
Block a user