From 26c9a454e48cb1f8feb98b2c64f622185a31254c Mon Sep 17 00:00:00 2001
From: Luke Benstead
Date: Wed, 21 Apr 2021 15:34:28 +0100
Subject: [PATCH] More performance work

---
 GL/draw.c          | 68 +++++++++++++++++++++++++++-------------------
 GL/platforms/sh4.h | 19 +++++++++++++
 2 files changed, 59 insertions(+), 28 deletions(-)

diff --git a/GL/draw.c b/GL/draw.c
index 7350324..5491e39 100644
--- a/GL/draw.c
+++ b/GL/draw.c
@@ -797,40 +797,49 @@ static void generateArraysFastPath(SubmissionTarget* target, const GLsizei first
 
 
     /* Copy the pos, uv and color directly in one go */
-    const GLubyte* pos = VERTEX_POINTER.ptr + (first * vstride);
-    const GLubyte* uv = UV_POINTER.ptr + (first * uvstride);
-    const GLubyte* col = DIFFUSE_POINTER.ptr + (first * dstride);
-    const GLubyte* st = ST_POINTER.ptr + (first * ststride);
-    const GLubyte* n = NORMAL_POINTER.ptr + (first * nstride);
-
-    typedef struct {
-        float x, y, z;
-    } V3;
-
-    typedef struct {
-        float u, v;
-    } V2;
+    const GLubyte* pos = (ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG) ? VERTEX_POINTER.ptr + (first * vstride) : NULL;
+    const GLubyte* uv = (ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) ? UV_POINTER.ptr + (first * uvstride) : NULL;
+    const GLubyte* col = (ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) ? DIFFUSE_POINTER.ptr + (first * dstride) : NULL;
+    const GLubyte* st = (ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) ? ST_POINTER.ptr + (first * ststride) : NULL;
+    const GLubyte* n = (ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) ? NORMAL_POINTER.ptr + (first * nstride) : NULL;
 
     VertexExtra* ve = aligned_vector_at(target->extras, 0);
     Vertex* it = start;
-    ITERATE(count) {
+
+    const float w = 1.0f;
+
+    uint32_t i = count;
+
+    while(i--) {
         it->flags = GPU_CMD_VERTEX;
 
-        *((V3*) it->xyz) = *((V3*) pos);
-        *((V2*) it->uv) = *((V2*) uv);
-        *((uint32_t*) it->bgra) = *((uint32_t*) col);
+        if(pos) {
+            TransformVertex((const float*) pos, &w, it->xyz, &it->w);
+            pos += vstride;
+        }
 
-        *((V2*) ve->st) = *((V2*) st);
-        *((V3*) ve->nxyz) = *((V3*) n);
+        if(uv) {
+            MEMCPY4(it->uv, uv, sizeof(float) * 2);
+            uv += uvstride;
+        }
+
+        if(col) {
+            MEMCPY4(it->bgra, col, sizeof(uint32_t));
+            col += dstride;
+        }
+
+        if(st) {
+            MEMCPY4(ve->st, st, sizeof(float) * 2);
+            st += ststride;
+        }
+
+        if(n) {
+            MEMCPY4(ve->nxyz, n, sizeof(float) * 3);
+            n += nstride;
+        }
 
         it++;
         ve++;
-
-        pos += vstride;
-        uv += uvstride;
-        col += dstride;
-        st += ststride;
-        n += nstride;
     }
 }
 
@@ -890,8 +899,6 @@ static void transform(SubmissionTarget* target) {
     /* Perform modelview transform, storing W */
     Vertex* vertex = _glSubmissionTargetStart(target);
 
-    _glApplyRenderMatrix(); /* Apply the Render Matrix Stack */
-
     TransformVertices(vertex, target->count);
 }
 
@@ -1095,13 +1102,18 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
 
     /* Make room for the vertices and header */
     aligned_vector_extend(&target->output->vector, target->count + 1);
+
+    _glApplyRenderMatrix(); /* Apply the Render Matrix Stack */
+
     generate(target, mode, first, count, (GLubyte*) indices, type);
 
     if(doLighting){
         light(target);
     }
 
-    transform(target);
+    if(!FAST_PATH_ENABLED) {
+        transform(target);
+    }
 
     if(_glIsClippingEnabled()) {
 #if DEBUG_CLIPPING
diff --git a/GL/platforms/sh4.h b/GL/platforms/sh4.h
index d84b25e..b1b6823 100644
--- a/GL/platforms/sh4.h
+++ b/GL/platforms/sh4.h
@@ -69,6 +69,25 @@ inline void TransformVec4(float* x) {
 }
 
 
+/* Transform the vector (xyz[0], xyz[1], xyz[2], *w) by XMTRX using ftrv; the result is written to oxyz[0..2] and *ow. */
+static inline void TransformVertex(const float* xyz, const float* w, float* oxyz, float* ow) {
+    register float __x __asm__("fr12") = (xyz[0]); /* fr12..fr15 together form fv12, the ftrv operand */
+    register float __y __asm__("fr13") = (xyz[1]);
+    register float __z __asm__("fr14") = (xyz[2]);
+    register float __w __asm__("fr15") = (*w);
+
+    __asm__ __volatile__(
+        "ftrv xmtrx,fv12\n" /* fr15 still holds the caller's *w: no fldi1 forcing it to 1.0 */
+        : "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w)
+        : "0" (__x), "1" (__y), "2" (__z), "3" (__w)
+    );
+
+    oxyz[0] = __x;
+    oxyz[1] = __y;
+    oxyz[2] = __z;
+    *ow = __w;
+}
+
 static inline void TransformVertices(Vertex* vertices, const int count) {
     Vertex* it = vertices;
     for(int i = 0; i < count; ++i, ++it) {