Optimise vertex transform in non-fast path to avoid storing xyz to memory and then loading it again
This commit is contained in:
parent
9d717800bd
commit
3b2e549934
@ -37,67 +37,89 @@ GLuint* _glGetEnabledAttributes() {
|
||||
|
||||
|
||||
/*
 * _readPosition3f3f: reads three floats from `in`, sets w to 1.0f and passes
 * x, y, z, w by value to TransformVertex together with it->xyz and &it->w,
 * where `it` is `out` viewed as a Vertex.
 *
 * NOTE(review): this listing appears to be a rendered diff whose +/- markers
 * were stripped (the `|` / `||||` lines look like gutter residue). The
 * vec3cpy(out, in) statement is presumably the removed pre-commit line and
 * everything after it the post-commit replacement - confirm against the
 * repository before treating both as live code.
 */
static void _readPosition3f3f(const GLubyte* __restrict__ in, GLubyte* __restrict__ out) {
|
||||
vec3cpy(out, in); /* presumably the pre-commit body: copy into `out` - TODO confirm */
|
||||
const float* input = (const float*) in;
|
||||
Vertex* it = (Vertex*) out; /* `out` is reinterpreted as a Vertex here */
|
||||
|
||||
float x = input[0];
|
||||
float y = input[1];
|
||||
float z = input[2];
|
||||
float w = 1.0f; /* positions are given an implicit w of 1 */
|
||||
/* The components are handed over as plain float arguments. */
TransformVertex(x, y, z, w, it->xyz, &it->w);
|
||||
}
|
||||
|
||||
/*
 * _readPosition3ub3f: converts three GLubyte components to floats by
 * multiplying each by ONE_OVER_TWO_FIVE_FIVE (defined outside this listing;
 * presumably 1/255 - TODO confirm), sets w to 1.0f and passes x, y, z, w to
 * TransformVertex together with it->xyz and &it->w, where `it` is `out`
 * viewed as a Vertex.
 *
 * NOTE(review): this listing appears to be a rendered diff whose +/- markers
 * were stripped (the `|` / `||||` lines look like gutter residue). The
 * `output` declaration and the three `output[i] = ...` stores are presumably
 * the removed pre-commit lines - confirm against the repository.
 */
static void _readPosition3ub3f(const GLubyte* input, GLubyte* out) {
|
||||
float* output = (float*) out; /* presumably pre-commit: `out` viewed as a float array */
|
||||
Vertex* it = (Vertex*)out; /* same pointer viewed as a Vertex */
|
||||
|
||||
output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE;
|
||||
output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE;
|
||||
output[2] = input[2] * ONE_OVER_TWO_FIVE_FIVE;
|
||||
/* Same scaling as the stores above, but kept in local floats. */
float x = input[0] * ONE_OVER_TWO_FIVE_FIVE;
|
||||
float y = input[1] * ONE_OVER_TWO_FIVE_FIVE;
|
||||
float z = input[2] * ONE_OVER_TWO_FIVE_FIVE;
|
||||
float w = 1.0f;
|
||||
TransformVertex(x, y, z, w, it->xyz, &it->w);
|
||||
}
|
||||
|
||||
/*
 * _readPosition3us3f: reads three GLushort components from `in`, converts
 * them to float by plain assignment (no scaling is applied), sets w to 1.0f
 * and passes x, y, z, w to TransformVertex together with it->xyz and &it->w,
 * where `it` is `out` viewed as a Vertex.
 *
 * NOTE(review): this listing appears to be a rendered diff whose +/- markers
 * were stripped (the `|` / `||||` lines look like gutter residue). The
 * `output` declaration and the three `output[i] = ...` stores are presumably
 * the removed pre-commit lines - confirm against the repository.
 */
static void _readPosition3us3f(const GLubyte* in, GLubyte* out) {
|
||||
const GLushort* input = (const GLushort*) in;
|
||||
float* output = (float*) out; /* presumably pre-commit: `out` viewed as a float array */
|
||||
Vertex* it = (Vertex*) out; /* same pointer viewed as a Vertex */
|
||||
|
||||
output[0] = input[0];
|
||||
output[1] = input[1];
|
||||
output[2] = input[2];
|
||||
/* Integer-to-float conversion happens implicitly in these initialisers. */
float x = input[0];
|
||||
float y = input[1];
|
||||
float z = input[2];
|
||||
float w = 1.0f;
|
||||
TransformVertex(x, y, z, w, it->xyz, &it->w);
|
||||
}
|
||||
|
||||
/*
 * _readPosition3ui3f: reads three GLuint components from `in`, converts them
 * to float by plain assignment (no scaling is applied), sets w to 1.0f and
 * passes x, y, z, w to TransformVertex together with it->xyz and &it->w,
 * where `it` is `out` viewed as a Vertex.
 *
 * NOTE(review): this listing appears to be a rendered diff whose +/- markers
 * were stripped (the `|` / `||||` lines look like gutter residue). The
 * `output` declaration and the three `output[i] = ...` stores are presumably
 * the removed pre-commit lines - confirm against the repository.
 */
static void _readPosition3ui3f(const GLubyte* in, GLubyte* out) {
|
||||
const GLuint* input = (const GLuint*) in;
|
||||
float* output = (float*) out; /* presumably pre-commit: `out` viewed as a float array */
|
||||
Vertex* it = (Vertex*) out; /* same pointer viewed as a Vertex */
|
||||
|
||||
output[0] = input[0];
|
||||
output[1] = input[1];
|
||||
output[2] = input[2];
|
||||
/* Integer-to-float conversion happens implicitly in these initialisers. */
float x = input[0];
|
||||
float y = input[1];
|
||||
float z = input[2];
|
||||
float w = 1.0f;
|
||||
TransformVertex(x, y, z, w, it->xyz, &it->w);
|
||||
}
|
||||
|
||||
/*
 * _readPosition2f3f: reads two floats from `in`, uses 0.0f for z and 1.0f for
 * w, and passes x, y, z, w to TransformVertex together with it->xyz and
 * &it->w, where `it` is `out` viewed as a Vertex.
 *
 * NOTE(review): this listing appears to be a rendered diff whose +/- markers
 * were stripped (the `|` / `||||` lines look like gutter residue). The
 * `output` declaration, the vec2cpy(output, input) call and the
 * `output[2] = 0.0f` store are presumably the removed pre-commit lines -
 * confirm against the repository.
 */
static void _readPosition2f3f(const GLubyte* in, GLubyte* out) {
|
||||
const float* input = (const float*) in;
|
||||
float* output = (float*) out; /* presumably pre-commit: `out` viewed as a float array */
|
||||
Vertex* it = (Vertex*) out; /* same pointer viewed as a Vertex */
|
||||
|
||||
vec2cpy(output, input);
|
||||
output[2] = 0.0f;
|
||||
float x = input[0];
|
||||
float y = input[1];
|
||||
float z = 0.0f; /* two-component input: z is fixed at zero */
|
||||
float w = 1.0f;
|
||||
TransformVertex(x, y, z, w, it->xyz, &it->w);
|
||||
}
|
||||
|
||||
/*
 * _readPosition2ub3f: converts two GLubyte components to floats by
 * multiplying each by ONE_OVER_TWO_FIVE_FIVE (defined outside this listing;
 * presumably 1/255 - TODO confirm), uses 0.0f for z and 1.0f for w, and
 * passes x, y, z, w to TransformVertex together with it->xyz and &it->w,
 * where `it` is `out` viewed as a Vertex.
 *
 * NOTE(review): this listing appears to be a rendered diff whose +/- markers
 * were stripped (the `|` / `||||` lines look like gutter residue). The
 * `output` declaration and the three `output[i] = ...` stores are presumably
 * the removed pre-commit lines - confirm against the repository.
 */
static void _readPosition2ub3f(const GLubyte* input, GLubyte* out) {
|
||||
float* output = (float*) out; /* presumably pre-commit: `out` viewed as a float array */
|
||||
Vertex* it = (Vertex*) out; /* same pointer viewed as a Vertex */
|
||||
|
||||
output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE;
|
||||
output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE;
|
||||
output[2] = 0.0f;
|
||||
/* Same scaling as the stores above, but kept in local floats. */
float x = input[0] * ONE_OVER_TWO_FIVE_FIVE;
|
||||
float y = input[1] * ONE_OVER_TWO_FIVE_FIVE;
|
||||
float z = 0.0f; /* two-component input: z is fixed at zero */
|
||||
float w = 1.0f;
|
||||
TransformVertex(x, y, z, w, it->xyz, &it->w);
|
||||
}
|
||||
|
||||
/*
 * _readPosition2us3f: reads two GLushort components from `in`, converts them
 * to float by plain assignment (no scaling is applied), uses 0.0f for z and
 * 1.0f for w, and passes x, y, z, w to TransformVertex together with it->xyz
 * and &it->w, where `it` is `out` viewed as a Vertex.
 *
 * NOTE(review): this listing appears to be a rendered diff whose +/- markers
 * were stripped (the `|` / `||||` lines look like gutter residue). The
 * `output` declaration and the three `output[i] = ...` stores are presumably
 * the removed pre-commit lines - confirm against the repository.
 */
static void _readPosition2us3f(const GLubyte* in, GLubyte* out) {
|
||||
const GLushort* input = (const GLushort*) in;
|
||||
float* output = (float*) out; /* presumably pre-commit: `out` viewed as a float array */
|
||||
Vertex* it = (Vertex*) out; /* same pointer viewed as a Vertex */
|
||||
|
||||
output[0] = input[0];
|
||||
output[1] = input[1];
|
||||
output[2] = 0.0f;
|
||||
/* Integer-to-float conversion happens implicitly in these initialisers. */
float x = input[0];
|
||||
float y = input[1];
|
||||
float z = 0.0f; /* two-component input: z is fixed at zero */
|
||||
float w = 1.0f;
|
||||
TransformVertex(x, y, z, w, it->xyz, &it->w);
|
||||
}
|
||||
|
||||
/*
 * _readPosition2ui3f: reads two GLuint components from `in`, converts them to
 * float by plain assignment (no scaling is applied), uses 0.0f for z and
 * 1.0f for w, and passes x, y, z, w to TransformVertex together with it->xyz
 * and &it->w, where `it` is `out` viewed as a Vertex.
 *
 * NOTE(review): this listing appears to be a rendered diff whose +/- markers
 * were stripped (the `|` / `||||` lines look like gutter residue). The
 * `output` declaration and the three `output[i] = ...` stores are presumably
 * the removed pre-commit lines - confirm against the repository.
 */
static void _readPosition2ui3f(const GLubyte* in, GLubyte* out) {
|
||||
const GLuint* input = (const GLuint*) in;
|
||||
float* output = (float*) out; /* presumably pre-commit: `out` viewed as a float array */
|
||||
Vertex* it = (Vertex*)out; /* same pointer viewed as a Vertex */
|
||||
|
||||
output[0] = input[0];
|
||||
output[1] = input[1];
|
||||
output[2] = 0.0f;
|
||||
/* Integer-to-float conversion happens implicitly in these initialisers. */
float x = input[0];
|
||||
float y = input[1];
|
||||
float z = 0.0f; /* two-component input: z is fixed at zero */
|
||||
float w = 1.0f;
|
||||
TransformVertex(x, y, z, w, it->xyz, &it->w);
|
||||
}
|
||||
|
||||
static ReadAttributeFunc calcReadPositionFunc() {
|
||||
|
12
GL/draw.c
12
GL/draw.c
@ -289,13 +289,10 @@ static void _readPositionData(const GLuint first, const GLuint count, Vertex* it
|
||||
const GLsizei vstride = ATTRIB_LIST.vertex.stride;
|
||||
const GLubyte* vptr = ((GLubyte*) ATTRIB_LIST.vertex.ptr + (first * vstride));
|
||||
|
||||
float pos[3], w = 1.0f;
|
||||
|
||||
ITERATE(count) {
|
||||
PREFETCH(vptr + vstride);
|
||||
func(vptr, (GLubyte*) pos);
|
||||
func(vptr, (GLubyte*) it);
|
||||
it->flags = GPU_CMD_VERTEX;
|
||||
TransformVertex(pos, &w, it->xyz, &it->w);
|
||||
|
||||
vptr += vstride;
|
||||
++it;
|
||||
@ -411,8 +408,7 @@ static void generateElements(
|
||||
st = (GLubyte*) ATTRIB_LIST.st.ptr + (idx * ststride);
|
||||
nxyz = (GLubyte*) ATTRIB_LIST.normal.ptr + (idx * nstride);
|
||||
|
||||
pos_func(xyz, (GLubyte*) pos);
|
||||
TransformVertex((const float*) pos, &w, output->xyz, &output->w);
|
||||
pos_func(xyz, (GLubyte*) output);
|
||||
uv_func(uv, (GLubyte*) output->uv);
|
||||
diffuse_func(bgra, output->bgra);
|
||||
st_func(st, (GLubyte*) ve->st);
|
||||
@ -460,8 +456,6 @@ static void generateElementsFastPath(
|
||||
VertexExtra* ve = aligned_vector_at(target->extras, 0);
|
||||
Vertex* it = start;
|
||||
|
||||
const float w = 1.0f;
|
||||
|
||||
if(!pos) {
|
||||
return;
|
||||
}
|
||||
@ -472,7 +466,7 @@ static void generateElementsFastPath(
|
||||
it->flags = GPU_CMD_VERTEX;
|
||||
|
||||
pos = (GLubyte*) ATTRIB_LIST.vertex.ptr + (idx * vstride);
|
||||
TransformVertex((const float*) pos, &w, it->xyz, &it->w);
|
||||
TransformVertex(((float*) pos)[0], ((float*) pos)[1], ((float*) pos)[2], 1.0f, it->xyz, &it->w);
|
||||
|
||||
if(uv) {
|
||||
uv = (GLubyte*) ATTRIB_LIST.uv.ptr + (idx * uvstride);
|
||||
|
@ -5,7 +5,6 @@
|
||||
|
||||
MAKE_FUNC(POLYMODE)
|
||||
{
|
||||
static const float w = 1.0f;
|
||||
if(!(ATTRIB_LIST.enabled & VERTEX_ENABLED_FLAG)) {
|
||||
/* If we don't have vertices, do nothing */
|
||||
return;
|
||||
@ -75,7 +74,7 @@ MAKE_FUNC(POLYMODE)
|
||||
PREFETCH(ptr);
|
||||
for(int_fast32_t i = 0; i < loop; ++i, ++it) {
|
||||
PREFETCH(ptr + stride);
|
||||
TransformVertex((const float*) ptr, &w, it->xyz, &it->w);
|
||||
TransformVertex(((float*) ptr)[0], ((float*) ptr)[1], ((float*) ptr)[2], 1.0f, it->xyz, &it->w);
|
||||
PROCESS_VERTEX_FLAGS(it, min + i);
|
||||
ptr += stride;
|
||||
}
|
||||
|
@ -106,15 +106,14 @@ inline void TransformVec4(float* x) {
|
||||
|
||||
}
|
||||
|
||||
GL_FORCE_INLINE void TransformVertex(const float* xyz, const float* w, float* oxyz, float* ow) {
|
||||
register float __x __asm__("fr12") = (xyz[0]);
|
||||
register float __y __asm__("fr13") = (xyz[1]);
|
||||
register float __z __asm__("fr14") = (xyz[2]);
|
||||
register float __w __asm__("fr15") = (*w);
|
||||
GL_FORCE_INLINE void TransformVertex(float x, float y, float z, float w, float* oxyz, float* ow) {
|
||||
register float __x __asm__("fr4") = x;
|
||||
register float __y __asm__("fr5") = y;
|
||||
register float __z __asm__("fr6") = z;
|
||||
register float __w __asm__("fr7") = w;
|
||||
|
||||
__asm__ __volatile__(
|
||||
"fldi1 fr15\n"
|
||||
"ftrv xmtrx,fv12\n"
|
||||
"ftrv xmtrx,fv4\n"
|
||||
: "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w)
|
||||
: "0" (__x), "1" (__y), "2" (__z), "3" (__w)
|
||||
);
|
||||
|
@ -636,12 +636,12 @@ void TransformVertices(Vertex* vertices, const int count) {
|
||||
}
|
||||
}
|
||||
|
||||
void TransformVertex(const float* xyz, const float* w, float* oxyz, float* ow) {
|
||||
void TransformVertex(float x, float y, float z, float w, float* oxyz, float* ow) {
|
||||
float ret[4];
|
||||
ret[0] = xyz[0];
|
||||
ret[1] = xyz[1];
|
||||
ret[2] = xyz[2];
|
||||
ret[3] = *w;
|
||||
ret[0] = x;
|
||||
ret[1] = y;
|
||||
ret[2] = z;
|
||||
ret[3] = w;
|
||||
|
||||
TransformVec4(ret);
|
||||
|
||||
|
@ -53,7 +53,7 @@ static inline void TransformNormalNoMod(const float* xIn, float* xOut) {
|
||||
}
|
||||
|
||||
void TransformVertices(Vertex* vertices, const int count);
|
||||
void TransformVertex(const float* xyz, const float* w, float* oxyz, float* ow);
|
||||
void TransformVertex(float x, float y, float z, float w, float* oxyz, float* ow);
|
||||
|
||||
void InitGPU(_Bool autosort, _Bool fsaa);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user