Optimise vertex transform in non-fast path to avoid storing xyz to memory and then loading it again
This commit is contained in:
parent
9d717800bd
commit
3b2e549934
@ -37,67 +37,89 @@ GLuint* _glGetEnabledAttributes() {
|
|||||||
|
|
||||||
|
|
||||||
static void _readPosition3f3f(const GLubyte* __restrict__ in, GLubyte* __restrict__ out) {
|
static void _readPosition3f3f(const GLubyte* __restrict__ in, GLubyte* __restrict__ out) {
|
||||||
vec3cpy(out, in);
|
const float* input = (const float*) in;
|
||||||
|
Vertex* it = (Vertex*) out;
|
||||||
|
|
||||||
|
float x = input[0];
|
||||||
|
float y = input[1];
|
||||||
|
float z = input[2];
|
||||||
|
float w = 1.0f;
|
||||||
|
TransformVertex(x, y, z, w, it->xyz, &it->w);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void _readPosition3ub3f(const GLubyte* input, GLubyte* out) {
|
static void _readPosition3ub3f(const GLubyte* input, GLubyte* out) {
|
||||||
float* output = (float*) out;
|
Vertex* it = (Vertex*)out;
|
||||||
|
|
||||||
output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE;
|
float x = input[0] * ONE_OVER_TWO_FIVE_FIVE;
|
||||||
output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE;
|
float y = input[1] * ONE_OVER_TWO_FIVE_FIVE;
|
||||||
output[2] = input[2] * ONE_OVER_TWO_FIVE_FIVE;
|
float z = input[2] * ONE_OVER_TWO_FIVE_FIVE;
|
||||||
|
float w = 1.0f;
|
||||||
|
TransformVertex(x, y, z, w, it->xyz, &it->w);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void _readPosition3us3f(const GLubyte* in, GLubyte* out) {
|
static void _readPosition3us3f(const GLubyte* in, GLubyte* out) {
|
||||||
const GLushort* input = (const GLushort*) in;
|
const GLushort* input = (const GLushort*) in;
|
||||||
float* output = (float*) out;
|
Vertex* it = (Vertex*) out;
|
||||||
|
|
||||||
output[0] = input[0];
|
float x = input[0];
|
||||||
output[1] = input[1];
|
float y = input[1];
|
||||||
output[2] = input[2];
|
float z = input[2];
|
||||||
|
float w = 1.0f;
|
||||||
|
TransformVertex(x, y, z, w, it->xyz, &it->w);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void _readPosition3ui3f(const GLubyte* in, GLubyte* out) {
|
static void _readPosition3ui3f(const GLubyte* in, GLubyte* out) {
|
||||||
const GLuint* input = (const GLuint*) in;
|
const GLuint* input = (const GLuint*) in;
|
||||||
float* output = (float*) out;
|
Vertex* it = (Vertex*) out;
|
||||||
|
|
||||||
output[0] = input[0];
|
float x = input[0];
|
||||||
output[1] = input[1];
|
float y = input[1];
|
||||||
output[2] = input[2];
|
float z = input[2];
|
||||||
|
float w = 1.0f;
|
||||||
|
TransformVertex(x, y, z, w, it->xyz, &it->w);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void _readPosition2f3f(const GLubyte* in, GLubyte* out) {
|
static void _readPosition2f3f(const GLubyte* in, GLubyte* out) {
|
||||||
const float* input = (const float*) in;
|
const float* input = (const float*) in;
|
||||||
float* output = (float*) out;
|
Vertex* it = (Vertex*) out;
|
||||||
|
|
||||||
vec2cpy(output, input);
|
float x = input[0];
|
||||||
output[2] = 0.0f;
|
float y = input[1];
|
||||||
|
float z = 0.0f;
|
||||||
|
float w = 1.0f;
|
||||||
|
TransformVertex(x, y, z, w, it->xyz, &it->w);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void _readPosition2ub3f(const GLubyte* input, GLubyte* out) {
|
static void _readPosition2ub3f(const GLubyte* input, GLubyte* out) {
|
||||||
float* output = (float*) out;
|
Vertex* it = (Vertex*) out;
|
||||||
|
|
||||||
output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE;
|
float x = input[0] * ONE_OVER_TWO_FIVE_FIVE;
|
||||||
output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE;
|
float y = input[1] * ONE_OVER_TWO_FIVE_FIVE;
|
||||||
output[2] = 0.0f;
|
float z = 0.0f;
|
||||||
|
float w = 1.0f;
|
||||||
|
TransformVertex(x, y, z, w, it->xyz, &it->w);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void _readPosition2us3f(const GLubyte* in, GLubyte* out) {
|
static void _readPosition2us3f(const GLubyte* in, GLubyte* out) {
|
||||||
const GLushort* input = (const GLushort*) in;
|
const GLushort* input = (const GLushort*) in;
|
||||||
float* output = (float*) out;
|
Vertex* it = (Vertex*) out;
|
||||||
|
|
||||||
output[0] = input[0];
|
float x = input[0];
|
||||||
output[1] = input[1];
|
float y = input[1];
|
||||||
output[2] = 0.0f;
|
float z = 0.0f;
|
||||||
|
float w = 1.0f;
|
||||||
|
TransformVertex(x, y, z, w, it->xyz, &it->w);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void _readPosition2ui3f(const GLubyte* in, GLubyte* out) {
|
static void _readPosition2ui3f(const GLubyte* in, GLubyte* out) {
|
||||||
const GLuint* input = (const GLuint*) in;
|
const GLuint* input = (const GLuint*) in;
|
||||||
float* output = (float*) out;
|
Vertex* it = (Vertex*)out;
|
||||||
|
|
||||||
output[0] = input[0];
|
float x = input[0];
|
||||||
output[1] = input[1];
|
float y = input[1];
|
||||||
output[2] = 0.0f;
|
float z = 0.0f;
|
||||||
|
float w = 1.0f;
|
||||||
|
TransformVertex(x, y, z, w, it->xyz, &it->w);
|
||||||
}
|
}
|
||||||
|
|
||||||
static ReadAttributeFunc calcReadPositionFunc() {
|
static ReadAttributeFunc calcReadPositionFunc() {
|
||||||
|
12
GL/draw.c
12
GL/draw.c
@ -289,13 +289,10 @@ static void _readPositionData(const GLuint first, const GLuint count, Vertex* it
|
|||||||
const GLsizei vstride = ATTRIB_LIST.vertex.stride;
|
const GLsizei vstride = ATTRIB_LIST.vertex.stride;
|
||||||
const GLubyte* vptr = ((GLubyte*) ATTRIB_LIST.vertex.ptr + (first * vstride));
|
const GLubyte* vptr = ((GLubyte*) ATTRIB_LIST.vertex.ptr + (first * vstride));
|
||||||
|
|
||||||
float pos[3], w = 1.0f;
|
|
||||||
|
|
||||||
ITERATE(count) {
|
ITERATE(count) {
|
||||||
PREFETCH(vptr + vstride);
|
PREFETCH(vptr + vstride);
|
||||||
func(vptr, (GLubyte*) pos);
|
func(vptr, (GLubyte*) it);
|
||||||
it->flags = GPU_CMD_VERTEX;
|
it->flags = GPU_CMD_VERTEX;
|
||||||
TransformVertex(pos, &w, it->xyz, &it->w);
|
|
||||||
|
|
||||||
vptr += vstride;
|
vptr += vstride;
|
||||||
++it;
|
++it;
|
||||||
@ -411,8 +408,7 @@ static void generateElements(
|
|||||||
st = (GLubyte*) ATTRIB_LIST.st.ptr + (idx * ststride);
|
st = (GLubyte*) ATTRIB_LIST.st.ptr + (idx * ststride);
|
||||||
nxyz = (GLubyte*) ATTRIB_LIST.normal.ptr + (idx * nstride);
|
nxyz = (GLubyte*) ATTRIB_LIST.normal.ptr + (idx * nstride);
|
||||||
|
|
||||||
pos_func(xyz, (GLubyte*) pos);
|
pos_func(xyz, (GLubyte*) output);
|
||||||
TransformVertex((const float*) pos, &w, output->xyz, &output->w);
|
|
||||||
uv_func(uv, (GLubyte*) output->uv);
|
uv_func(uv, (GLubyte*) output->uv);
|
||||||
diffuse_func(bgra, output->bgra);
|
diffuse_func(bgra, output->bgra);
|
||||||
st_func(st, (GLubyte*) ve->st);
|
st_func(st, (GLubyte*) ve->st);
|
||||||
@ -460,8 +456,6 @@ static void generateElementsFastPath(
|
|||||||
VertexExtra* ve = aligned_vector_at(target->extras, 0);
|
VertexExtra* ve = aligned_vector_at(target->extras, 0);
|
||||||
Vertex* it = start;
|
Vertex* it = start;
|
||||||
|
|
||||||
const float w = 1.0f;
|
|
||||||
|
|
||||||
if(!pos) {
|
if(!pos) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -472,7 +466,7 @@ static void generateElementsFastPath(
|
|||||||
it->flags = GPU_CMD_VERTEX;
|
it->flags = GPU_CMD_VERTEX;
|
||||||
|
|
||||||
pos = (GLubyte*) ATTRIB_LIST.vertex.ptr + (idx * vstride);
|
pos = (GLubyte*) ATTRIB_LIST.vertex.ptr + (idx * vstride);
|
||||||
TransformVertex((const float*) pos, &w, it->xyz, &it->w);
|
TransformVertex(((float*) pos)[0], ((float*) pos)[1], ((float*) pos)[2], 1.0f, it->xyz, &it->w);
|
||||||
|
|
||||||
if(uv) {
|
if(uv) {
|
||||||
uv = (GLubyte*) ATTRIB_LIST.uv.ptr + (idx * uvstride);
|
uv = (GLubyte*) ATTRIB_LIST.uv.ptr + (idx * uvstride);
|
||||||
|
@ -5,7 +5,6 @@
|
|||||||
|
|
||||||
MAKE_FUNC(POLYMODE)
|
MAKE_FUNC(POLYMODE)
|
||||||
{
|
{
|
||||||
static const float w = 1.0f;
|
|
||||||
if(!(ATTRIB_LIST.enabled & VERTEX_ENABLED_FLAG)) {
|
if(!(ATTRIB_LIST.enabled & VERTEX_ENABLED_FLAG)) {
|
||||||
/* If we don't have vertices, do nothing */
|
/* If we don't have vertices, do nothing */
|
||||||
return;
|
return;
|
||||||
@ -75,7 +74,7 @@ MAKE_FUNC(POLYMODE)
|
|||||||
PREFETCH(ptr);
|
PREFETCH(ptr);
|
||||||
for(int_fast32_t i = 0; i < loop; ++i, ++it) {
|
for(int_fast32_t i = 0; i < loop; ++i, ++it) {
|
||||||
PREFETCH(ptr + stride);
|
PREFETCH(ptr + stride);
|
||||||
TransformVertex((const float*) ptr, &w, it->xyz, &it->w);
|
TransformVertex(((float*) ptr)[0], ((float*) ptr)[1], ((float*) ptr)[2], 1.0f, it->xyz, &it->w);
|
||||||
PROCESS_VERTEX_FLAGS(it, min + i);
|
PROCESS_VERTEX_FLAGS(it, min + i);
|
||||||
ptr += stride;
|
ptr += stride;
|
||||||
}
|
}
|
||||||
|
@ -106,15 +106,14 @@ inline void TransformVec4(float* x) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
GL_FORCE_INLINE void TransformVertex(const float* xyz, const float* w, float* oxyz, float* ow) {
|
GL_FORCE_INLINE void TransformVertex(float x, float y, float z, float w, float* oxyz, float* ow) {
|
||||||
register float __x __asm__("fr12") = (xyz[0]);
|
register float __x __asm__("fr4") = x;
|
||||||
register float __y __asm__("fr13") = (xyz[1]);
|
register float __y __asm__("fr5") = y;
|
||||||
register float __z __asm__("fr14") = (xyz[2]);
|
register float __z __asm__("fr6") = z;
|
||||||
register float __w __asm__("fr15") = (*w);
|
register float __w __asm__("fr7") = w;
|
||||||
|
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
"fldi1 fr15\n"
|
"ftrv xmtrx,fv4\n"
|
||||||
"ftrv xmtrx,fv12\n"
|
|
||||||
: "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w)
|
: "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w)
|
||||||
: "0" (__x), "1" (__y), "2" (__z), "3" (__w)
|
: "0" (__x), "1" (__y), "2" (__z), "3" (__w)
|
||||||
);
|
);
|
||||||
|
@ -636,12 +636,12 @@ void TransformVertices(Vertex* vertices, const int count) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void TransformVertex(const float* xyz, const float* w, float* oxyz, float* ow) {
|
void TransformVertex(float x, float y, float z, float w, float* oxyz, float* ow) {
|
||||||
float ret[4];
|
float ret[4];
|
||||||
ret[0] = xyz[0];
|
ret[0] = x;
|
||||||
ret[1] = xyz[1];
|
ret[1] = y;
|
||||||
ret[2] = xyz[2];
|
ret[2] = z;
|
||||||
ret[3] = *w;
|
ret[3] = w;
|
||||||
|
|
||||||
TransformVec4(ret);
|
TransformVec4(ret);
|
||||||
|
|
||||||
|
@ -53,7 +53,7 @@ static inline void TransformNormalNoMod(const float* xIn, float* xOut) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void TransformVertices(Vertex* vertices, const int count);
|
void TransformVertices(Vertex* vertices, const int count);
|
||||||
void TransformVertex(const float* xyz, const float* w, float* oxyz, float* ow);
|
void TransformVertex(float x, float y, float z, float w, float* oxyz, float* ow);
|
||||||
|
|
||||||
void InitGPU(_Bool autosort, _Bool fsaa);
|
void InitGPU(_Bool autosort, _Bool fsaa);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user