Performance improvements

This commit is contained in:
Luke Benstead 2021-05-25 20:40:43 +01:00
parent f736332eb8
commit fc947c49f8
4 changed files with 99 additions and 119 deletions

120
GL/draw.c
View File

@ -8,11 +8,11 @@
#include "private.h" #include "private.h"
#include "platform.h" #include "platform.h"
static AttribPointer VERTEX_POINTER; AttribPointer VERTEX_POINTER;
static AttribPointer UV_POINTER; AttribPointer UV_POINTER;
static AttribPointer ST_POINTER; AttribPointer ST_POINTER;
static AttribPointer NORMAL_POINTER; AttribPointer NORMAL_POINTER;
static AttribPointer DIFFUSE_POINTER; AttribPointer DIFFUSE_POINTER;
static GLuint ENABLED_VERTEX_ATTRIBUTES = 0; static GLuint ENABLED_VERTEX_ATTRIBUTES = 0;
static GLubyte ACTIVE_CLIENT_TEXTURE = 0; static GLubyte ACTIVE_CLIENT_TEXTURE = 0;
@ -632,7 +632,7 @@ ReadNormalFunc calcReadNormalFunc() {
} }
static void _readPositionData(ReadDiffuseFunc func, const GLuint first, const GLuint count, const Vertex* output) { static void _readPositionData(ReadDiffuseFunc func, const GLuint first, const GLuint count, const Vertex* output) {
const GLsizei vstride = (VERTEX_POINTER.stride) ? VERTEX_POINTER.stride : VERTEX_POINTER.size * byte_size(VERTEX_POINTER.type); const GLsizei vstride = VERTEX_POINTER.stride;
const GLubyte* vptr = ((GLubyte*) VERTEX_POINTER.ptr + (first * vstride)); const GLubyte* vptr = ((GLubyte*) VERTEX_POINTER.ptr + (first * vstride));
GLubyte* out = (GLubyte*) output[0].xyz; GLubyte* out = (GLubyte*) output[0].xyz;
@ -654,7 +654,7 @@ static void _readPositionData(ReadDiffuseFunc func, const GLuint first, const GL
} }
static void _readUVData(ReadUVFunc func, const GLuint first, const GLuint count, const Vertex* output) { static void _readUVData(ReadUVFunc func, const GLuint first, const GLuint count, const Vertex* output) {
const GLsizei uvstride = (UV_POINTER.stride) ? UV_POINTER.stride : UV_POINTER.size * byte_size(UV_POINTER.type); const GLsizei uvstride = UV_POINTER.stride;
const GLubyte* uvptr = ((GLubyte*) UV_POINTER.ptr + (first * uvstride)); const GLubyte* uvptr = ((GLubyte*) UV_POINTER.ptr + (first * uvstride));
GLubyte* out = (GLubyte*) output[0].uv; GLubyte* out = (GLubyte*) output[0].uv;
@ -669,7 +669,7 @@ static void _readUVData(ReadUVFunc func, const GLuint first, const GLuint count,
} }
static void _readSTData(ReadUVFunc func, const GLuint first, const GLuint count, const VertexExtra* extra) { static void _readSTData(ReadUVFunc func, const GLuint first, const GLuint count, const VertexExtra* extra) {
const GLsizei ststride = (ST_POINTER.stride) ? ST_POINTER.stride : ST_POINTER.size * byte_size(ST_POINTER.type); const GLsizei ststride = ST_POINTER.stride;
const GLubyte* stptr = ((GLubyte*) ST_POINTER.ptr + (first * ststride)); const GLubyte* stptr = ((GLubyte*) ST_POINTER.ptr + (first * ststride));
GLubyte* out = (GLubyte*) extra[0].st; GLubyte* out = (GLubyte*) extra[0].st;
@ -684,7 +684,7 @@ static void _readSTData(ReadUVFunc func, const GLuint first, const GLuint count,
} }
static void _readNormalData(ReadNormalFunc func, const GLuint first, const GLuint count, const VertexExtra* extra) { static void _readNormalData(ReadNormalFunc func, const GLuint first, const GLuint count, const VertexExtra* extra) {
const GLsizei nstride = (NORMAL_POINTER.stride) ? NORMAL_POINTER.stride : NORMAL_POINTER.size * byte_size(NORMAL_POINTER.type); const GLsizei nstride = NORMAL_POINTER.stride;
const GLubyte* nptr = ((GLubyte*) NORMAL_POINTER.ptr + (first * nstride)); const GLubyte* nptr = ((GLubyte*) NORMAL_POINTER.ptr + (first * nstride));
GLubyte* out = (GLubyte*) extra[0].nxyz; GLubyte* out = (GLubyte*) extra[0].nxyz;
@ -718,8 +718,7 @@ GL_FORCE_INLINE GLuint diffusePointerSize() {
} }
static void _readDiffuseData(ReadDiffuseFunc func, const GLuint first, const GLuint count, const Vertex* output) { static void _readDiffuseData(ReadDiffuseFunc func, const GLuint first, const GLuint count, const Vertex* output) {
const GLuint size = diffusePointerSize(); const GLuint cstride = DIFFUSE_POINTER.stride;
const GLuint cstride = (DIFFUSE_POINTER.stride) ? DIFFUSE_POINTER.stride : size * byte_size(DIFFUSE_POINTER.type);
const GLubyte* cptr = ((GLubyte*) DIFFUSE_POINTER.ptr) + (first * cstride); const GLubyte* cptr = ((GLubyte*) DIFFUSE_POINTER.ptr) + (first * cstride);
GLubyte* out = (GLubyte*) output[0].bgra; GLubyte* out = (GLubyte*) output[0].bgra;
@ -758,20 +757,12 @@ static void generateElements(
const ReadDiffuseFunc diffuse_func = calcReadDiffuseFunc(); const ReadDiffuseFunc diffuse_func = calcReadDiffuseFunc();
const ReadNormalFunc normal_func = calcReadNormalFunc(); const ReadNormalFunc normal_func = calcReadNormalFunc();
const GLuint vstride = (VERTEX_POINTER.stride) ? const GLsizei vstride = VERTEX_POINTER.stride;
VERTEX_POINTER.stride : VERTEX_POINTER.size * byte_size(VERTEX_POINTER.type);
const GLuint uvstride = (UV_POINTER.stride) ? const GLuint uvstride = UV_POINTER.stride;
UV_POINTER.stride : UV_POINTER.size * byte_size(UV_POINTER.type); const GLuint ststride = ST_POINTER.stride;
const GLuint dstride = DIFFUSE_POINTER.stride;
const GLuint ststride = (ST_POINTER.stride) ? const GLuint nstride = NORMAL_POINTER.stride;
ST_POINTER.stride : ST_POINTER.size * byte_size(ST_POINTER.type);
const GLuint dstride = (DIFFUSE_POINTER.stride) ?
DIFFUSE_POINTER.stride : diffusePointerSize() * byte_size(DIFFUSE_POINTER.type);
const GLuint nstride = (NORMAL_POINTER.stride) ?
NORMAL_POINTER.stride : NORMAL_POINTER.size * byte_size(NORMAL_POINTER.type);
for(; i < first + count; ++i) { for(; i < first + count; ++i) {
idx = IndexFunc(indices + (i * istride)); idx = IndexFunc(indices + (i * istride));
@ -812,20 +803,11 @@ static void generateElementsFastPath(
Vertex* start = _glSubmissionTargetStart(target); Vertex* start = _glSubmissionTargetStart(target);
const GLuint vstride = (VERTEX_POINTER.stride) ? const GLuint vstride = VERTEX_POINTER.stride;
VERTEX_POINTER.stride : VERTEX_POINTER.size * byte_size(VERTEX_POINTER.type); const GLuint uvstride = UV_POINTER.stride;
const GLuint ststride = ST_POINTER.stride;
const GLuint uvstride = (UV_POINTER.stride) ? const GLuint dstride = DIFFUSE_POINTER.stride;
UV_POINTER.stride : UV_POINTER.size * byte_size(UV_POINTER.type); const GLuint nstride = NORMAL_POINTER.stride;
const GLuint ststride = (ST_POINTER.stride) ?
ST_POINTER.stride : ST_POINTER.size * byte_size(ST_POINTER.type);
const GLuint dstride = (DIFFUSE_POINTER.stride) ?
DIFFUSE_POINTER.stride : diffusePointerSize() * byte_size(DIFFUSE_POINTER.type);
const GLuint nstride = (NORMAL_POINTER.stride) ?
NORMAL_POINTER.stride : NORMAL_POINTER.size * byte_size(NORMAL_POINTER.type);
const GLsizei istride = byte_size(type); const GLsizei istride = byte_size(type);
const IndexParseFunc IndexFunc = _calcParseIndexFunc(type); const IndexParseFunc IndexFunc = _calcParseIndexFunc(type);
@ -842,17 +824,17 @@ static void generateElementsFastPath(
const float w = 1.0f; const float w = 1.0f;
if(!pos) {
return;
}
for(GLuint i = first; i < first + count; ++i) { for(GLuint i = first; i < first + count; ++i) {
GLuint idx = IndexFunc(indices + (i * istride)); GLuint idx = IndexFunc(indices + (i * istride));
it->flags = GPU_CMD_VERTEX; it->flags = GPU_CMD_VERTEX;
if(pos) { pos = (GLubyte*) VERTEX_POINTER.ptr + (idx * vstride);
pos = (GLubyte*) VERTEX_POINTER.ptr + (idx * vstride); TransformVertex((const float*) pos, &w, it->xyz, &it->w);
TransformVertex((const float*) pos, &w, it->xyz, &it->w);
} else {
*((Float3*) it->xyz) = F3ZERO;
}
if(uv) { if(uv) {
uv = (GLubyte*) UV_POINTER.ptr + (idx * uvstride); uv = (GLubyte*) UV_POINTER.ptr + (idx * uvstride);
@ -892,21 +874,11 @@ static void generateElementsFastPath(
static void generateArraysFastPath(SubmissionTarget* target, const GLsizei first, const GLuint count) { static void generateArraysFastPath(SubmissionTarget* target, const GLsizei first, const GLuint count) {
Vertex* start = _glSubmissionTargetStart(target); Vertex* start = _glSubmissionTargetStart(target);
const GLuint vstride = (VERTEX_POINTER.stride) ? const GLuint vstride = VERTEX_POINTER.stride;
VERTEX_POINTER.stride : VERTEX_POINTER.size * byte_size(VERTEX_POINTER.type); const GLuint uvstride = UV_POINTER.stride;
const GLuint ststride = ST_POINTER.stride;
const GLuint uvstride = (UV_POINTER.stride) ? const GLuint dstride = DIFFUSE_POINTER.stride;
UV_POINTER.stride : UV_POINTER.size * byte_size(UV_POINTER.type); const GLuint nstride = NORMAL_POINTER.stride;
const GLuint ststride = (ST_POINTER.stride) ?
ST_POINTER.stride : ST_POINTER.size * byte_size(ST_POINTER.type);
const GLuint dstride = (DIFFUSE_POINTER.stride) ?
DIFFUSE_POINTER.stride : diffusePointerSize() * byte_size(DIFFUSE_POINTER.type);
const GLuint nstride = (NORMAL_POINTER.stride) ?
NORMAL_POINTER.stride : NORMAL_POINTER.size * byte_size(NORMAL_POINTER.type);
/* Copy the pos, uv and color directly in one go */ /* Copy the pos, uv and color directly in one go */
const GLubyte* pos = (ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG) ? VERTEX_POINTER.ptr + (first * vstride) : NULL; const GLubyte* pos = (ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG) ? VERTEX_POINTER.ptr + (first * vstride) : NULL;
@ -922,16 +894,16 @@ static void generateArraysFastPath(SubmissionTarget* target, const GLsizei first
uint32_t i = count; uint32_t i = count;
if(!pos) {
/* If we don't have vertices, do nothing */
return;
}
while(i--) { while(i--) {
it->flags = GPU_CMD_VERTEX; it->flags = GPU_CMD_VERTEX;
if(pos) { TransformVertex((const float*) pos, &w, it->xyz, &it->w);
TransformVertex((const float*) pos, &w, it->xyz, &it->w); pos += vstride;
pos += vstride;
} else {
*((Float3*) it->xyz) = F3ZERO;
}
if(uv) { if(uv) {
MEMCPY4(it->uv, uv, sizeof(float) * 2); MEMCPY4(it->uv, uv, sizeof(float) * 2);
@ -1468,12 +1440,12 @@ void APIENTRY glTexCoordPointer(GLint size, GLenum type, GLsizei stride, cons
AttribPointer* tointer = (ACTIVE_CLIENT_TEXTURE == 0) ? &UV_POINTER : &ST_POINTER; AttribPointer* tointer = (ACTIVE_CLIENT_TEXTURE == 0) ? &UV_POINTER : &ST_POINTER;
tointer->ptr = pointer; tointer->ptr = pointer;
tointer->stride = stride; tointer->stride = (stride) ? stride : size * byte_size(type);
tointer->type = type; tointer->type = type;
tointer->size = size; tointer->size = size;
} }
void APIENTRY glVertexPointer(GLint size, GLenum type, GLsizei stride, const GLvoid * pointer) { void APIENTRY glVertexPointer(GLint size, GLenum type, GLsizei stride, const GLvoid * pointer) {
TRACE(); TRACE();
if(size < 2 || size > 4) { if(size < 2 || size > 4) {
@ -1483,7 +1455,7 @@ void APIENTRY glVertexPointer(GLint size, GLenum type, GLsizei stride, const
} }
VERTEX_POINTER.ptr = pointer; VERTEX_POINTER.ptr = pointer;
VERTEX_POINTER.stride = stride; VERTEX_POINTER.stride = (stride) ? stride : (size * byte_size(VERTEX_POINTER.type));
VERTEX_POINTER.type = type; VERTEX_POINTER.type = type;
VERTEX_POINTER.size = size; VERTEX_POINTER.size = size;
} }
@ -1497,10 +1469,11 @@ void APIENTRY glColorPointer(GLint size, GLenum type, GLsizei stride, const G
return; return;
} }
DIFFUSE_POINTER.ptr = pointer; DIFFUSE_POINTER.ptr = pointer;
DIFFUSE_POINTER.stride = stride;
DIFFUSE_POINTER.type = type; DIFFUSE_POINTER.type = type;
DIFFUSE_POINTER.size = size; DIFFUSE_POINTER.size = (DIFFUSE_POINTER.size == GL_BGRA) ? 4 : size;
DIFFUSE_POINTER.stride = (stride) ? stride : DIFFUSE_POINTER.size * byte_size(type);
} }
void APIENTRY glNormalPointer(GLenum type, GLsizei stride, const GLvoid * pointer) { void APIENTRY glNormalPointer(GLenum type, GLsizei stride, const GLvoid * pointer) {
@ -1522,8 +1495,7 @@ void APIENTRY glNormalPointer(GLenum type, GLsizei stride, const GLvoid * poin
} }
NORMAL_POINTER.ptr = pointer; NORMAL_POINTER.ptr = pointer;
NORMAL_POINTER.stride = stride;
NORMAL_POINTER.type = type;
NORMAL_POINTER.size = (type == GL_UNSIGNED_INT_2_10_10_10_REV) ? 1 : 3; NORMAL_POINTER.size = (type == GL_UNSIGNED_INT_2_10_10_10_REV) ? 1 : 3;
NORMAL_POINTER.stride = (stride) ? stride : NORMAL_POINTER.size * byte_size(type);
NORMAL_POINTER.type = type;
} }

View File

@ -101,29 +101,38 @@ GL_FORCE_INLINE void glPerspectiveDivideStandard(void* src, uint32_t n) {
/* Perform perspective divide on each vertex */ /* Perform perspective divide on each vertex */
Vertex* vertex = (Vertex*) src; Vertex* vertex = (Vertex*) src;
PREFETCH(vertex + 1);
const float h = GetVideoMode()->height; const float h = GetVideoMode()->height;
while(n--) { while(n--) {
PREFETCH(vertex + 1); PREFETCH(vertex + 2);
if(likely(glIsVertex(vertex->flags))) { if(likely(glIsVertex(vertex->flags))) {
const float f = MATH_Fast_Invert(vertex->w); const float f = MATH_Fast_Invert(vertex->w);
/* Convert to NDC and apply viewport */ /* Convert to NDC and apply viewport */
vertex->xyz[0] = MATH_fmac( vertex->xyz[0] = __builtin_fmaf(
VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth
); );
vertex->xyz[1] = h - MATH_fmac( vertex->xyz[1] = h - __builtin_fmaf(
VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight
); );
/* FIXME: Apply depth range */ /* FIXME: Apply depth range */
vertex->xyz[2] = MAX(
1.0f - MATH_fmac(vertex->xyz[2] * f, 0.5f, 0.5f), /* After multiplying by 'f', the Z coordinate is between
PVR_MIN_Z * -1 and 1. We then need to shift it into a value > 0.00001f
); * where the larger value becomes smaller and vice-versa (because
* the PVR works backwards).
*
* If we multiply the lowest value (-1) by -1 it becomes 1; if
* we multiply the highest value (1) by -1 it becomes -1. We then need
* to add 1 to get it in the range 0 - 2. Then we add a little offset
* and this approach means we can just use FMAC.
* */
vertex->xyz[2] = __builtin_fmaf((vertex->xyz[2] * f), -1.0f, 1.00001f);
} }
++vertex; ++vertex;

View File

@ -30,6 +30,12 @@ static AttribPointer UV_ATTRIB;
static AttribPointer ST_ATTRIB; static AttribPointer ST_ATTRIB;
static AttribPointer NORMAL_ATTRIB; static AttribPointer NORMAL_ATTRIB;
extern AttribPointer VERTEX_POINTER;
extern AttribPointer UV_POINTER;
extern AttribPointer ST_POINTER;
extern AttribPointer NORMAL_POINTER;
extern AttribPointer DIFFUSE_POINTER;
/* We store the list of attributes that have been "enabled" by a call to /* We store the list of attributes that have been "enabled" by a call to
glColor, glNormal, glTexCoord etc. otherwise we already have defaults that glColor, glNormal, glTexCoord etc. otherwise we already have defaults that
can be applied faster */ can be applied faster */
@ -114,10 +120,10 @@ void APIENTRY glBegin(GLenum mode) {
void APIENTRY glColor4f(GLfloat r, GLfloat g, GLfloat b, GLfloat a) { void APIENTRY glColor4f(GLfloat r, GLfloat g, GLfloat b, GLfloat a) {
ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
COLOR[A8IDX] = (GLubyte)(a * 255); COLOR[A8IDX] = (GLubyte)(a * 255.0f);
COLOR[R8IDX] = (GLubyte)(r * 255); COLOR[R8IDX] = (GLubyte)(r * 255.0f);
COLOR[G8IDX] = (GLubyte)(g * 255); COLOR[G8IDX] = (GLubyte)(g * 255.0f);
COLOR[B8IDX] = (GLubyte)(b * 255); COLOR[B8IDX] = (GLubyte)(b * 255.0f);
} }
void APIENTRY glColor4ub(GLubyte r, GLubyte g, GLubyte b, GLubyte a) { void APIENTRY glColor4ub(GLubyte r, GLubyte g, GLubyte b, GLubyte a) {
@ -268,27 +274,21 @@ void APIENTRY glEnd() {
GLuint* attrs = _glGetEnabledAttributes(); GLuint* attrs = _glGetEnabledAttributes();
AttribPointer* vattr = _glGetVertexAttribPointer();
AttribPointer* dattr = _glGetDiffuseAttribPointer();
AttribPointer* nattr = _glGetNormalAttribPointer();
AttribPointer* uattr = _glGetUVAttribPointer();
AttribPointer* sattr = _glGetSTAttribPointer();
/* Stash existing values */ /* Stash existing values */
AttribPointer vptr = *vattr; AttribPointer vptr = VERTEX_POINTER;
AttribPointer dptr = *dattr; AttribPointer dptr = DIFFUSE_POINTER;
AttribPointer nptr = *nattr; AttribPointer nptr = NORMAL_POINTER;
AttribPointer uvptr = *uattr; AttribPointer uvptr = UV_POINTER;
AttribPointer stptr = *sattr; AttribPointer stptr = ST_POINTER;
GLuint prevAttrs = *attrs; GLuint prevAttrs = *attrs;
/* Switch to our immediate mode arrays */ /* Switch to our immediate mode arrays */
*vattr = VERTEX_ATTRIB; VERTEX_POINTER = VERTEX_ATTRIB;
*dattr = DIFFUSE_ATTRIB; DIFFUSE_POINTER = DIFFUSE_ATTRIB;
*nattr = NORMAL_ATTRIB; NORMAL_POINTER = NORMAL_ATTRIB;
*uattr = UV_ATTRIB; UV_POINTER = UV_ATTRIB;
*sattr = ST_ATTRIB; ST_POINTER = ST_ATTRIB;
*attrs = ENABLED_VERTEX_ATTRIBUTES; *attrs = ENABLED_VERTEX_ATTRIBUTES;
@ -303,11 +303,11 @@ void APIENTRY glEnd() {
glDrawArrays(ACTIVE_POLYGON_MODE, 0, VERTICES.size); glDrawArrays(ACTIVE_POLYGON_MODE, 0, VERTICES.size);
/* Restore everything */ /* Restore everything */
*vattr = vptr; VERTEX_POINTER = vptr;
*dattr = dptr; DIFFUSE_POINTER = dptr;
*nattr = nptr; NORMAL_POINTER = nptr;
*uattr = uvptr; UV_POINTER = uvptr;
*sattr = stptr; ST_POINTER = stptr;
*attrs = prevAttrs; *attrs = prevAttrs;
@ -315,12 +315,6 @@ void APIENTRY glEnd() {
aligned_vector_clear(&VERTICES); aligned_vector_clear(&VERTICES);
aligned_vector_clear(&ST_COORDS); aligned_vector_clear(&ST_COORDS);
aligned_vector_clear(&NORMALS); aligned_vector_clear(&NORMALS);
*vattr = vptr;
*dattr = dptr;
*nattr = nptr;
*uattr = uvptr;
*sattr = stptr;
} }
void APIENTRY glRectf(GLfloat x1, GLfloat y1, GLfloat x2, GLfloat y2) { void APIENTRY glRectf(GLfloat x1, GLfloat y1, GLfloat x2, GLfloat y2) {

View File

@ -16,6 +16,11 @@
#define PERF_WARNING(msg) (void) 0 #define PERF_WARNING(msg) (void) 0
#endif #endif
#ifndef GL_FORCE_INLINE
#define GL_NO_INSTRUMENT inline __attribute__((no_instrument_function))
#define GL_INLINE_DEBUG GL_NO_INSTRUMENT __attribute__((always_inline))
#define GL_FORCE_INLINE static GL_INLINE_DEBUG
#endif
#define PREFETCH(addr) __asm__("pref @%0" : : "r"((addr))) #define PREFETCH(addr) __asm__("pref @%0" : : "r"((addr)))
@ -39,29 +44,29 @@
#define VEC3_LENGTH(x, y, z, l) vec3f_length((x), (y), (z), (l)) #define VEC3_LENGTH(x, y, z, l) vec3f_length((x), (y), (z), (l))
#define VEC3_DOT(x1, y1, z1, x2, y2, z2, d) vec3f_dot((x1), (y1), (z1), (x2), (y2), (z2), (d)) #define VEC3_DOT(x1, y1, z1, x2, y2, z2, d) vec3f_dot((x1), (y1), (z1), (x2), (y2), (z2), (d))
static inline void UploadMatrix4x4(const Matrix4x4* mat) { GL_FORCE_INLINE void UploadMatrix4x4(const Matrix4x4* mat) {
mat_load((matrix_t*) mat); mat_load((matrix_t*) mat);
} }
static inline void DownloadMatrix4x4(Matrix4x4* mat) { GL_FORCE_INLINE void DownloadMatrix4x4(Matrix4x4* mat) {
mat_store((matrix_t*) mat); mat_store((matrix_t*) mat);
} }
static inline void MultiplyMatrix4x4(const Matrix4x4* mat) { GL_FORCE_INLINE void MultiplyMatrix4x4(const Matrix4x4* mat) {
mat_apply((matrix_t*) mat); mat_apply((matrix_t*) mat);
} }
static inline void TransformVec3(float* x) { GL_FORCE_INLINE void TransformVec3(float* x) {
mat_trans_single4(x[0], x[1], x[2], x[3]); mat_trans_single4(x[0], x[1], x[2], x[3]);
} }
/* Transform a 3-element vector using the stored matrix (w == 1) */ /* Transform a 3-element vector using the stored matrix (w == 1) */
static inline void TransformVec3NoMod(const float* xIn, float* xOut) { GL_FORCE_INLINE void TransformVec3NoMod(const float* xIn, float* xOut) {
mat_trans_single3_nodiv_nomod(xIn[0], xIn[1], xIn[2], xOut[0], xOut[1], xOut[2]); mat_trans_single3_nodiv_nomod(xIn[0], xIn[1], xIn[2], xOut[0], xOut[1], xOut[2]);
} }
/* Transform a 3-element normal using the stored matrix (w == 0)*/ /* Transform a 3-element normal using the stored matrix (w == 0)*/
static inline void TransformNormalNoMod(const float* in, float* out) { GL_FORCE_INLINE void TransformNormalNoMod(const float* in, float* out) {
mat_trans_normal3_nomod(in[0], in[1], in[2], out[0], out[1], out[2]); mat_trans_normal3_nomod(in[0], in[1], in[2], out[0], out[1], out[2]);
} }
@ -70,7 +75,7 @@ inline void TransformVec4(float* x) {
} }
static inline void TransformVertex(const float* xyz, const float* w, float* oxyz, float* ow) { GL_FORCE_INLINE void TransformVertex(const float* xyz, const float* w, float* oxyz, float* ow) {
register float __x __asm__("fr12") = (xyz[0]); register float __x __asm__("fr12") = (xyz[0]);
register float __y __asm__("fr13") = (xyz[1]); register float __y __asm__("fr13") = (xyz[1]);
register float __z __asm__("fr14") = (xyz[2]); register float __z __asm__("fr14") = (xyz[2]);