Perf improvements

This commit is contained in:
Luke Benstead 2021-04-26 16:14:33 +01:00
parent 5fc77887d0
commit fcbb6418d2
4 changed files with 73 additions and 13 deletions

View File

@ -442,7 +442,7 @@ GL_FORCE_INLINE void transformNormalToEyeSpace(GLfloat* normal) {
mat_trans_normal3(normal[0], normal[1], normal[2]);
}
PolyHeader *_glSubmissionTargetHeader(SubmissionTarget* target) {
GL_FORCE_INLINE PolyHeader *_glSubmissionTargetHeader(SubmissionTarget* target) {
assert(target->header_offset < target->output->vector.size);
return aligned_vector_at(&target->output->vector, target->header_offset);
}
@ -456,7 +456,7 @@ Vertex* _glSubmissionTargetEnd(SubmissionTarget* target) {
return _glSubmissionTargetStart(target) + target->count;
}
static inline void genTriangles(Vertex* output, GLuint count) {
GL_FORCE_INLINE void genTriangles(Vertex* output, GLuint count) {
Vertex* it = output + 2;
GLuint i;
@ -466,17 +466,22 @@ static inline void genTriangles(Vertex* output, GLuint count) {
}
}
static inline void genQuads(Vertex* output, GLuint count) {
GL_FORCE_INLINE void genQuads(Vertex* output, GLuint count) {
Vertex* pen = output + 2;
Vertex* final = output + 3;
GLuint i;
for(i = 0; i < count; i += 4) {
swapVertex((final - 1), final);
GLuint i = count >> 2;
while(i--) {
__asm__("pref @%0" : : "r"(pen + 4));
swapVertex(pen, final);
final->flags = GPU_CMD_VERTEX_EOL;
pen += 4;
final += 4;
}
}
static void genTriangleStrip(Vertex* output, GLuint count) {
GL_FORCE_INLINE void genTriangleStrip(Vertex* output, GLuint count) {
output[count - 1].flags = GPU_CMD_VERTEX_EOL;
}
@ -634,6 +639,8 @@ static void _readPositionData(ReadDiffuseFunc func, const GLuint first, const GL
uint32_t* flags;
ITERATE(count) {
__asm__("pref @%0" : : "r"(vptr + vstride));
func(vptr, out);
vptr += vstride;
@ -653,6 +660,8 @@ static void _readUVData(ReadUVFunc func, const GLuint first, const GLuint count,
GLubyte* out = (GLubyte*) output[0].uv;
ITERATE(count) {
__asm__("pref @%0" : : "r"(uvptr + uvstride));
func(uvptr, out);
uvptr += uvstride;
out += sizeof(Vertex);
@ -666,6 +675,8 @@ static void _readSTData(ReadUVFunc func, const GLuint first, const GLuint count,
GLubyte* out = (GLubyte*) extra[0].st;
ITERATE(count) {
__asm__("pref @%0" : : "r"(stptr + ststride));
func(stptr, out);
stptr += ststride;
out += sizeof(VertexExtra);
@ -714,6 +725,8 @@ static void _readDiffuseData(ReadDiffuseFunc func, const GLuint first, const GLu
GLubyte* out = (GLubyte*) output[0].bgra;
ITERATE(count) {
__asm__("pref @%0" : : "r"(cptr + cstride));
func(cptr, out);
cptr += cstride;
out += sizeof(Vertex);
@ -874,6 +887,8 @@ static void generateElementsFastPath(
}
}
#define likely(x) __builtin_expect(!!(x), 1)
static void generateArraysFastPath(SubmissionTarget* target, const GLsizei first, const GLuint count) {
Vertex* start = _glSubmissionTargetStart(target);
@ -1290,7 +1305,6 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
}
divide(target);
push(_glSubmissionTargetHeader(target), GL_FALSE, target->output, 0);
/*

View File

@ -42,10 +42,10 @@ void APIENTRY glKosInitConfig(GLdcConfig* config) {
config->autosort_enabled = GL_FALSE;
config->fsaa_enabled = GL_FALSE;
config->initial_op_capacity = 1024;
config->initial_pt_capacity = 512;
config->initial_tr_capacity = 1024;
config->initial_immediate_capacity = 1024;
config->initial_op_capacity = 1024 * 3;
config->initial_pt_capacity = 512 * 3;
config->initial_tr_capacity = 1024 * 3;
config->initial_immediate_capacity = 1024 * 3;
config->internal_palette_format = GL_RGBA4;
}
@ -86,20 +86,65 @@ void APIENTRY glKosInit() {
glKosInitEx(&config);
}
/* Branch-prediction hints. Both normalise x to 0 or 1 with !! and hand it to
 * __builtin_expect, marked as expected-true (likely) or expected-false
 * (unlikely). The value of the expression is the normalised condition. */
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
/**
 * Tests whether a command word identifies a vertex entry.
 *
 * The argument is taken as a 32-bit unsigned integer rather than a float so
 * that the comparison is an exact integer compare: a float parameter would
 * convert the caller's value on every call and compare after rounding to a
 * 24-bit significand, letting nearby integer values compare equal.
 * NOTE(review): assumes the flags field and the GPU_CMD_* constants are
 * 32-bit integer command words - confirm against the Vertex definition.
 *
 * @param flags  The flags value of the entry being inspected.
 * @return true when flags equals GPU_CMD_VERTEX or GPU_CMD_VERTEX_EOL,
 *         false for any other value.
 */
GL_FORCE_INLINE bool glIsVertex(const uint32_t flags) {
    return flags == GPU_CMD_VERTEX_EOL || flags == GPU_CMD_VERTEX;
}
GL_FORCE_INLINE void glPerspectiveDivide(void* src, uint32_t n) {
TRACE();
/* Perform perspective divide on each vertex */
Vertex* vertex = (Vertex*) src;
const float h = GetVideoMode()->height;
while(n--) {
__asm__("pref @%0" : : "r"(vertex + 1));
if(likely(glIsVertex(vertex->flags))) {
const float f = MATH_Fast_Invert(vertex->w);
/* Convert to NDC and apply viewport */
vertex->xyz[0] = MATH_fmac(
VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth
);
vertex->xyz[1] = h - MATH_fmac(
VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight
);
/* Apply depth range */
vertex->xyz[2] = MAX(
1.0f - MATH_fmac(vertex->xyz[2] * f, 0.5f, 0.5f),
PVR_MIN_Z
);
}
++vertex;
}
}
void APIENTRY glKosSwapBuffers() {
TRACE();
SceneBegin();
SceneListBegin(GPU_LIST_OP_POLY);
glPerspectiveDivide(OP_LIST.vector.data, OP_LIST.vector.size);
SceneListSubmit(OP_LIST.vector.data, OP_LIST.vector.size);
SceneListFinish();
SceneListBegin(GPU_LIST_PT_POLY);
glPerspectiveDivide(PT_LIST.vector.data, PT_LIST.vector.size);
SceneListSubmit(PT_LIST.vector.data, PT_LIST.vector.size);
SceneListFinish();
SceneListBegin(GPU_LIST_TR_POLY);
glPerspectiveDivide(TR_LIST.vector.data, TR_LIST.vector.size);
SceneListSubmit(TR_LIST.vector.data, TR_LIST.vector.size);
SceneListFinish();
SceneFinish();

View File

@ -255,7 +255,6 @@ typedef struct {
AlignedVector* extras;
} SubmissionTarget;
PolyHeader* _glSubmissionTargetHeader(SubmissionTarget* target);
Vertex* _glSubmissionTargetStart(SubmissionTarget* target);
Vertex* _glSubmissionTargetEnd(SubmissionTarget* target);

View File

@ -74,6 +74,8 @@ void setup() {
glOrtho(0, 640, 0, 480, -100, 100);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
glDisable(GL_NEARZ_CLIPPING_KOS);
}
void do_frame() {