Perf improvements
This commit is contained in:
parent
5fc77887d0
commit
fcbb6418d2
30
GL/draw.c
30
GL/draw.c
|
@ -442,7 +442,7 @@ GL_FORCE_INLINE void transformNormalToEyeSpace(GLfloat* normal) {
|
|||
mat_trans_normal3(normal[0], normal[1], normal[2]);
|
||||
}
|
||||
|
||||
/*
 * NOTE(review): this span is diff residue. The two signature lines below are
 * the same function before and after the commit (the second one adds
 * GL_FORCE_INLINE); only one of them can exist in the real file.
 *
 * Looks up the entry at target->header_offset in target->output->vector via
 * aligned_vector_at() and returns it as a PolyHeader pointer.
 */
PolyHeader *_glSubmissionTargetHeader(SubmissionTarget* target) {
|
||||
GL_FORCE_INLINE PolyHeader *_glSubmissionTargetHeader(SubmissionTarget* target) {
|
||||
/* Sanity check: the stored header offset must lie inside the vector's current size */
assert(target->header_offset < target->output->vector.size);
|
||||
return aligned_vector_at(&target->output->vector, target->header_offset);
|
||||
}
|
||||
|
@ -456,7 +456,7 @@ Vertex* _glSubmissionTargetEnd(SubmissionTarget* target) {
|
|||
return _glSubmissionTargetStart(target) + target->count;
|
||||
}
|
||||
|
||||
static inline void genTriangles(Vertex* output, GLuint count) {
|
||||
GL_FORCE_INLINE void genTriangles(Vertex* output, GLuint count) {
|
||||
Vertex* it = output + 2;
|
||||
|
||||
GLuint i;
|
||||
|
@ -466,17 +466,22 @@ static inline void genTriangles(Vertex* output, GLuint count) {
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * NOTE(review): this span is diff residue that interleaves the old and the
 * new body of genQuads. The `static inline` signature, the bare `GLuint i;`,
 * the `for` header and `swapVertex((final - 1), final)` appear to be the
 * removed lines; the GL_FORCE_INLINE signature, `pen`, the `while` loop and
 * the prefetch appear to be the added ones - confirm against the repository.
 *
 * New form: walks `output` in groups of four vertices. For each group it
 * passes the third (`pen`) and fourth (`final`) vertex to swapVertex() and
 * then writes GPU_CMD_VERTEX_EOL into final->flags.
 *
 * The two loop forms are not equivalent when `count` is not a multiple of
 * four: `for(i = 0; i < count; i += 4)` also runs for a trailing partial
 * group, whereas `count >> 2` runs only once per complete group.
 */
static inline void genQuads(Vertex* output, GLuint count) {
|
||||
GL_FORCE_INLINE void genQuads(Vertex* output, GLuint count) {
|
||||
/* Third vertex of the current group */
Vertex* pen = output + 2;
|
||||
/* Fourth vertex of the current group */
Vertex* final = output + 3;
|
||||
GLuint i;
|
||||
for(i = 0; i < count; i += 4) {
|
||||
swapVertex((final - 1), final);
|
||||
/* Number of complete groups of four */
GLuint i = count >> 2;
|
||||
while(i--) {
|
||||
/* Prefetch hint (`pref`) for pen + 4, i.e. the third vertex of the next group */
__asm__("pref @%0" : : "r"(pen + 4));
|
||||
|
||||
swapVertex(pen, final);
|
||||
final->flags = GPU_CMD_VERTEX_EOL;
|
||||
|
||||
/* Step both cursors to the next group of four */
pen += 4;
|
||||
final += 4;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * NOTE(review): this span is diff residue. The two signature lines below are
 * the same function before and after the commit (the second one replaces
 * `static` with GL_FORCE_INLINE); only one of them can exist in the real file.
 *
 * Writes GPU_CMD_VERTEX_EOL into the flags of the last of the `count`
 * vertices starting at `output`. No other vertex is touched.
 *
 * NOTE(review): with count == 0 the index `count - 1` wraps around (assuming
 * GLuint is unsigned, as in the standard GL headers) - confirm that callers
 * never pass an empty strip.
 */
static void genTriangleStrip(Vertex* output, GLuint count) {
|
||||
GL_FORCE_INLINE void genTriangleStrip(Vertex* output, GLuint count) {
|
||||
output[count - 1].flags = GPU_CMD_VERTEX_EOL;
|
||||
}
|
||||
|
||||
|
@ -634,6 +639,8 @@ static void _readPositionData(ReadDiffuseFunc func, const GLuint first, const GL
|
|||
uint32_t* flags;
|
||||
|
||||
ITERATE(count) {
|
||||
__asm__("pref @%0" : : "r"(vptr + vstride));
|
||||
|
||||
func(vptr, out);
|
||||
vptr += vstride;
|
||||
|
||||
|
@ -653,6 +660,8 @@ static void _readUVData(ReadUVFunc func, const GLuint first, const GLuint count,
|
|||
GLubyte* out = (GLubyte*) output[0].uv;
|
||||
|
||||
ITERATE(count) {
|
||||
__asm__("pref @%0" : : "r"(uvptr + uvstride));
|
||||
|
||||
func(uvptr, out);
|
||||
uvptr += uvstride;
|
||||
out += sizeof(Vertex);
|
||||
|
@ -666,6 +675,8 @@ static void _readSTData(ReadUVFunc func, const GLuint first, const GLuint count,
|
|||
GLubyte* out = (GLubyte*) extra[0].st;
|
||||
|
||||
ITERATE(count) {
|
||||
__asm__("pref @%0" : : "r"(stptr + ststride));
|
||||
|
||||
func(stptr, out);
|
||||
stptr += ststride;
|
||||
out += sizeof(VertexExtra);
|
||||
|
@ -714,6 +725,8 @@ static void _readDiffuseData(ReadDiffuseFunc func, const GLuint first, const GLu
|
|||
GLubyte* out = (GLubyte*) output[0].bgra;
|
||||
|
||||
ITERATE(count) {
|
||||
__asm__("pref @%0" : : "r"(cptr + cstride));
|
||||
|
||||
func(cptr, out);
|
||||
cptr += cstride;
|
||||
out += sizeof(Vertex);
|
||||
|
@ -874,6 +887,8 @@ static void generateElementsFastPath(
|
|||
}
|
||||
}
|
||||
|
||||
/* Branch hint: tells the compiler that condition `x` is expected to be true.
 * `!!(x)` normalises any non-zero value to 1 so it matches the expected value. */
#define likely(x) __builtin_expect(!!(x), 1)
|
||||
|
||||
static void generateArraysFastPath(SubmissionTarget* target, const GLsizei first, const GLuint count) {
|
||||
Vertex* start = _glSubmissionTargetStart(target);
|
||||
|
||||
|
@ -1290,7 +1305,6 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
|
|||
|
||||
}
|
||||
|
||||
divide(target);
|
||||
push(_glSubmissionTargetHeader(target), GL_FALSE, target->output, 0);
|
||||
|
||||
/*
|
||||
|
|
53
GL/flush.c
53
GL/flush.c
|
@ -42,10 +42,10 @@ void APIENTRY glKosInitConfig(GLdcConfig* config) {
|
|||
config->autosort_enabled = GL_FALSE;
|
||||
config->fsaa_enabled = GL_FALSE;
|
||||
|
||||
config->initial_op_capacity = 1024;
|
||||
config->initial_pt_capacity = 512;
|
||||
config->initial_tr_capacity = 1024;
|
||||
config->initial_immediate_capacity = 1024;
|
||||
config->initial_op_capacity = 1024 * 3;
|
||||
config->initial_pt_capacity = 512 * 3;
|
||||
config->initial_tr_capacity = 1024 * 3;
|
||||
config->initial_immediate_capacity = 1024 * 3;
|
||||
config->internal_palette_format = GL_RGBA4;
|
||||
}
|
||||
|
||||
|
@ -86,20 +86,65 @@ void APIENTRY glKosInit() {
|
|||
glKosInitEx(&config);
|
||||
}
|
||||
|
||||
/* Branch hints built on __builtin_expect. `!!(x)` normalises any non-zero
 * value to 1 so the result can be compared against the expected 1 or 0. */
/* Condition `x` is expected to be true */
#define likely(x) __builtin_expect(!!(x), 1)
|
||||
/* Condition `x` is expected to be false */
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
|
||||
GL_FORCE_INLINE bool glIsVertex(const float flags) {
|
||||
return flags == GPU_CMD_VERTEX_EOL || flags == GPU_CMD_VERTEX;
|
||||
}
|
||||
|
||||
|
||||
GL_FORCE_INLINE void glPerspectiveDivide(void* src, uint32_t n) {
|
||||
TRACE();
|
||||
|
||||
/* Perform perspective divide on each vertex */
|
||||
Vertex* vertex = (Vertex*) src;
|
||||
|
||||
const float h = GetVideoMode()->height;
|
||||
|
||||
while(n--) {
|
||||
__asm__("pref @%0" : : "r"(vertex + 1));
|
||||
|
||||
if(likely(glIsVertex(vertex->flags))) {
|
||||
const float f = MATH_Fast_Invert(vertex->w);
|
||||
|
||||
/* Convert to NDC and apply viewport */
|
||||
vertex->xyz[0] = MATH_fmac(
|
||||
VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth
|
||||
);
|
||||
|
||||
vertex->xyz[1] = h - MATH_fmac(
|
||||
VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight
|
||||
);
|
||||
|
||||
/* Apply depth range */
|
||||
vertex->xyz[2] = MAX(
|
||||
1.0f - MATH_fmac(vertex->xyz[2] * f, 0.5f, 0.5f),
|
||||
PVR_MIN_Z
|
||||
);
|
||||
}
|
||||
|
||||
++vertex;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void APIENTRY glKosSwapBuffers() {
|
||||
TRACE();
|
||||
|
||||
SceneBegin();
|
||||
SceneListBegin(GPU_LIST_OP_POLY);
|
||||
glPerspectiveDivide(OP_LIST.vector.data, OP_LIST.vector.size);
|
||||
SceneListSubmit(OP_LIST.vector.data, OP_LIST.vector.size);
|
||||
SceneListFinish();
|
||||
|
||||
SceneListBegin(GPU_LIST_PT_POLY);
|
||||
glPerspectiveDivide(PT_LIST.vector.data, PT_LIST.vector.size);
|
||||
SceneListSubmit(PT_LIST.vector.data, PT_LIST.vector.size);
|
||||
SceneListFinish();
|
||||
|
||||
SceneListBegin(GPU_LIST_TR_POLY);
|
||||
glPerspectiveDivide(TR_LIST.vector.data, TR_LIST.vector.size);
|
||||
SceneListSubmit(TR_LIST.vector.data, TR_LIST.vector.size);
|
||||
SceneListFinish();
|
||||
SceneFinish();
|
||||
|
|
|
@ -255,7 +255,6 @@ typedef struct {
|
|||
AlignedVector* extras;
|
||||
} SubmissionTarget;
|
||||
|
||||
PolyHeader* _glSubmissionTargetHeader(SubmissionTarget* target);
|
||||
Vertex* _glSubmissionTargetStart(SubmissionTarget* target);
|
||||
Vertex* _glSubmissionTargetEnd(SubmissionTarget* target);
|
||||
|
||||
|
|
|
@ -74,6 +74,8 @@ void setup() {
|
|||
glOrtho(0, 640, 0, 480, -100, 100);
|
||||
glMatrixMode(GL_PROJECTION);
|
||||
glLoadIdentity();
|
||||
|
||||
glDisable(GL_NEARZ_CLIPPING_KOS);
|
||||
}
|
||||
|
||||
void do_frame() {
|
||||
|
|
Loading…
Reference in New Issue
Block a user