Perf improvements
This commit is contained in:
parent
5fc77887d0
commit
fcbb6418d2
30
GL/draw.c
30
GL/draw.c
@ -442,7 +442,7 @@ GL_FORCE_INLINE void transformNormalToEyeSpace(GLfloat* normal) {
|
|||||||
mat_trans_normal3(normal[0], normal[1], normal[2]);
|
mat_trans_normal3(normal[0], normal[1], normal[2]);
|
||||||
}
|
}
|
||||||
|
|
||||||
PolyHeader *_glSubmissionTargetHeader(SubmissionTarget* target) {
|
GL_FORCE_INLINE PolyHeader *_glSubmissionTargetHeader(SubmissionTarget* target) {
|
||||||
assert(target->header_offset < target->output->vector.size);
|
assert(target->header_offset < target->output->vector.size);
|
||||||
return aligned_vector_at(&target->output->vector, target->header_offset);
|
return aligned_vector_at(&target->output->vector, target->header_offset);
|
||||||
}
|
}
|
||||||
@ -456,7 +456,7 @@ Vertex* _glSubmissionTargetEnd(SubmissionTarget* target) {
|
|||||||
return _glSubmissionTargetStart(target) + target->count;
|
return _glSubmissionTargetStart(target) + target->count;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void genTriangles(Vertex* output, GLuint count) {
|
GL_FORCE_INLINE void genTriangles(Vertex* output, GLuint count) {
|
||||||
Vertex* it = output + 2;
|
Vertex* it = output + 2;
|
||||||
|
|
||||||
GLuint i;
|
GLuint i;
|
||||||
@ -466,17 +466,22 @@ static inline void genTriangles(Vertex* output, GLuint count) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void genQuads(Vertex* output, GLuint count) {
|
GL_FORCE_INLINE void genQuads(Vertex* output, GLuint count) {
|
||||||
|
Vertex* pen = output + 2;
|
||||||
Vertex* final = output + 3;
|
Vertex* final = output + 3;
|
||||||
GLuint i;
|
GLuint i = count >> 2;
|
||||||
for(i = 0; i < count; i += 4) {
|
while(i--) {
|
||||||
swapVertex((final - 1), final);
|
__asm__("pref @%0" : : "r"(pen + 4));
|
||||||
|
|
||||||
|
swapVertex(pen, final);
|
||||||
final->flags = GPU_CMD_VERTEX_EOL;
|
final->flags = GPU_CMD_VERTEX_EOL;
|
||||||
|
|
||||||
|
pen += 4;
|
||||||
final += 4;
|
final += 4;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void genTriangleStrip(Vertex* output, GLuint count) {
|
GL_FORCE_INLINE void genTriangleStrip(Vertex* output, GLuint count) {
|
||||||
output[count - 1].flags = GPU_CMD_VERTEX_EOL;
|
output[count - 1].flags = GPU_CMD_VERTEX_EOL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -634,6 +639,8 @@ static void _readPositionData(ReadDiffuseFunc func, const GLuint first, const GL
|
|||||||
uint32_t* flags;
|
uint32_t* flags;
|
||||||
|
|
||||||
ITERATE(count) {
|
ITERATE(count) {
|
||||||
|
__asm__("pref @%0" : : "r"(vptr + vstride));
|
||||||
|
|
||||||
func(vptr, out);
|
func(vptr, out);
|
||||||
vptr += vstride;
|
vptr += vstride;
|
||||||
|
|
||||||
@ -653,6 +660,8 @@ static void _readUVData(ReadUVFunc func, const GLuint first, const GLuint count,
|
|||||||
GLubyte* out = (GLubyte*) output[0].uv;
|
GLubyte* out = (GLubyte*) output[0].uv;
|
||||||
|
|
||||||
ITERATE(count) {
|
ITERATE(count) {
|
||||||
|
__asm__("pref @%0" : : "r"(uvptr + uvstride));
|
||||||
|
|
||||||
func(uvptr, out);
|
func(uvptr, out);
|
||||||
uvptr += uvstride;
|
uvptr += uvstride;
|
||||||
out += sizeof(Vertex);
|
out += sizeof(Vertex);
|
||||||
@ -666,6 +675,8 @@ static void _readSTData(ReadUVFunc func, const GLuint first, const GLuint count,
|
|||||||
GLubyte* out = (GLubyte*) extra[0].st;
|
GLubyte* out = (GLubyte*) extra[0].st;
|
||||||
|
|
||||||
ITERATE(count) {
|
ITERATE(count) {
|
||||||
|
__asm__("pref @%0" : : "r"(stptr + ststride));
|
||||||
|
|
||||||
func(stptr, out);
|
func(stptr, out);
|
||||||
stptr += ststride;
|
stptr += ststride;
|
||||||
out += sizeof(VertexExtra);
|
out += sizeof(VertexExtra);
|
||||||
@ -714,6 +725,8 @@ static void _readDiffuseData(ReadDiffuseFunc func, const GLuint first, const GLu
|
|||||||
GLubyte* out = (GLubyte*) output[0].bgra;
|
GLubyte* out = (GLubyte*) output[0].bgra;
|
||||||
|
|
||||||
ITERATE(count) {
|
ITERATE(count) {
|
||||||
|
__asm__("pref @%0" : : "r"(cptr + cstride));
|
||||||
|
|
||||||
func(cptr, out);
|
func(cptr, out);
|
||||||
cptr += cstride;
|
cptr += cstride;
|
||||||
out += sizeof(Vertex);
|
out += sizeof(Vertex);
|
||||||
@ -874,6 +887,8 @@ static void generateElementsFastPath(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define likely(x) __builtin_expect(!!(x), 1)
|
||||||
|
|
||||||
static void generateArraysFastPath(SubmissionTarget* target, const GLsizei first, const GLuint count) {
|
static void generateArraysFastPath(SubmissionTarget* target, const GLsizei first, const GLuint count) {
|
||||||
Vertex* start = _glSubmissionTargetStart(target);
|
Vertex* start = _glSubmissionTargetStart(target);
|
||||||
|
|
||||||
@ -1290,7 +1305,6 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
divide(target);
|
|
||||||
push(_glSubmissionTargetHeader(target), GL_FALSE, target->output, 0);
|
push(_glSubmissionTargetHeader(target), GL_FALSE, target->output, 0);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|||||||
53
GL/flush.c
53
GL/flush.c
@ -42,10 +42,10 @@ void APIENTRY glKosInitConfig(GLdcConfig* config) {
|
|||||||
config->autosort_enabled = GL_FALSE;
|
config->autosort_enabled = GL_FALSE;
|
||||||
config->fsaa_enabled = GL_FALSE;
|
config->fsaa_enabled = GL_FALSE;
|
||||||
|
|
||||||
config->initial_op_capacity = 1024;
|
config->initial_op_capacity = 1024 * 3;
|
||||||
config->initial_pt_capacity = 512;
|
config->initial_pt_capacity = 512 * 3;
|
||||||
config->initial_tr_capacity = 1024;
|
config->initial_tr_capacity = 1024 * 3;
|
||||||
config->initial_immediate_capacity = 1024;
|
config->initial_immediate_capacity = 1024 * 3;
|
||||||
config->internal_palette_format = GL_RGBA4;
|
config->internal_palette_format = GL_RGBA4;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -86,20 +86,65 @@ void APIENTRY glKosInit() {
|
|||||||
glKosInitEx(&config);
|
glKosInitEx(&config);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define likely(x) __builtin_expect(!!(x), 1)
|
||||||
|
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||||
|
|
||||||
|
GL_FORCE_INLINE bool glIsVertex(const float flags) {
|
||||||
|
return flags == GPU_CMD_VERTEX_EOL || flags == GPU_CMD_VERTEX;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Walks `n` consecutive Vertex entries starting at `src` and rewrites the
 * xyz of every entry whose flags pass glIsVertex(); entries that fail the
 * check are skipped untouched.
 *
 * src: start of the entry array; it is reinterpreted as Vertex*.
 * n:   number of entries to visit.
 *
 * For each accepted entry, x, y and z are multiplied by the value
 * MATH_Fast_Invert() returns for w and then remapped using the VIEWPORT
 * fields (x, y) or a fixed 0.5 scale and offset (z). The entries are
 * modified in place; nothing is returned.
 */
GL_FORCE_INLINE void glPerspectiveDivide(void* src, uint32_t n) {
|
||||||
|
TRACE();
|
||||||
|
|
||||||
|
/* Perform perspective divide on each vertex */
|
||||||
|
Vertex* vertex = (Vertex*) src;
|
||||||
|
|
||||||
|
/* Read the video-mode height once, outside the loop; it is used for the y flip below. */
const float h = GetVideoMode()->height;
|
||||||
|
|
||||||
|
while(n--) {
|
||||||
|
/* Issue a `pref` hint for the entry after the current one before working
 * on this one. On the last iteration the address is one past the final
 * entry. NOTE(review): presumably the SH4 prefetch instruction -- confirm
 * the target before building elsewhere. */
__asm__("pref @%0" : : "r"(vertex + 1));
|
||||||
|
|
||||||
|
/* Non-vertex entries are expected to be the rare case (see likely()). */
if(likely(glIsVertex(vertex->flags))) {
|
||||||
|
/* NOTE(review): presumably an approximate 1/w -- confirm its accuracy and
 * its behaviour for w <= 0 against the MATH_Fast_Invert definition. */
const float f = MATH_Fast_Invert(vertex->w);
|
||||||
|
|
||||||
|
/* Convert to NDC and apply viewport */
|
||||||
|
/* NOTE(review): MATH_fmac(a, b, c) is assumed to compute a * b + c -- confirm. */
vertex->xyz[0] = MATH_fmac(
|
||||||
|
VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth
|
||||||
|
);
|
||||||
|
|
||||||
|
/* Same mapping for y, but the result is subtracted from the video-mode
 * height, so y ends up measured from the opposite edge. */
vertex->xyz[1] = h - MATH_fmac(
|
||||||
|
VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight
|
||||||
|
);
|
||||||
|
|
||||||
|
/* Apply depth range */
|
||||||
|
/* The remapped depth is subtracted from 1.0 and never allowed to drop
 * below PVR_MIN_Z. */
vertex->xyz[2] = MAX(
|
||||||
|
1.0f - MATH_fmac(vertex->xyz[2] * f, 0.5f, 0.5f),
|
||||||
|
PVR_MIN_Z
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
++vertex;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void APIENTRY glKosSwapBuffers() {
|
void APIENTRY glKosSwapBuffers() {
|
||||||
TRACE();
|
TRACE();
|
||||||
|
|
||||||
SceneBegin();
|
SceneBegin();
|
||||||
SceneListBegin(GPU_LIST_OP_POLY);
|
SceneListBegin(GPU_LIST_OP_POLY);
|
||||||
|
glPerspectiveDivide(OP_LIST.vector.data, OP_LIST.vector.size);
|
||||||
SceneListSubmit(OP_LIST.vector.data, OP_LIST.vector.size);
|
SceneListSubmit(OP_LIST.vector.data, OP_LIST.vector.size);
|
||||||
SceneListFinish();
|
SceneListFinish();
|
||||||
|
|
||||||
SceneListBegin(GPU_LIST_PT_POLY);
|
SceneListBegin(GPU_LIST_PT_POLY);
|
||||||
|
glPerspectiveDivide(PT_LIST.vector.data, PT_LIST.vector.size);
|
||||||
SceneListSubmit(PT_LIST.vector.data, PT_LIST.vector.size);
|
SceneListSubmit(PT_LIST.vector.data, PT_LIST.vector.size);
|
||||||
SceneListFinish();
|
SceneListFinish();
|
||||||
|
|
||||||
SceneListBegin(GPU_LIST_TR_POLY);
|
SceneListBegin(GPU_LIST_TR_POLY);
|
||||||
|
glPerspectiveDivide(TR_LIST.vector.data, TR_LIST.vector.size);
|
||||||
SceneListSubmit(TR_LIST.vector.data, TR_LIST.vector.size);
|
SceneListSubmit(TR_LIST.vector.data, TR_LIST.vector.size);
|
||||||
SceneListFinish();
|
SceneListFinish();
|
||||||
SceneFinish();
|
SceneFinish();
|
||||||
|
|||||||
@ -255,7 +255,6 @@ typedef struct {
|
|||||||
AlignedVector* extras;
|
AlignedVector* extras;
|
||||||
} SubmissionTarget;
|
} SubmissionTarget;
|
||||||
|
|
||||||
PolyHeader* _glSubmissionTargetHeader(SubmissionTarget* target);
|
|
||||||
Vertex* _glSubmissionTargetStart(SubmissionTarget* target);
|
Vertex* _glSubmissionTargetStart(SubmissionTarget* target);
|
||||||
Vertex* _glSubmissionTargetEnd(SubmissionTarget* target);
|
Vertex* _glSubmissionTargetEnd(SubmissionTarget* target);
|
||||||
|
|
||||||
|
|||||||
@ -74,6 +74,8 @@ void setup() {
|
|||||||
glOrtho(0, 640, 0, 480, -100, 100);
|
glOrtho(0, 640, 0, 480, -100, 100);
|
||||||
glMatrixMode(GL_PROJECTION);
|
glMatrixMode(GL_PROJECTION);
|
||||||
glLoadIdentity();
|
glLoadIdentity();
|
||||||
|
|
||||||
|
glDisable(GL_NEARZ_CLIPPING_KOS);
|
||||||
}
|
}
|
||||||
|
|
||||||
void do_frame() {
|
void do_frame() {
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user