Optimise non-indexed rendering paths

This commit is contained in:
Luke Benstead 2018-10-08 22:03:50 +01:00
parent 52e1be498e
commit 18759422ea
5 changed files with 466 additions and 86 deletions

348
GL/draw.c
View File

@ -460,6 +460,178 @@ static inline PolyBuildFunc _calcBuildFunc(const GLenum type) {
return &_buildStrip;
}
/*
 * Fill `count` ClipVertex entries of `output` from the client-side arrays.
 *
 * Each attribute is converted in its own pass over the output array, using
 * the parse function returned by the matching _calc*ParseFunc() helper:
 *   - positions (xyz) and diffuse colour (bgra) are always read;
 *   - uv is read only when doTexture is set;
 *   - st is read only when doMultitexture is set;
 *   - normals (nxyz) are read only when doLighting is set.
 *
 * Every generated vertex is flagged PVR_CMD_VERTEX. Each source pointer is
 * advanced by its stride (in bytes) after every vertex.
 *
 * A count of zero or less leaves `output` untouched: the loops are bounded
 * by `i < count`, so a negative count can no longer run away past the
 * buffer the way a count-down `while(i--)` would.
 */
static inline void genArraysCommon(
    ClipVertex* output,
    GLsizei count,
    const GLubyte* vptr, GLuint vstride,
    const GLubyte* cptr, GLuint cstride,
    const GLubyte* uvptr, GLuint uvstride,
    const GLubyte* stptr, GLuint ststride,
    const GLubyte* nptr, GLuint nstride,
    GLboolean doTexture, GLboolean doMultitexture, GLboolean doLighting
) {
    const FloatParseFunc vertexFunc = _calcVertexParseFunc();
    const ByteParseFunc diffuseFunc = _calcDiffuseParseFunc();
    const FloatParseFunc uvFunc = _calcUVParseFunc();
    const FloatParseFunc stFunc = _calcSTParseFunc();
    const FloatParseFunc normalFunc = _calcNormalParseFunc();

    GLsizei i;

    /* Pass 1: positions, plus the default (non end-of-list) vertex flag */
    for(i = 0; i < count; ++i) {
        output[i].flags = PVR_CMD_VERTEX;
        vertexFunc(output[i].xyz, vptr);
        vptr += vstride;
    }

    /* Pass 2: diffuse colour */
    for(i = 0; i < count; ++i) {
        diffuseFunc(output[i].bgra, cptr);
        cptr += cstride;
    }

    /* Pass 3: first set of texture coordinates */
    if(doTexture) {
        for(i = 0; i < count; ++i) {
            uvFunc(output[i].uv, uvptr);
            uvptr += uvstride;
        }
    }

    /* Pass 4: second set of texture coordinates */
    if(doMultitexture) {
        for(i = 0; i < count; ++i) {
            stFunc(output[i].st, stptr);
            stptr += ststride;
        }
    }

    /* Pass 5: normals, only needed when lighting is on */
    if(doLighting) {
        for(i = 0; i < count; ++i) {
            normalFunc(output[i].nxyz, nptr);
            nptr += nstride;
        }
    }
}
/*
 * Generate vertices for a GL_TRIANGLES draw.
 *
 * All attributes are parsed by genArraysCommon(); afterwards the third
 * vertex of every complete triangle is flagged PVR_CMD_VERTEX_EOL.
 * Trailing vertices that do not form a whole triangle keep the
 * PVR_CMD_VERTEX flag.
 */
static inline void genArraysTriangles(
    ClipVertex* output,
    GLsizei count,
    const GLubyte* vptr, GLuint vstride,
    const GLubyte* cptr, GLuint cstride,
    const GLubyte* uvptr, GLuint uvstride,
    const GLubyte* stptr, GLuint ststride,
    const GLubyte* nptr, GLuint nstride,
    GLboolean doTexture, GLboolean doMultitexture, GLboolean doLighting) {

    genArraysCommon(
        output, count,
        vptr, vstride, cptr, cstride, uvptr, uvstride, stptr, ststride, nptr, nstride,
        doTexture, doMultitexture, doLighting
    );

    /* Only whole triangles get a terminator */
    const GLsizei triangles = count / 3;
    GLsizei t;

    for(t = 0; t < triangles; ++t) {
        output[(t * 3) + 2].flags = PVR_CMD_VERTEX_EOL;
    }
}
/*
 * Generate vertices for a GL_QUADS draw.
 *
 * All attributes are parsed by genArraysCommon(). For every complete group
 * of four vertices the last two are then passed to swapVertex() and the
 * fourth slot is flagged PVR_CMD_VERTEX_EOL.
 * Presumably the swap reorders the quad into strip order for the PVR --
 * confirm against swapVertex().
 */
static void genArraysQuads(
    ClipVertex* output,
    GLsizei count,
    const GLubyte* vptr, GLuint vstride,
    const GLubyte* cptr, GLuint cstride,
    const GLubyte* uvptr, GLuint uvstride,
    const GLubyte* stptr, GLuint ststride,
    const GLubyte* nptr, GLuint nstride,
    GLboolean doTexture, GLboolean doMultitexture, GLboolean doLighting) {

    genArraysCommon(
        output, count,
        vptr, vstride, cptr, cstride, uvptr, uvstride, stptr, ststride, nptr, nstride,
        doTexture, doMultitexture, doLighting
    );

    /* Only whole quads are touched; leftover vertices stay as parsed */
    const GLsizei quads = count / 4;
    GLsizei q;

    for(q = 0; q < quads; ++q) {
        ClipVertex* last = &output[(q * 4) + 3];

        swapVertex(last, last - 1);
        last->flags = PVR_CMD_VERTEX_EOL;
    }
}
/*
 * Generate vertices for a GL_TRIANGLE_STRIP draw.
 *
 * All attributes are parsed by genArraysCommon(); the final vertex is then
 * flagged PVR_CMD_VERTEX_EOL to terminate the strip.
 *
 * With a count of zero or less there is no final vertex, so the flag write
 * is skipped instead of touching output[-1].
 */
static void genArraysTriangleStrip(
    ClipVertex* output,
    GLsizei count,
    const GLubyte* vptr, GLuint vstride,
    const GLubyte* cptr, GLuint cstride,
    const GLubyte* uvptr, GLuint uvstride,
    const GLubyte* stptr, GLuint ststride,
    const GLubyte* nptr, GLuint nstride,
    GLboolean doTexture, GLboolean doMultitexture, GLboolean doLighting) {

    genArraysCommon(
        output, count,
        vptr, vstride, cptr, cstride, uvptr, uvstride, stptr, ststride, nptr, nstride,
        doTexture, doMultitexture, doLighting
    );

    if(count > 0) {
        output[count - 1].flags = PVR_CMD_VERTEX_EOL;
    }
}
/*
 * Generate vertices for a GL_TRIANGLE_FAN draw.
 *
 * All attributes are first parsed by genArraysCommon(); the fan is then
 * rewritten in place into separate triangles, each ending with a vertex
 * flagged PVR_CMD_VERTEX_EOL.
 *
 * NOTE(review): output[1] and output[2] are accessed unconditionally, so
 * this assumes count >= 3 -- confirm that callers guarantee it.
 */
static void genArraysTriangleFan(
ClipVertex* output,
GLsizei count,
const GLubyte* vptr, GLuint vstride,
const GLubyte* cptr, GLuint cstride,
const GLubyte* uvptr, GLuint uvstride,
const GLubyte* stptr, GLuint ststride,
const GLubyte* nptr, GLuint nstride,
GLboolean doTexture, GLboolean doMultitexture, GLboolean doLighting) {
genArraysCommon(
output, count,
vptr, vstride, cptr, cstride, uvptr, uvstride, stptr, ststride, nptr, nstride,
doTexture, doMultitexture, doLighting
);
/* First triangle: swap the second and third vertices, then terminate it */
swapVertex(&output[1], &output[2]);
output[2].flags = PVR_CMD_VERTEX_EOL;
GLsizei i = 3;
/* The fan's hub vertex, copied into every following triangle */
ClipVertex* first = &output[0];
for(; i < count - 1; ++i) {
ClipVertex* next = &output[i + 1];
ClipVertex* previous = &output[i - 1];
ClipVertex* vertex = &output[i];
/* NOTE(review): this overwrites output[i + 1], which still holds a parsed
 * input vertex that has not been consumed yet -- confirm this is intended. */
*next = *first;
swapVertex(next, vertex);
/* NOTE(review): next + 1 is output[i + 2]; on the last iteration
 * (i == count - 2) that is output[count], one past the parsed vertices.
 * Assumes the caller reserved extra room -- TODO confirm. */
vertex = next + 1;
*vertex = *previous;
vertex->flags = PVR_CMD_VERTEX_EOL;
}
}
static void generate(ClipVertex* output, const GLenum mode, const GLsizei first, const GLsizei count,
const GLubyte* indices, const GLenum type, const GLboolean doTexture, const GLboolean doMultitexture, const GLboolean doLighting) {
/* Read from the client buffers and generate an array of ClipVertices */
@ -470,6 +642,65 @@ static void generate(ClipVertex* output, const GLenum mode, const GLsizei first,
const GLuint ststride = (ST_POINTER.stride) ? ST_POINTER.stride : ST_POINTER.size * byte_size(ST_POINTER.type);
const GLuint nstride = (NORMAL_POINTER.stride) ? NORMAL_POINTER.stride : NORMAL_POINTER.size * byte_size(NORMAL_POINTER.type);
if(!indices) {
    /* Non-indexed draw: the client arrays are read sequentially starting at
     * element `first`, so each attribute pointer is offset once up front. */
    const GLubyte* vptr = VERTEX_POINTER.ptr + (first * vstride);
    const GLubyte* cptr = DIFFUSE_POINTER.ptr + (first * cstride);
    const GLubyte* uvptr = UV_POINTER.ptr + (first * uvstride);
    const GLubyte* stptr = ST_POINTER.ptr + (first * ststride);
    const GLubyte* nptr = NORMAL_POINTER.ptr + (first * nstride);

    /* Dispatch to the generator for this primitive type. Each case must
     * `break`: falling through would run the following generators over the
     * same output again, re-parsing it and re-applying their vertex
     * swaps and end-of-list flags. */
    switch(mode) {
        case GL_TRIANGLES:
            genArraysTriangles(
                output,
                count,
                vptr, vstride,
                cptr, cstride,
                uvptr, uvstride,
                stptr, ststride,
                nptr, nstride,
                doTexture, doMultitexture, doLighting
            );
            break;
        case GL_QUADS:
            genArraysQuads(
                output,
                count,
                vptr, vstride,
                cptr, cstride,
                uvptr, uvstride,
                stptr, ststride,
                nptr, nstride,
                doTexture, doMultitexture, doLighting
            );
            break;
        case GL_TRIANGLE_FAN:
            genArraysTriangleFan(
                output,
                count,
                vptr, vstride,
                cptr, cstride,
                uvptr, uvstride,
                stptr, ststride,
                nptr, nstride,
                doTexture, doMultitexture, doLighting
            );
            break;
        case GL_TRIANGLE_STRIP:
        default:
            /* Any other mode is treated as a triangle strip */
            genArraysTriangleStrip(
                output,
                count,
                vptr, vstride,
                cptr, cstride,
                uvptr, uvstride,
                stptr, ststride,
                nptr, nstride,
                doTexture, doMultitexture, doLighting
            );
            break;
    }

    return;
}
const GLsizei max = first + count;
ClipVertex* vertex = output;
@ -494,102 +725,47 @@ static void generate(ClipVertex* output, const GLenum mode, const GLsizei first,
GLsizei i, j = 0;
GLuint idx;
if(!indices) {
GLubyte* vptr = VERTEX_POINTER.ptr + (first * vstride);
GLubyte* cptr = DIFFUSE_POINTER.ptr + (first * cstride);
GLubyte* uvptr = UV_POINTER.ptr + (first * uvstride);
GLubyte* stptr = ST_POINTER.ptr + (first * ststride);
GLubyte* nptr = NORMAL_POINTER.ptr + (first * nstride);
for(j = 0; j < count; ++j, ++vertex) {
if(mode == GL_QUADS) {
/* Performance optimisation to prevent copying to a temporary */
GLsizei mod = (j + 1) % 4;
if(mod == 0) {
target = vertex - 1;
target->flags = PVR_CMD_VERTEX;
} else if(mod == 3) {
target = vertex + 1;
target->flags = PVR_CMD_VERTEX_EOL;
} else {
target = vertex;
target->flags = PVR_CMD_VERTEX;
}
for(i = first; i < max; ++i, ++j, ++vertex) {
if(mode == GL_QUADS) {
/* Performance optimisation to prevent copying to a temporary */
GLsizei mod = (j + 1) % 4;
if(mod == 0) {
target = vertex - 1;
target->flags = PVR_CMD_VERTEX;
} else if(mod == 3) {
target = vertex + 1;
target->flags = PVR_CMD_VERTEX_EOL;
} else {
target = vertex;
target->flags = PVR_CMD_VERTEX;
}
vertexFunc(target->xyz, vptr);
diffuseFunc(target->bgra, cptr);
vptr += vstride;
cptr += cstride;
if(doTexture) {
uvFunc(target->uv, uvptr);
uvptr += uvstride;
}
if(doMultitexture) {
stFunc(target->st, stptr);
stptr += ststride;
}
if(doLighting) {
normalFunc(target->nxyz, nptr);
nptr += nstride;
}
if(mode != GL_QUADS) {
next = (j < count - 1) ? vertex + 1 : NULL;
previous = (j > 0) ? vertex - 1 : NULL;
buildFunc(firstV, previous, vertex, next, j);
}
} else {
target = vertex;
target->flags = PVR_CMD_VERTEX;
}
} else {
for(i = first; i < max; ++i, ++j, ++vertex) {
if(mode == GL_QUADS) {
/* Performance optimisation to prevent copying to a temporary */
GLsizei mod = (j + 1) % 4;
if(mod == 0) {
target = vertex - 1;
target->flags = PVR_CMD_VERTEX;
} else if(mod == 3) {
target = vertex + 1;
target->flags = PVR_CMD_VERTEX_EOL;
} else {
target = vertex;
target->flags = PVR_CMD_VERTEX;
}
} else {
target = vertex;
target->flags = PVR_CMD_VERTEX;
}
idx = (indices) ?
indexFunc(&indices[type_byte_size * i]) : i;
idx = (indices) ?
indexFunc(&indices[type_byte_size * i]) : i;
vertexFunc(target->xyz, VERTEX_POINTER.ptr + (idx * vstride));
diffuseFunc(target->bgra, DIFFUSE_POINTER.ptr + (idx * cstride));
vertexFunc(target->xyz, VERTEX_POINTER.ptr + (idx * vstride));
diffuseFunc(target->bgra, DIFFUSE_POINTER.ptr + (idx * cstride));
if(doTexture) {
uvFunc(target->uv, UV_POINTER.ptr + (idx * uvstride));
}
if(doTexture) {
uvFunc(target->uv, UV_POINTER.ptr + (idx * uvstride));
}
if(doMultitexture) {
stFunc(target->st, ST_POINTER.ptr + (idx * ststride));
}
if(doMultitexture) {
stFunc(target->st, ST_POINTER.ptr + (idx * ststride));
}
if(doLighting) {
normalFunc(target->nxyz, NORMAL_POINTER.ptr + (idx * nstride));
}
if(doLighting) {
normalFunc(target->nxyz, NORMAL_POINTER.ptr + (idx * nstride));
}
if(mode != GL_QUADS) {
next = (j < count - 1) ? vertex + 1 : NULL;
previous = (j > 0) ? vertex - 1 : NULL;
buildFunc(firstV, previous, vertex, next, j);
}
if(mode != GL_QUADS) {
next = (j < count - 1) ? vertex + 1 : NULL;
previous = (j > 0) ? vertex - 1 : NULL;
buildFunc(firstV, previous, vertex, next, j);
}
}
}
@ -827,7 +1003,7 @@ static void submitVertices(GLenum mode, GLsizei first, GLsizei count, GLenum typ
/* Clipping may have realloc'd so reset the start pointer */
start = ((ClipVertex*) activeList->vector.data) + startOffset;
header = start - 1; /* Update the header pointer */
header = (PVRHeader*) (start - 1); /* Update the header pointer */
#if DEBUG_CLIPPING
fprintf(stderr, "--------\n");

View File

@ -19,4 +19,5 @@ all:
$(KOS_MAKE) -C zclip_trianglestrip all
$(KOS_MAKE) -C terrain all
$(KOS_MAKE) -C quadmark all
$(KOS_MAKE) -C trimark all
$(KOS_MAKE) -C multitexture_arrays all

29
samples/trimark/Makefile Normal file
View File

@ -0,0 +1,29 @@
# Build rules for the trimark sample (KallistiOS toolchain).
TARGET = trimark.elf
OBJS = main.o
# `all` always removes the previous ELF and romdisk artefacts before rebuilding.
all: rm-elf $(TARGET)
include $(KOS_BASE)/Makefile.rules
# Remove every build product.
clean:
-rm -f $(TARGET) $(OBJS) romdisk.*
# Remove only the linked ELF and the romdisk artefacts.
rm-elf:
-rm -f $(TARGET) romdisk.*
# Link the sample objects together with the embedded romdisk object.
$(TARGET): $(OBJS) romdisk.o
$(KOS_CC) $(KOS_CFLAGS) $(KOS_LDFLAGS) -o $(TARGET) $(KOS_START) \
$(OBJS) romdisk.o $(OBJEXTRA) -lm -lkosutils $(KOS_LIBS)
# Pack the `romdisk` directory into a romfs image.
romdisk.img:
$(KOS_GENROMFS) -f romdisk.img -d romdisk -v
# Wrap the romfs image in an object file, under the symbol name `romdisk`.
romdisk.o: romdisk.img
$(KOS_BASE)/utils/bin2o/bin2o romdisk.img romdisk romdisk.o
# Run the built ELF through the configured KOS loader.
run: $(TARGET)
$(KOS_LOADER) $(TARGET)
# Prepare a distributable build: drop intermediates and strip the ELF.
dist:
rm -f $(OBJS) romdisk.o romdisk.img
$(KOS_STRIP) $(TARGET)

174
samples/trimark/main.c Normal file
View File

@ -0,0 +1,174 @@
/*
KallistiGL 2.0.0
trimark/main.c
(c)2018 Luke Benstead
(c)2014 Josh Pearson
(c)2002 Dan Potter, Paul Boese
*/
#include <kos.h>
#include <GL/gl.h>
#include <stdlib.h>
#include <time.h>
/* Search phases used by check_switch():
 *   PHASE_HALVE - divide the load by 1.2 while the average stays under 55 fps
 *   PHASE_INCR  - add 15 per frame while the average is at least 55 fps
 *   PHASE_DECR  - remove 30 per frame while the average is under 55 fps
 *   PHASE_FINAL - leave the load unchanged */
enum { PHASE_HALVE, PHASE_INCR, PHASE_DECR, PHASE_FINAL };
/* Number of triangles do_frame() submits each frame */
int polycnt;
/* Current search phase */
int phase = PHASE_HALVE;
/* Running average frame rate; -1 means no sample has been taken yet */
float avgfps = -1;
/* Fold the frame rate currently reported by the PVR into `avgfps`. */
void running_stats() {
    pvr_stats_t snapshot;

    pvr_get_stats(&snapshot);

    /* -1 marks "no sample yet": seed the average with the first reading,
     * otherwise average the old value with the new one */
    avgfps = (avgfps == -1)
        ? snapshot.frame_rate
        : (avgfps + snapshot.frame_rate) / 2.0f;
}
/* Log the PVR's VBL count and current frame rate to the debug log. */
void stats() {
    pvr_stats_t snapshot;

    pvr_get_stats(&snapshot);

    dbglog(
        DBG_DEBUG,
        "3D Stats: %d VBLs, frame rate ~%f fps\n",
        snapshot.vbl_count,
        snapshot.frame_rate
    );
}
/*
 * Report whether START is held on the first controller.
 *
 * Returns the START bit of the controller's button mask (non-zero when
 * pressed), or 0 when no controller or no controller state is available.
 */
int check_start() {
    maple_device_t *pad = maple_enum_type(0, MAPLE_FUNC_CONTROLLER);

    if(!pad) {
        return 0;
    }

    cont_state_t *pad_state = (cont_state_t *)maple_dev_status(pad);

    if(!pad_state) {
        return 0;
    }

    return pad_state->buttons & CONT_START;
}
/* NOTE(review): not referenced by any function visible in this file --
 * possibly a leftover; confirm before removing. */
pvr_poly_hdr_t hdr;
/*
 * Initialise the GL layer and set up a 640x480 orthographic view.
 *
 * NOTE(review): the orthographic matrix is applied to the MODELVIEW stack
 * and PROJECTION is left as identity (and left selected) -- confirm this
 * ordering is intentional rather than the two modes being swapped.
 */
void setup() {
glKosInit();
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
/* x: 0..640, y: 0..480, z: -100..100 */
glOrtho(0, 640, 0, 480, -100, 100);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
}
/*
 * Render one benchmark frame: `polycnt` randomly placed, randomly sized,
 * grey-shaded triangles, followed by a buffer swap.
 */
void do_frame() {
    int remaining;

    glBegin(GL_TRIANGLES);

    for(remaining = polycnt; remaining > 0; --remaining) {
        /* Keep this rand() order: position, depth, size, then shade */
        const int x = rand() % 640;
        const int y = rand() % 480;
        const int z = rand() % 100 + 1;
        const int size = rand() % 50 + 1;
        const float col = (rand() % 255) * 0.00391f;

        const int left = x - size;
        const int right = x + size;
        const int bottom = y - size;
        const int top = y + size;

        glColor3f(col, col, col);

        glVertex3f(left, bottom, z);
        glVertex3f(right, bottom, z);
        glVertex3f(right, top, z);
    }

    glEnd();

    glKosSwapBuffers();
}
/* Wall-clock time at which the current 5-second measurement window began */
time_t start;
/*
 * Start a new measurement run.
 *
 * ppf: number of triangles to draw per frame from now on.
 *
 * Announces the new load, resets the running frame-rate average and stores
 * the load in `polycnt`.
 */
void switch_tests(int ppf) {
    /* do_frame() submits exactly one triangle per unit of polycnt, so the
     * polygon count is ppf itself. The former "* 2" only made sense when
     * each primitive was a quad (two triangles). */
    printf("Beginning new test: %d polys per frame (%d per second at 60fps)\n",
           ppf, ppf * 60);

    avgfps = -1;
    polycnt = ppf;
}
/*
 * Every 5 seconds, report the average frame rate of the current run and
 * adjust the per-frame triangle count according to the current phase:
 *
 *   PHASE_HALVE: while under 55 fps, divide the load by 1.2; else go to INCR
 *   PHASE_INCR:  while at 55 fps or more, add 15; else go to DECR
 *   PHASE_DECR:  while under 55 fps, remove 30; else go to FINAL
 *   PHASE_FINAL: keep the load as it is
 */
void check_switch() {
    time_t now;

    now = time(NULL);

    if(now >= (start + 5)) {
        start = time(NULL);

        /* One triangle per unit of polycnt, so polys per second is simply
         * polycnt * fps (no quad-style "* 2" factor). */
        printf(" Average Frame Rate: ~%f fps (%d pps)\n", avgfps, (int)(polycnt * avgfps));

        switch(phase) {
            case PHASE_HALVE:
                if(avgfps < 55) {
                    switch_tests(polycnt / 1.2f);
                }
                else {
                    printf(" Entering PHASE_INCR\n");
                    phase = PHASE_INCR;
                }
                break;

            case PHASE_INCR:
                if(avgfps >= 55) {
                    switch_tests(polycnt + 15);
                }
                else {
                    printf(" Entering PHASE_DECR\n");
                    phase = PHASE_DECR;
                }
                break;

            case PHASE_DECR:
                if(avgfps < 55) {
                    switch_tests(polycnt - 30);
                }
                else {
                    printf(" Entering PHASE_FINAL\n");
                    phase = PHASE_FINAL;
                }
                break;

            case PHASE_FINAL:
                break;
        }
    }
}
/*
 * Benchmark entry point: initialise GL, start with a deliberately heavy
 * load, then render frames and tune the load until START is pressed.
 */
int main(int argc, char **argv) {
    setup();

    /* Start off with something obscene */
    switch_tests(220000 / 60);
    start = time(NULL);

    /* Run until START is pressed on the first controller */
    while(!check_start()) {
        printf(" \r");
        do_frame();
        running_stats();
        check_switch();
    }

    stats();

    return 0;
}

View File