From 8b834ac3f16bffd4f4156423fff94f2e330dd8e8 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Tue, 21 Aug 2018 09:37:19 +0100 Subject: [PATCH] Avoid an additional copy of every vertex. Fixes #22 --- GL/clip.c | 8 +-- GL/clip.h | 2 +- GL/draw.c | 130 ++++++++++++++++++------------------ GL/flush.c | 5 ++ GL/profiler.c | 2 +- containers/aligned_vector.c | 4 +- containers/aligned_vector.h | 2 +- 7 files changed, 81 insertions(+), 72 deletions(-) diff --git a/GL/clip.c b/GL/clip.c index 24ca208..672dfa1 100644 --- a/GL/clip.c +++ b/GL/clip.c @@ -72,8 +72,8 @@ static inline void interpolateColour(const uint8_t* v1, const uint8_t* v2, const const uint32_t VERTEX_CMD_EOL = 0xf0000000; const uint32_t VERTEX_CMD = 0xe0000000; -void clipTriangleStrip(AlignedVector* vertices, AlignedVector* outBuffer) __attribute__((optimize("fast-math"))); -void clipTriangleStrip(AlignedVector* vertices, AlignedVector* outBuffer) { +void clipTriangleStrip(const ClipVertex* vertices, const unsigned int count, AlignedVector* outBuffer) __attribute__((optimize("fast-math"))); +void clipTriangleStrip(const ClipVertex* vertices, const unsigned int count, AlignedVector* outBuffer) { /* Clipping triangle strips is *hard* this is the algorithm we follow: * @@ -90,9 +90,9 @@ void clipTriangleStrip(AlignedVector* vertices, AlignedVector* outBuffer) { uint32_t i; uint32_t stripCount = 2; /* The number of vertices in the source strip so far */ - ClipVertex* thisVertex = aligned_vector_at(vertices, 1); + ClipVertex* thisVertex = vertices + 1; - for(i = 2; i < vertices->size; ++i) { + for(i = 2; i < count; ++i) { ++thisVertex; if(stripCount < 2) { diff --git a/GL/clip.h b/GL/clip.h index 4035162..44a3357 100644 --- a/GL/clip.h +++ b/GL/clip.h @@ -40,7 +40,7 @@ typedef struct { } ClipVertex; void clipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t); -void clipTriangleStrip(AlignedVector* vertices, AlignedVector* outBuffer); +void clipTriangleStrip(const ClipVertex* vertices, const unsigned int count, AlignedVector* outBuffer); #ifdef __cplusplus } diff --git a/GL/draw.c b/GL/draw.c index 8b72b0b..a61e1cf 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -448,7 +448,7 @@ static inline PolyBuildFunc _calcBuildFunc(const GLenum type) { return &_buildStrip; } -static void generate(AlignedVector* output, const GLenum mode, const GLsizei first, const GLsizei count, +static void generate(ClipVertex* output, const GLenum mode, const GLsizei first, const GLsizei count, const GLubyte* indices, const GLenum type, const GLboolean doTexture, const GLboolean doMultitexture, const GLboolean doLighting) { /* Read from the client buffers and generate an array of ClipVertices */ @@ -459,10 +459,8 @@ static void generate(AlignedVector* output, const GLenum mode, const GLsizei fir const GLuint nstride = (NORMAL_POINTER.stride) ? NORMAL_POINTER.stride : NORMAL_POINTER.size * byte_size(NORMAL_POINTER.type); const GLsizei max = first + count; - const GLsizei spaceNeeded = (mode == GL_POLYGON || mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count; - /* Make sure we have room for the output */ - ClipVertex* vertex = aligned_vector_resize(output, spaceNeeded); + ClipVertex* vertex = output; const FloatParseFunc vertexFunc = _calcVertexParseFunc(); const ByteParseFunc diffuseFunc = _calcDiffuseParseFunc(); @@ -584,14 +582,14 @@ static void generate(AlignedVector* output, const GLenum mode, const GLsizei fir } } -static void transform(AlignedVector* vertices) { +static void transform(ClipVertex* output, const GLsizei count) { /* Perform modelview transform, storing W */ - ClipVertex* vertex = (ClipVertex*) vertices->data; + ClipVertex* vertex = output; _applyRenderMatrix(); /* Apply the Render Matrix Stack */ - GLsizei i = vertices->size; + GLsizei i = count; while(i--) { register float __x __asm__("fr12") = (vertex->xyz[0]); register float __y __asm__("fr13") = (vertex->xyz[1]); @@ -614,7 +612,7 @@ static void transform(AlignedVector* vertices) { } } -static void clip(AlignedVector* vertices) { +static GLsizei clip(AlignedVector* polylist, ClipVertex* output, const GLsizei count) { /* Perform clipping, generating new vertices as necessary */ static AlignedVector* CLIP_BUFFER = NULL; @@ -626,17 +624,33 @@ static void clip(AlignedVector* vertices) { } /* Make sure we allocate roughly enough space */ - aligned_vector_reserve(CLIP_BUFFER, vertices->size * 1.5); + aligned_vector_reserve(CLIP_BUFFER, count * 1.5); /* Start from empty */ aligned_vector_resize(CLIP_BUFFER, 0); /* Now perform clipping! */ - clipTriangleStrip(vertices, CLIP_BUFFER); + clipTriangleStrip(output, count, CLIP_BUFFER); + + /* Calculate the new required size for the poly list. This is the original size + * plus the difference in size between the original vertex count and the clip buffer + * count */ + GLsizei newSize = polylist->size + (CLIP_BUFFER->size - count); /* Copy the clip buffer over the vertices */ - aligned_vector_resize(vertices, CLIP_BUFFER->size); - memcpy(vertices->data, CLIP_BUFFER->data, CLIP_BUFFER->size * CLIP_BUFFER->element_size); + aligned_vector_resize(polylist, newSize); + + GLsizei i = CLIP_BUFFER->size; + ClipVertex* dst = output; + ClipVertex* src = (ClipVertex*) CLIP_BUFFER->data; + while(i--) { + *dst = *src; + ++dst; + ++src; + } + + /* Return the new vertex count */ + return CLIP_BUFFER->size; } static void mat_transform3(const float* xyz, const float* xyzOut, const uint32_t count, const uint32_t inStride, const uint32_t outStride) { @@ -671,7 +685,7 @@ static void mat_transform_normal3(const float* xyz, const float* xyzOut, const u } } -static void light(AlignedVector* vertices) { +static void light(ClipVertex* output, const GLsizei count) { if(!isLightingEnabled()) { return; } @@ -688,22 +702,22 @@ static void light(AlignedVector* vertices) { aligned_vector_init(eye_space_data, sizeof(EyeSpaceData)); } - aligned_vector_resize(eye_space_data, vertices->size); + aligned_vector_resize(eye_space_data, count); /* Perform lighting calculations and manipulate the colour */ - ClipVertex* vertex = (ClipVertex*) vertices->data; + ClipVertex* vertex = output; EyeSpaceData* eye_space = (EyeSpaceData*) eye_space_data->data; _matrixLoadModelView(); - mat_transform3(vertex->xyz, eye_space->xyz, vertices->size, sizeof(ClipVertex), sizeof(EyeSpaceData)); + mat_transform3(vertex->xyz, eye_space->xyz, count, sizeof(ClipVertex), sizeof(EyeSpaceData)); _matrixLoadNormal(); - mat_transform_normal3(vertex->nxyz, eye_space->n, vertices->size, sizeof(ClipVertex), sizeof(EyeSpaceData)); + mat_transform_normal3(vertex->nxyz, eye_space->n, count, sizeof(ClipVertex), sizeof(EyeSpaceData)); GLsizei i; EyeSpaceData* ES = aligned_vector_at(eye_space_data, 0); - for(i = 0; i < vertices->size; ++i, ++vertex, ++ES) { + for(i = 0; i < count; ++i, ++vertex, ++ES) { /* We ignore diffuse colour when lighting is enabled. If GL_COLOR_MATERIAL is enabled * then the lighting calculation should possibly take it into account */ @@ -728,11 +742,11 @@ static void light(AlignedVector* vertices) { } } -static void divide(AlignedVector* vertices) { +static void divide(ClipVertex* output, const GLsizei count) { /* Perform perspective divide on each vertex */ - ClipVertex* vertex = (ClipVertex*) vertices->data; + ClipVertex* vertex = output; - GLsizei i = vertices->size; + GLsizei i = count; while(i--) { vertex->xyz[2] = 1.0f / vertex->w; vertex->xyz[0] *= vertex->xyz[2]; @@ -741,16 +755,8 @@ static void divide(AlignedVector* vertices) { } } -static void push(const AlignedVector* vertices, PolyList* activePolyList, GLshort textureUnit) { - /* Copy the vertices to the active poly list */ - - // Make room for the element + the header - ClipVertex* dst = (ClipVertex*) aligned_vector_extend(&activePolyList->vector, vertices->size + 1); - - // Store a pointer to the header - PVRHeader* header = (PVRHeader*) dst; - - // Compile +static void push(PVRHeader* header, ClipVertex* output, const GLsizei count, PolyList* activePolyList, GLshort textureUnit) { + // Compile the header pvr_poly_cxt_t cxt = *getPVRContext(); cxt.list_type = activePolyList->list_type; @@ -758,26 +764,16 @@ static void push(const AlignedVector* vertices, PolyList* activePolyList, GLshor pvr_poly_compile(&header->hdr, &cxt); - // Point dest at the first new vertex to populate, if we're not sending a header - // we won't increment and instead overwrite the header we just created with the - // first vertex - dst++; + /* Post-process the vertex list */ + ClipVertex* vout = output; - ClipVertex* vin = aligned_vector_at(vertices, 0); - ClipVertex* vout = dst; - - GLuint i = vertices->size; + GLuint i = count; while(i--) { - vin->oargb = 0; - *vout = *vin; - ++vout; - ++vin; + vout->oargb = 0; } } static void submitVertices(GLenum mode, GLsizei first, GLsizei count, GLenum type, const GLvoid* indices) { - static AlignedVector* buffer = NULL; - /* Do nothing if vertices aren't enabled */ if(!(ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG)) { return; @@ -797,43 +793,41 @@ static void submitVertices(GLenum mode, GLsizei first, GLsizei count, GLenum typ profiler_push(__func__); - /* Initialize the buffer on first call */ - if(!buffer) { - buffer = (AlignedVector*) malloc(sizeof(AlignedVector)); - aligned_vector_init(buffer, sizeof(ClipVertex)); - /* Reserve 64k up-front */ - aligned_vector_reserve(buffer, 64 * 1024); - } else { - /* Else, resize to zero (this will retain the allocated memory) */ - aligned_vector_resize(buffer, 0); - } + PolyList* activeList = activePolyList(); + + /* Make room in the list buffer */ + GLsizei spaceNeeded = (mode == GL_POLYGON || mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count; + ClipVertex* start = aligned_vector_extend(&activeList->vector, spaceNeeded + 1); + + /* Store a pointer to the header for later */ + PVRHeader* header = (PVRHeader*) start++; profiler_checkpoint("allocate"); - generate(buffer, mode, first, count, (GLubyte*) indices, type, doTexture, doMultitexture, doLighting); + generate(start, mode, first, count, (GLubyte*) indices, type, doTexture, doMultitexture, doLighting); profiler_checkpoint("generate"); - light(buffer); + light(start, spaceNeeded); profiler_checkpoint("light"); - transform(buffer); + transform(start, spaceNeeded); profiler_checkpoint("transform"); if(isClippingEnabled()) { - clip(buffer); + spaceNeeded = clip(&activeList->vector, start, spaceNeeded); } profiler_checkpoint("clip"); - divide(buffer); + divide(start, spaceNeeded); profiler_checkpoint("divide"); - push(buffer, activePolyList(), 0); + push(header, start, spaceNeeded, activePolyList(), 0); profiler_checkpoint("push"); /* @@ -858,10 +852,18 @@ static void submitVertices(GLenum mode, GLsizei first, GLsizei count, GLenum typ return; } - ClipVertex* vertex = (ClipVertex*) aligned_vector_at(buffer, 0); + /* Push back a copy of the list to the transparent poly list, including the header + (hence the - 1) + */ + ClipVertex* vertex = aligned_vector_push_back( + &transparentPolyList()->vector, start - 1, spaceNeeded + 1 + ); + + PVRHeader* mtHeader = (PVRHeader*) vertex++; + ClipVertex* mtStart = vertex; /* Copy ST coordinates to UV ones */ - GLsizei i = buffer->size; + GLsizei i = spaceNeeded; while(i--) { vertex->uv[0] = vertex->st[0]; vertex->uv[1] = vertex->st[1]; @@ -882,7 +884,7 @@ static void submitVertices(GLenum mode, GLsizei first, GLsizei count, GLenum typ glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); /* Send the buffer again to the transparent list */ - push(buffer, transparentPolyList(), 1); + push(mtHeader, mtStart, spaceNeeded, transparentPolyList(), 1); /* Reset state */ glDepthFunc(depthFunc); diff --git a/GL/flush.c b/GL/flush.c index f38f592..23cff65 100644 --- a/GL/flush.c +++ b/GL/flush.c @@ -92,6 +92,8 @@ void APIENTRY glKosSwapBuffers() { TRACE(); + profiler_push(__func__); + pvr_wait_ready(); pvr_scene_begin(); @@ -112,6 +114,9 @@ void APIENTRY glKosSwapBuffers() { aligned_vector_clear(&PT_LIST.vector); aligned_vector_clear(&TR_LIST.vector); + profiler_checkpoint("scene"); + profiler_pop(); + if(frame_count++ > 100) { profiler_print_stats(); frame_count = 0; diff --git a/GL/profiler.c b/GL/profiler.c index c61c8a9..bc51cf0 100644 --- a/GL/profiler.c +++ b/GL/profiler.c @@ -138,6 +138,6 @@ void profiler_print_stats() { float ms = ((float) result->total_time_us) / 1000.0f; float avg = ms / (float) result->total_calls; - fprintf(stderr, "%-60s%-20f%-20f%d\n", result->name, avg, ms, result->total_calls); + fprintf(stderr, "%-60s%-20f%-20f%u\n", result->name, avg, ms, result->total_calls); } } diff --git a/containers/aligned_vector.c b/containers/aligned_vector.c index c7294e9..60638f6 100644 --- a/containers/aligned_vector.c +++ b/containers/aligned_vector.c @@ -38,7 +38,7 @@ void aligned_vector_reserve(AlignedVector* vector, unsigned int element_count) { vector->capacity = element_count; } -void aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count) { +void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count) { /* Resize enough room */ unsigned int initial_size = vector->size; @@ -48,6 +48,8 @@ void aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned /* Copy the objects in */ memcpy(dest, objs, vector->element_size * count); + + return dest; } void* aligned_vector_resize(AlignedVector* vector, const unsigned int element_count) { diff --git a/containers/aligned_vector.h b/containers/aligned_vector.h index e549bc9..4318ffb 100644 --- a/containers/aligned_vector.h +++ b/containers/aligned_vector.h @@ -16,7 +16,7 @@ typedef struct { void aligned_vector_init(AlignedVector* vector, unsigned int element_size); void aligned_vector_reserve(AlignedVector* vector, unsigned int element_count); -void aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count); +void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count); void* aligned_vector_resize(AlignedVector* vector, const unsigned int element_count); void* aligned_vector_at(const AlignedVector* vector, const unsigned int index); void* aligned_vector_extend(AlignedVector* vector, const unsigned int additional_count);