From 9cffe14ad69e622efdcc8ec4a9acbff5fbcd1f0e Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Fri, 12 May 2023 20:51:36 +0100 Subject: [PATCH] Clean up aligned vector --- GL/draw.c | 13 ++-- GL/draw_fastpath.inc | 2 +- GL/flush.c | 12 ++-- GL/immediate.c | 11 ++- GL/platforms/sh4.c | 12 ++-- GL/platforms/software.c | 2 +- containers/aligned_vector.c | 42 ++++++----- containers/aligned_vector.h | 128 ++++++++++++++++++++++------------ samples/cubes/main.cpp | 15 ++-- samples/quadmark/main.c | 35 +++++----- samples/zclip_triangle/main.c | 2 + 11 files changed, 162 insertions(+), 112 deletions(-) diff --git a/GL/draw.c b/GL/draw.c index aea1f34..3548bd1 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -239,7 +239,7 @@ static void _fillWithNegZVE(const GLubyte* __restrict__ input, GLubyte* __restri float x, y, z; } V; - const static V NegZ = {0.0f, 0.0f, -1.0f}; + static const V NegZ = {0.0f, 0.0f, -1.0f}; *((V*) out) = NegZ; } @@ -391,12 +391,12 @@ GL_FORCE_INLINE void transformNormalToEyeSpace(GLfloat* normal) { } GL_FORCE_INLINE PolyHeader *_glSubmissionTargetHeader(SubmissionTarget* target) { - gl_assert(target->header_offset < target->output->vector.size); + gl_assert(target->header_offset < aligned_vector_size(&target->output->vector)); return aligned_vector_at(&target->output->vector, target->header_offset); } GL_INLINE_DEBUG Vertex* _glSubmissionTargetStart(SubmissionTarget* target) { - gl_assert(target->start_offset < target->output->vector.size); + gl_assert(target->start_offset < aligned_vector_size(&target->output->vector)); return aligned_vector_at(&target->output->vector, target->start_offset); } @@ -1210,15 +1210,14 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL return; } - GLboolean header_required = (target->output->vector.size == 0) || _glGPUStateIsDirty(); - - // We don't handle this any further, so just make sure we never pass it down */ gl_assert(mode != GL_POLYGON); target->output = _glActivePolyList(); + GLboolean header_required = (aligned_vector_header(&target->output->vector)->size == 0) || _glGPUStateIsDirty(); + target->count = (mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count; - target->header_offset = target->output->vector.size; + target->header_offset = aligned_vector_header(&target->output->vector)->size; target->start_offset = target->header_offset + (header_required); gl_assert(target->count); diff --git a/GL/draw_fastpath.inc b/GL/draw_fastpath.inc index 82a7014..d483b9d 100644 --- a/GL/draw_fastpath.inc +++ b/GL/draw_fastpath.inc @@ -24,7 +24,7 @@ MAKE_FUNC(POLYMODE) VertexExtra* ve; - for(int min = 0; min < count; min += BATCH_SIZE) { + for(min = 0; min < count; min += BATCH_SIZE) { const Vertex* start = ((Vertex*) _glSubmissionTargetStart(target)) + min; const int_fast32_t loop = ((min + BATCH_SIZE) > count) ? count - min : BATCH_SIZE; const int offset = (first + min); diff --git a/GL/flush.c b/GL/flush.c index c328ef6..cf85a4e 100644 --- a/GL/flush.c +++ b/GL/flush.c @@ -93,21 +93,21 @@ void APIENTRY glKosSwapBuffers() { TRACE(); SceneBegin(); - if(OP_LIST.vector.size > 2) { + if(aligned_vector_header(&OP_LIST.vector)->size > 2) { SceneListBegin(GPU_LIST_OP_POLY); - SceneListSubmit((Vertex*) OP_LIST.vector.data, OP_LIST.vector.size); + SceneListSubmit((Vertex*) aligned_vector_front(&OP_LIST.vector), aligned_vector_size(&OP_LIST.vector)); SceneListFinish(); } - if(PT_LIST.vector.size > 2) { + if(aligned_vector_header(&PT_LIST.vector)->size > 2) { SceneListBegin(GPU_LIST_PT_POLY); - SceneListSubmit((Vertex*) PT_LIST.vector.data, PT_LIST.vector.size); + SceneListSubmit((Vertex*) aligned_vector_front(&PT_LIST.vector), aligned_vector_size(&PT_LIST.vector)); SceneListFinish(); } - if(TR_LIST.vector.size > 2) { + if(aligned_vector_header(&TR_LIST.vector)->size > 2) { SceneListBegin(GPU_LIST_TR_POLY); - SceneListSubmit((Vertex*) TR_LIST.vector.data, TR_LIST.vector.size); + SceneListSubmit((Vertex*) aligned_vector_front(&TR_LIST.vector), aligned_vector_size(&TR_LIST.vector)); SceneListFinish(); } SceneFinish(); diff --git a/GL/immediate.c b/GL/immediate.c index c0e2adc..69dd7a4 100644 --- a/GL/immediate.c +++ b/GL/immediate.c @@ -50,7 +50,7 @@ void _glInitImmediateMode(GLuint initial_size) { aligned_vector_init(&VERTICES, sizeof(IMVertex)); aligned_vector_reserve(&VERTICES, initial_size); - IM_ATTRIBS.vertex.ptr = VERTICES.data; + IM_ATTRIBS.vertex.ptr = aligned_vector_front(&VERTICES); IM_ATTRIBS.vertex.size = 3; IM_ATTRIBS.vertex.type = GL_FLOAT; IM_ATTRIBS.vertex.stride = sizeof(IMVertex); @@ -161,12 +161,11 @@ void APIENTRY glColor3fv(const GLfloat* v) { void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) { IM_ENABLED_VERTEX_ATTRIBUTES |= VERTEX_ENABLED_FLAG; - unsigned int cap = VERTICES.capacity; + uint32_t cap = aligned_vector_capacity(&VERTICES); IMVertex* vert = aligned_vector_extend(&VERTICES, 1); - - if(cap != VERTICES.capacity) { + if(cap != aligned_vector_capacity(&VERTICES)) { /* Resizing could've invalidated the pointers */ - IM_ATTRIBS.vertex.ptr = VERTICES.data; + IM_ATTRIBS.vertex.ptr = aligned_vector_front(&VERTICES); IM_ATTRIBS.uv.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 3); IM_ATTRIBS.st.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 5); IM_ATTRIBS.colour.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 7); @@ -281,7 +280,7 @@ void APIENTRY glEnd() { FAST_PATH_ENABLED = GL_TRUE; #endif - glDrawArrays(ACTIVE_POLYGON_MODE, 0, VERTICES.size); + glDrawArrays(ACTIVE_POLYGON_MODE, 0, aligned_vector_header(&VERTICES)->size); ATTRIB_POINTERS = stashed_attrib_pointers; diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index 45cce24..addc6fe 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -50,9 +50,8 @@ GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) { const float f = _glFastInvert(vertex->w); /* Convert to NDC and apply viewport */ - vertex->xyz[0] = ((vertex->xyz[0] * f) * 320) + 320; - vertex->xyz[1] = ((vertex->xyz[1] * f) * -240) + 240; - vertex->xyz[2] = f; + vertex->xyz[0] = (vertex->xyz[0] * f * 320) + 320; + vertex->xyz[1] = (vertex->xyz[1] * f * -240) + 240; /* Orthographic projections need to use invZ otherwise we lose the depth information. As w == 1, and clip-space range is -w to +w @@ -61,6 +60,8 @@ GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) { */ if(vertex->w == 1.0f) { vertex->xyz[2] = _glFastInvert(1.0001f + vertex->xyz[2]); + } else { + vertex->xyz[2] = f; } } @@ -137,12 +138,13 @@ void SceneListSubmit(Vertex* v2, int n) { fprintf(stderr, "----\n"); #endif - uint8_t __attribute__((aligned(32))) visible_mask = 0; - uint8_t __attribute__((aligned(32))) counter = 0; + uint8_t visible_mask = 0; + uint8_t counter = 0; sq = SQ_BASE_ADDRESS; for(int i = 0; i < n; ++i, ++v2) { + PREFETCH(v2 + 1); switch(v2->flags) { case GPU_CMD_VERTEX_EOL: if(counter < 2) { diff --git a/GL/platforms/software.c b/GL/platforms/software.c index bd527c9..6b5a9ee 100644 --- a/GL/platforms/software.c +++ b/GL/platforms/software.c @@ -255,7 +255,7 @@ GL_FORCE_INLINE void ShiftRotateTriangle() { tri_count--; } -void SceneListSubmit(void* src, int n) { +void SceneListSubmit(Vertex* src, int n) { /* Perform perspective divide on each vertex */ Vertex* vertex = (Vertex*) src; diff --git a/containers/aligned_vector.c b/containers/aligned_vector.c index 442e0b9..15729ae 100644 --- a/containers/aligned_vector.c +++ b/containers/aligned_vector.c @@ -12,36 +12,44 @@ #include "aligned_vector.h" -extern inline void* aligned_vector_resize(AlignedVector* vector, const unsigned int element_count); -extern inline void* aligned_vector_extend(AlignedVector* vector, const unsigned int additional_count); -extern inline void* aligned_vector_reserve(AlignedVector* vector, unsigned int element_count); -extern inline void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count); +extern inline void* aligned_vector_resize(AlignedVector* vector, const uint32_t element_count); +extern inline void* aligned_vector_extend(AlignedVector* vector, const uint32_t additional_count); +extern inline void* aligned_vector_reserve(AlignedVector* vector, uint32_t element_count); +extern inline void* aligned_vector_push_back(AlignedVector* vector, const void* objs, uint32_t count); -void aligned_vector_init(AlignedVector* vector, unsigned int element_size) { - vector->size = vector->capacity = 0; - vector->element_size = element_size; - vector->data = NULL; +void aligned_vector_init(AlignedVector* vector, uint32_t element_size) { + /* Now initialize the header*/ + AlignedVectorHeader* const hdr = &vector->hdr; + hdr->size = 0; + hdr->capacity = ALIGNED_VECTOR_CHUNK_SIZE; + hdr->element_size = element_size; - /* Reserve some initial capacity */ - aligned_vector_reserve(vector, ALIGNED_VECTOR_CHUNK_SIZE); + /* Reserve some initial capacity. This will do the allocation but not set up the header */ + void* ptr = aligned_vector_reserve(vector, ALIGNED_VECTOR_CHUNK_SIZE); + assert(ptr); + (void) ptr; } void aligned_vector_shrink_to_fit(AlignedVector* vector) { - if(vector->size == 0) { + AlignedVectorHeader* const hdr = &vector->hdr; + if(hdr->size == 0) { + uint32_t element_size = hdr->element_size; free(vector->data); - vector->data = NULL; - vector->capacity = 0; + + /* Reallocate the header */ + vector->data = memalign(0x20, sizeof(AlignedVectorHeader)); + hdr->size = hdr->capacity = 0; + hdr->element_size = element_size; } else { - unsigned int new_byte_size = vector->size * vector->element_size; - unsigned char* original_data = vector->data; + uint32_t new_byte_size = (hdr->size * hdr->element_size); + uint8_t* original_data = vector->data; vector->data = (unsigned char*) memalign(0x20, new_byte_size); if(original_data) { FASTCPY(vector->data, original_data, new_byte_size); free(original_data); } - - vector->capacity = vector->size; + hdr->capacity = hdr->size; } } diff --git a/containers/aligned_vector.h b/containers/aligned_vector.h index d5f361a..5109b8b 100644 --- a/containers/aligned_vector.h +++ b/containers/aligned_vector.h @@ -67,10 +67,14 @@ AV_FORCE_INLINE void *AV_MEMCPY4(void *dest, const void *src, size_t len) #endif typedef struct { - uint8_t* __attribute__((aligned(32))) data; uint32_t size; uint32_t capacity; uint32_t element_size; +} __attribute__((aligned(32))) AlignedVectorHeader; + +typedef struct { + AlignedVectorHeader hdr; + uint8_t* data; } AlignedVector; #define ALIGNED_VECTOR_CHUNK_SIZE 256u @@ -80,91 +84,129 @@ typedef struct { ((((v) + ALIGNED_VECTOR_CHUNK_SIZE - 1) / ALIGNED_VECTOR_CHUNK_SIZE) * ALIGNED_VECTOR_CHUNK_SIZE) -void aligned_vector_init(AlignedVector* vector, unsigned int element_size); +void aligned_vector_init(AlignedVector* vector, uint32_t element_size); -AV_FORCE_INLINE void* aligned_vector_reserve(AlignedVector* vector, unsigned int element_count) { - if(element_count <= vector->capacity) { - return NULL; +AV_FORCE_INLINE void* aligned_vector_at(const AlignedVector* vector, const uint32_t index) { + const AlignedVectorHeader* hdr = &vector->hdr; + assert(index < hdr->size); + return vector->data + (index * hdr->element_size); +} + +AV_FORCE_INLINE void* aligned_vector_reserve(AlignedVector* vector, uint32_t element_count) { + AlignedVectorHeader* hdr = &vector->hdr; + + if(element_count < hdr->capacity) { + return aligned_vector_at(vector, element_count); } - unsigned int original_byte_size = vector->size * vector->element_size; + uint32_t original_byte_size = (hdr->size * hdr->element_size); /* We overallocate so that we don't make small allocations during push backs */ element_count = ROUND_TO_CHUNK_SIZE(element_count); - unsigned int new_byte_size = element_count * vector->element_size; - unsigned char* original_data = vector->data; + uint32_t new_byte_size = (element_count * hdr->element_size); + uint8_t* original_data = vector->data; - vector->data = (unsigned char*) memalign(0x20, new_byte_size); + vector->data = (uint8_t*) memalign(0x20, new_byte_size); assert(vector->data); - if(original_data) { - AV_MEMCPY4(vector->data, original_data, original_byte_size); - free(original_data); - } - - vector->capacity = element_count; + AV_MEMCPY4(vector->data, original_data, original_byte_size); + free(original_data); + hdr->capacity = element_count; return vector->data + original_byte_size; } -AV_FORCE_INLINE void* aligned_vector_at(const AlignedVector* vector, const unsigned int index) { - assert(index < vector->size); - return &vector->data[index * vector->element_size]; +AV_FORCE_INLINE AlignedVectorHeader* aligned_vector_header(const AlignedVector* vector) { + return (AlignedVectorHeader*) &vector->hdr; } -AV_FORCE_INLINE void* aligned_vector_resize(AlignedVector* vector, const unsigned int element_count) { +AV_FORCE_INLINE uint32_t aligned_vector_size(const AlignedVector* vector) { + const AlignedVectorHeader* hdr = &vector->hdr; + return hdr->size; +} + +AV_FORCE_INLINE uint32_t aligned_vector_capacity(const AlignedVector* vector) { + const AlignedVectorHeader* hdr = &vector->hdr; + return hdr->capacity; +} + +AV_FORCE_INLINE void* aligned_vector_front(const AlignedVector* vector) { + return vector->data; +} + +/* Resizes the array and returns a pointer to the first new element (if upsizing) or NULL (if downsizing) */ +AV_FORCE_INLINE void* aligned_vector_resize(AlignedVector* vector, const uint32_t element_count) { void* ret = NULL; - unsigned int previousCount = vector->size; - - if(vector->capacity < element_count) { + AlignedVectorHeader* hdr = &vector->hdr; + uint32_t previous_count = hdr->size; + if(hdr->capacity <= element_count) { /* If we didn't have capacity, increase capacity (slow) */ - vector->size = element_count; - ret = aligned_vector_reserve(vector, element_count); - } else if(previousCount < element_count) { + + aligned_vector_reserve(vector, element_count); + hdr->size = element_count; + + ret = aligned_vector_at(vector, previous_count); + + assert(hdr->size == element_count); + assert(hdr->size <= hdr->capacity); + } else if(previous_count < element_count) { /* So we grew, but had the capacity, just get a pointer to * where we were */ - vector->size = element_count; - ret = aligned_vector_at(vector, previousCount); - } else { - vector->size = element_count; - ret = aligned_vector_at(vector, previousCount); + hdr->size = element_count; + assert(hdr->size < hdr->capacity); + ret = aligned_vector_at(vector, previous_count); + } else if(hdr->size != element_count) { + hdr->size = element_count; + assert(hdr->size < hdr->capacity); } return ret; } -AV_FORCE_INLINE void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count) { +AV_FORCE_INLINE void* aligned_vector_push_back(AlignedVector* vector, const void* objs, uint32_t count) { /* Resize enough room */ + AlignedVectorHeader* hdr = &vector->hdr; + assert(count); - assert(vector->element_size); + assert(hdr->element_size); - unsigned int initial_size = vector->size; - aligned_vector_resize(vector, vector->size + count); +#ifndef NDEBUG + uint32_t element_size = hdr->element_size; + uint32_t initial_size = hdr->size; +#endif - assert(vector->size == initial_size + count); - - unsigned char* dest = vector->data + (vector->element_size * initial_size); + uint8_t* dest = (uint8_t*) aligned_vector_resize(vector, hdr->size + count); + assert(dest); /* Copy the objects in */ - AV_MEMCPY4(dest, objs, vector->element_size * count); + AV_MEMCPY4(dest, objs, hdr->element_size * count); + assert(hdr->element_size == element_size); + assert(hdr->size == initial_size + count); return dest; } -AV_FORCE_INLINE void* aligned_vector_extend(AlignedVector* vector, const unsigned int additional_count) { - return aligned_vector_resize(vector, vector->size + additional_count); +AV_FORCE_INLINE void* aligned_vector_extend(AlignedVector* vector, const uint32_t additional_count) { + AlignedVectorHeader* hdr = &vector->hdr; + void* ret = aligned_vector_resize(vector, hdr->size + additional_count); + assert(ret); // Should always return something + return ret; } AV_FORCE_INLINE void aligned_vector_clear(AlignedVector* vector){ - vector->size = 0; + AlignedVectorHeader* hdr = &vector->hdr; + hdr->size = 0; } + void aligned_vector_shrink_to_fit(AlignedVector* vector); void aligned_vector_cleanup(AlignedVector* vector); -static inline void* aligned_vector_back(AlignedVector* vector){ - return aligned_vector_at(vector, vector->size - 1); + +AV_FORCE_INLINE void* aligned_vector_back(AlignedVector* vector){ + AlignedVectorHeader* hdr = &vector->hdr; + return aligned_vector_at(vector, hdr->size - 1); } #ifdef __cplusplus diff --git a/samples/cubes/main.cpp b/samples/cubes/main.cpp index eaeeaea..59a9f1b 100644 --- a/samples/cubes/main.cpp +++ b/samples/cubes/main.cpp @@ -1,18 +1,19 @@ + #include #include #include #include -#include "GL/gl.h" -#include "GL/glu.h" #ifdef __DREAMCAST__ #include -#include "GL/glext.h" -#include "GL/glkos.h" - float avgfps = -1; #endif +#include "GL/gl.h" +#include "GL/glkos.h" +#include "GL/glu.h" +#include "GL/glext.h" + #define PI 3.14159265358979323846264338327950288f #define RAD_TO_DEG 57.295779513082320876798154814105f #define MAX_CUBES 350 @@ -251,9 +252,7 @@ float rnd(float Min, float Max) void initialize() { debugLog("Initialize video output"); -#ifdef __DREAMCAST__ glKosInit(); -#endif glClearDepth(1.0); glDepthFunc(GL_LEQUAL); @@ -280,7 +279,7 @@ void initialize() glLoadIdentity(); // Set up colors (each face has a different color) - for (int i = 0; i < 6; i++) + for (int i = 0; i < 6; i++) { faceColors[i * 4] = colors[i]; faceColors[i * 4 + 1] = colors[i]; diff --git a/samples/quadmark/main.c b/samples/quadmark/main.c index 4da3046..e1bdcc9 100644 --- a/samples/quadmark/main.c +++ b/samples/quadmark/main.c @@ -68,21 +68,16 @@ int check_start() { } void setup() { - //PVR needs to warm up for a frame, or results will be low - glKosInit(); + GLdcConfig cfg; + glKosInitConfig(&cfg); + cfg.initial_immediate_capacity = 14000; + glKosInitEx(&cfg); + glMatrixMode(GL_MODELVIEW); glLoadIdentity(); glOrtho(0, 640, 0, 480, -100, 100); glMatrixMode(GL_PROJECTION); glLoadIdentity(); - - glDisable(GL_NEARZ_CLIPPING_KOS); - -#ifdef __DREAMCAST__ - pvr_wait_ready(); - pvr_scene_begin(); - pvr_scene_finish(); -#endif } void do_frame() { @@ -116,6 +111,8 @@ time_t begin; void switch_tests(int ppf) { printf("Beginning new test: %d polys per frame (%d per second at 60fps)\n", ppf * 2, ppf * 2 * 60); + fflush(stdout); + avgfps = -1; polycnt = ppf; } @@ -128,7 +125,6 @@ void check_switch() { if(now >= (begin + 5)) { begin = time(NULL); printf(" Average Frame Rate: ~%f fps (%d pps)\n", avgfps, (int)(polycnt * avgfps * 2)); - switch(phase) { case PHASE_HALVE: @@ -169,19 +165,24 @@ void check_switch() { case PHASE_FINAL: break; } + + fflush(stdout); } } +#define PROFILE 0 + int main(int argc, char **argv) { -#ifndef NDEBUG -#ifdef __DREAMCAST__ +#if PROFILE profiler_init("/pc/gmon.out"); - profiler_start(); -#endif #endif setup(); +#if PROFILE + profiler_start(); +#endif + /* Start off with something obscene */ switch_tests(200000 / 60); begin = time(NULL); @@ -200,11 +201,9 @@ int main(int argc, char **argv) { stats(); -#ifdef __DREAMCAST__ -#ifndef NDEBUG +#if PROFILE profiler_stop(); profiler_clean_up(); -#endif #endif return 0; diff --git a/samples/zclip_triangle/main.c b/samples/zclip_triangle/main.c index 92482ab..1cae41a 100644 --- a/samples/zclip_triangle/main.c +++ b/samples/zclip_triangle/main.c @@ -28,6 +28,8 @@ void InitGL(int Width, int Height) // We call this right after our OpenG glMatrixMode(GL_MODELVIEW); glLoadIdentity(); + + glEnable(GL_CULL_FACE); } /* The function called when our window is resized (which shouldn't happen, because we're fullscreen) */