From edbff323403b5d733af16fc9415f202eafbbef39 Mon Sep 17 00:00:00 2001 From: Hayden K <819028+mrneo240@users.noreply.github.com> Date: Tue, 12 Mar 2019 23:47:54 -0400 Subject: [PATCH] - changes to allocation and mem mgmt - Replace some mem* functions with store queues - fix capacity of AlignedVector to 32 intervals - turn profiler fully compile only --- GL/draw.c | 9 ++++++++- GL/flush.c | 25 ++++++++++++++++++++++--- GL/immediate.c | 5 +++++ GL/private.h | 2 +- GL/profiler.c | 2 +- GL/profiler.h | 4 +++- GL/texture.c | 4 +++- containers/aligned_vector.c | 18 +++++++++++++++++- containers/named_array.c | 10 +++++++--- include/glkos.h | 4 ++++ 10 files changed, 71 insertions(+), 12 deletions(-) diff --git a/GL/draw.c b/GL/draw.c index a16a7ea..3441b1a 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -783,7 +783,14 @@ static void genArraysTriangleFan( return; } - memcpy(buffer, output, sizeof(ClipVertex) * count); + if( !((uint32)buffer % 32) && !((uint32)output % 32) && + !((sizeof(ClipVertex) * count) % 32)){ + /* sq_cpy() requires a 32-byte aligned destination and */ + /* a byte count that is a multiple of 32 */ + sq_cpy(buffer, output, sizeof(ClipVertex) * count); + } else { + memcpy(buffer, output, sizeof(ClipVertex) * count); + } // First 3 vertices are in the right place, just end early output[2].flags = PVR_CMD_VERTEX_EOL; diff --git a/GL/flush.c b/GL/flush.c index 096c73e..dd1881a 100644 --- a/GL/flush.c +++ b/GL/flush.c @@ -94,7 +94,7 @@ void APIENTRY glKosInitEx(GLdcConfig* config) { _glInitImmediateMode(); _glInitFramebuffers(); - _glSetInternalPaletteFormat(GL_RGBA4); + _glSetInternalPaletteFormat(config->internal_palette_format); _glInitTextures(); @@ -118,9 +118,10 @@ void APIENTRY glKosInit() { } #define QACRTA ((((unsigned int)0x10000000)>>26)<<2)&0x1c +static int frame_count = 0; void APIENTRY glKosSwapBuffers() { - static int frame_count = 0; + TRACE(); @@ -152,8 +153,26 @@ void APIENTRY glKosSwapBuffers() { PROFILER_CHECKPOINT("scene"); PROFILER_POP(); +#if PROFILER_COMPILE if(++frame_count > 49) { - profiler_print_stats(); + 
PROFILER_PRINT_STATS(); frame_count = 0; } +#endif +} + + +void APIENTRY glKosReserveOPList(unsigned int elements){ + aligned_vector_reserve(&OP_LIST.vector, elements); + aligned_vector_reserve(_glKosINTERNALGetVertices(), elements/3); +} + +void APIENTRY glKosReservePTList(unsigned int elements){ + aligned_vector_reserve(&PT_LIST.vector, elements); + aligned_vector_reserve(_glKosINTERNALGetVertices(), elements/3); +} + +void APIENTRY glKosReserveTRList(unsigned int elements){ + aligned_vector_reserve(&TR_LIST.vector, elements); + aligned_vector_reserve(_glKosINTERNALGetVertices(), elements/3); } diff --git a/GL/immediate.c b/GL/immediate.c index 0c7e46c..387baf7 100644 --- a/GL/immediate.c +++ b/GL/immediate.c @@ -259,3 +259,8 @@ void APIENTRY glRecti(GLint x1, GLint y1, GLint x2, GLint y2) { void APIENTRY glRectiv(const GLint *v1, const GLint *v2) { return glRectfv((const GLfloat *)v1, (const GLfloat *)v2); } + + +AlignedVector* APIENTRY _glKosINTERNALGetVertices(){ + return &VERTICES; +} \ No newline at end of file diff --git a/GL/private.h b/GL/private.h index 29392a1..239e856 100644 --- a/GL/private.h +++ b/GL/private.h @@ -131,7 +131,7 @@ typedef struct { GLint size; } AttribPointer; -GLboolean _glCheckValidEnum(GLint param, GLenum* values, const char* func); +GLboolean _glCheckValidEnum(GLenum param, GLenum* values, const char* func); GLuint _glGetEnabledAttributes(); AttribPointer* _glGetVertexAttribPointer(); diff --git a/GL/profiler.c b/GL/profiler.c index 49d353b..9811670 100644 --- a/GL/profiler.c +++ b/GL/profiler.c @@ -6,7 +6,7 @@ #include "profiler.h" #include "private.h" -#ifdef PROFILER_COMPILE +#if PROFILER_COMPILE #include "../containers/aligned_vector.h" #define MAX_PATH 256 diff --git a/GL/profiler.h b/GL/profiler.h index 15216e8..69c307d 100644 --- a/GL/profiler.h +++ b/GL/profiler.h @@ -2,11 +2,12 @@ #include -#define PROFILER_COMPILE 1 +#define PROFILER_COMPILE 0 #if PROFILER_COMPILE #define PROFILER_PUSH(S) profiler_push(S) #define 
PROFILER_CHECKPOINT(P) profiler_checkpoint(P) #define PROFILER_POP() profiler_pop() +#define PROFILER_PRINT_STATS() profiler_print_stats() void profiler_enable(); void profiler_disable(); @@ -25,4 +26,5 @@ void profiler_print_stats(); #define PROFILER_PUSH(S) #define PROFILER_CHECKPOINT(P) #define PROFILER_POP() +#define PROFILER_PRINT_STATS() #endif diff --git a/GL/texture.c b/GL/texture.c index 4a94a86..5bc6af0 100644 --- a/GL/texture.c +++ b/GL/texture.c @@ -35,7 +35,9 @@ static GLenum INTERNAL_PALETTE_FORMAT = GL_RGBA4; static TexturePalette* _initTexturePalette() { TexturePalette* palette = (TexturePalette*) malloc(sizeof(TexturePalette)); assert(palette); - memset(palette, 0x0, sizeof(TexturePalette)); + + /* sq_clr() needs a 32-byte aligned destination and a byte count that is a multiple of 32 */ + if(!((unsigned long)palette % 32) && !(sizeof(TexturePalette) % 32)) { sq_clr(palette, sizeof(TexturePalette)); } else { memset(palette, 0x0, sizeof(TexturePalette)); } palette->data = NULL; palette->format = 0; diff --git a/containers/aligned_vector.c b/containers/aligned_vector.c index 011d0df..e79b0c8 100644 --- a/containers/aligned_vector.c +++ b/containers/aligned_vector.c @@ -23,10 +23,16 @@ void aligned_vector_init(AlignedVector* vector, unsigned int element_size) { aligned_vector_reserve(vector, ALIGNED_VECTOR_INITIAL_CAPACITY); } +#define VECTOR_ALIGN_COUNT 256 +#define _VECTOR_ALIGN_COUNT (VECTOR_ALIGN_COUNT - 1) + void aligned_vector_reserve(AlignedVector* vector, unsigned int element_count) { if(element_count <= vector->capacity) { return; } + element_count = ((element_count+_VECTOR_ALIGN_COUNT) & ~_VECTOR_ALIGN_COUNT); + //printf("List growing from %d -> %d\n",vector->size,element_count); + unsigned int original_byte_size = vector->size * vector->element_size; unsigned int new_byte_size = element_count * vector->element_size; @@ -34,7 +40,14 @@ void aligned_vector_reserve(AlignedVector* vector, unsigned int element_count) { vector->data = (unsigned char*) memalign(0x20, new_byte_size); if(original_data) { - memcpy(vector->data, original_data, original_byte_size); + /* sq_cpy() needs a 32-byte aligned destination and a byte count that is a multiple of 32 */ + if( !((unsigned long)vector->data % 32) && 
!((unsigned long)original_data % 32) && + !(original_byte_size % 32)){ + sq_cpy(vector->data, original_data, original_byte_size); + } else { + memcpy(vector->data, original_data, original_byte_size); + } + free(original_data); } @@ -51,6 +64,7 @@ void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned /* Copy the objects in */ memcpy(dest, objs, vector->element_size * count); + //sq_cpy(dest, objs, ((vector->element_size * count) & 0xfffffffc) + 4); return dest; } @@ -108,6 +122,8 @@ void aligned_vector_shrink_to_fit(AlignedVector* vector) { if(original_data) { memcpy(vector->data, original_data, new_byte_size); + //sq_cpy(vector->data, original_data, ((new_byte_size) & 0xfffffffc) + 4); + free(original_data); } diff --git a/containers/named_array.c b/containers/named_array.c index c028e96..c1185fd 100644 --- a/containers/named_array.c +++ b/containers/named_array.c @@ -27,7 +27,8 @@ void named_array_init(NamedArray* array, unsigned int element_size, unsigned int array->elements = (unsigned char*) malloc(element_size * max_elements); array->used_markers = (unsigned char*) malloc(array->marker_count); #endif - memset(array->used_markers, 0, sizeof(unsigned char) * array->marker_count); + /* sq_clr() needs a 32-byte aligned destination and a byte count that is a multiple of 32 */ + if(!((unsigned long)array->used_markers % 32) && !(array->marker_count % 32)) { sq_clr(array->used_markers, sizeof(unsigned char) * array->marker_count); } else { memset(array->used_markers, 0, sizeof(unsigned char) * array->marker_count); } } char named_array_used(NamedArray* array, unsigned int id) { @@ -47,7 +48,9 @@ void* named_array_alloc(NamedArray* array, unsigned int* new_id) { array->used_markers[i] |= (unsigned char) 1 << j; *new_id = id; unsigned char* ptr = &array->elements[id * array->element_size]; - memset(ptr, 0, array->element_size); + + /* sq_clr() needs a 32-byte aligned destination and a byte count that is a multiple of 32 */ + if(!((unsigned long)ptr % 32) && !(array->element_size % 32)) { sq_clr(ptr, array->element_size); } else { memset(ptr, 0, array->element_size); } return ptr; } } @@ -66,7 +69,8 @@ void* named_array_reserve(NamedArray* array, unsigned int id) { assert(named_array_used(array, 
id)); unsigned char* ptr = &array->elements[id * array->element_size]; - memset(ptr, 0, array->element_size); + /* sq_clr() needs a 32-byte aligned destination and a byte count that is a multiple of 32 */ + if(!((unsigned long)ptr % 32) && !(array->element_size % 32)) { sq_clr(ptr, array->element_size); } else { memset(ptr, 0, array->element_size); } return ptr; } diff --git a/include/glkos.h b/include/glkos.h index ba2b323..59d27a2 100644 --- a/include/glkos.h +++ b/include/glkos.h @@ -91,5 +91,9 @@ GLAPI void APIENTRY glKosSwapBuffers(); /* Pass to glTexParameteri to set the shared bank */ #define GL_SHARED_TEXTURE_BANK_KOS 0xEF00 +GLAPI void APIENTRY glKosReserveOPList(unsigned int elements); +GLAPI void APIENTRY glKosReservePTList(unsigned int elements); +GLAPI void APIENTRY glKosReserveTRList(unsigned int elements); + __END_DECLS