From 3308a57e5948aa09bb739279e265f37367471822 Mon Sep 17 00:00:00 2001
From: Luke Benstead
Date: Fri, 8 Sep 2023 17:49:46 +0100
Subject: [PATCH] Implement defragmenting the memory

---
 GL/alloc/alloc.c       | 203 ++++++++++++++++++++++++++----------------
 GL/alloc/alloc.h       |   6 +-
 GL/texture.c           |  38 +++++----
 tests/test_allocator.h |  48 +++++++++--
 tools/test.h           |   8 +-
 5 files changed, 204 insertions(+), 99 deletions(-)

diff --git a/GL/alloc/alloc.c b/GL/alloc/alloc.c
index 6b787d6..6760ff4 100644
--- a/GL/alloc/alloc.c
+++ b/GL/alloc/alloc.c
@@ -6,7 +6,7 @@
 
 #include "alloc.h"
 
-/* This allocator is designed so that all allocations larger
+/* This allocator is designed so that ideally all allocations larger
 * than 2k, fall on a 2k boundary. Smaller allocations will
 * never cross a 2k boundary.
 *
@@ -15,9 +15,9 @@
 * blocks anyway as they have to be 2k aligned (so you'd need to
 * store them in reverse or something)
 *
- * Defragmenting the pool will move allocations less than 2k
- * first, and then shift any full 2k blocks to the start of the
- * address space.
+ * Defragmenting the pool repeatedly walks the allocation list, moving
+ * each allocation as low in the pool as it will fit, until the caller's
+ * iteration limit is reached or until nothing moves any more.
 *
 * The maximum pool size is 8M, made up of:
 *
@@ -41,10 +38,7 @@
 *
 * FIXME:
 *
- * - Allocations < 2048 can still cross boundaries
 * - Only operates on one pool (ignores what you pass)
- * - If there are no 2048 aligned blocks, we should fall-back to unaligned
- * - Defrag not implemented!
 */
 
 #include
@@ -90,7 +87,6 @@ typedef struct {
     size_t pool_size; // Size of the memory pool
     uint8_t* base_address; // First 2k aligned address in the pool
     size_t block_count; // Number of 2k blocks in the pool
-    bool defrag_in_progress;
 
     /* It's frustrating that we need to do this dynamically
      * but we need to know the size allocated when we free()...
@@ -104,7 +100,7 @@ typedef struct {
 
 
 static PoolHeader pool_header = {
-    {0}, NULL, 0, NULL, 0, false, NULL
+    {0}, NULL, 0, NULL, 0, NULL
 };
 
 void* alloc_base_address(void* pool) {
@@ -147,8 +143,6 @@ void* alloc_next_available_ex(void* pool, size_t required_size, size_t* start_su
     uint32_t required_subblocks = (required_size / 256);
     if(required_size % 256) required_subblocks += 1;
 
-    // uint8_t* end = pool_header.block_usage + pool_header.block_count;
-
     /* Anything gte to 2048 must be aligned to a 2048 boundary */
     bool requires_alignment = required_size >= 2048;
 
@@ -160,7 +154,6 @@ void* alloc_next_available_ex(void* pool, size_t required_size, size_t* start_su
      * but it's not aligned, or it's straddling a 2k boundary, then we store
      * it here and if we reach the end of the search and find nothing better
      * we use this instead */
-
     uint8_t* poor_option = NULL;
     size_t poor_start_subblock = 0;
 
@@ -180,7 +173,17 @@ void* alloc_next_available_ex(void* pool, size_t required_size, size_t* start_su
         /* Now let's see how many consecutive blocks we can find */
         for(int i = 0; i < 8; ++i) {
             if((t & 0x80) == 0) {
-                if(requires_alignment && found_subblocks == 0 && i != 0) {
-                    // Ignore this subblock, because we want the first subblock to be aligned
-                    // at a 2048 boundary and this one isn't (i != 0)
+                bool block_overflow = (
+                    required_size < 2048 && found_subblocks > 0 && i == 0
+                );
+
+                bool reset_subblocks = (
+                    (requires_alignment && found_subblocks == 0 && i != 0) ||
+                    block_overflow
+                );
+
+                if(reset_subblocks) {
+                    // Restart the run: either the first subblock must be
+                    // 2048 aligned and this one isn't (i != 0), or a small
+                    // allocation would otherwise straddle a 2k boundary
                     found_subblocks = 0;
@@ -188,6 +191,12 @@ void* alloc_next_available_ex(void* pool, size_t required_size, size_t* start_su
                     found_subblocks++;
                 }
 
+                /* If we reset the subblocks due to an overflow, we still
+                 * want to count this free subblock in our count */
+                if(block_overflow) {
+                    found_subblocks++;
+                }
+
                 found_poor_subblocks++;
 
                 if(found_subblocks >= required_subblocks) {
@@ -234,7 +243,7 @@ int alloc_init(void* pool, size_t size) {
 
     uint8_t* p = (uint8_t*) pool;
 
-    memset(pool_header.block_usage, 0, sizeof(pool_header.block_usage));
+    memset(pool_header.block_usage, 0, BLOCK_COUNT);
     pool_header.pool = pool;
     pool_header.pool_size = size;
 
@@ -253,6 +262,10 @@ int alloc_init(void* pool, size_t size) {
 void alloc_shutdown(void* pool) {
     (void) pool;
 
+    if(!pool_header.pool) {
+        return;
+    }
+
     struct AllocEntry* it = pool_header.allocations;
     while(it) {
         struct AllocEntry* next = it->next;
@@ -261,6 +274,7 @@ void alloc_shutdown(void* pool) {
     }
 
     memset(&pool_header, 0, sizeof(pool_header));
+    pool_header.pool = NULL;
 }
 
 static inline uint32_t size_to_subblock_count(size_t size) {
@@ -285,10 +299,6 @@ void* alloc_malloc(void* pool, size_t size) {
     size_t start_subblock, required_subblocks;
     void* ret = alloc_next_available_ex(pool, size, &start_subblock, &required_subblocks);
 
-    if(size >= 2048) {
-        assert(((uintptr_t) ret) % 2048 == 0);
-    }
-
     if(ret) {
         size_t block;
         uint8_t offset;
@@ -367,6 +377,69 @@ void* alloc_malloc(void* pool, size_t size) {
     return ret;
 }
 
+static void alloc_release_blocks(struct AllocEntry* it) {
+    size_t used_subblocks = size_to_subblock_count(it->size);
+    size_t subblock = subblock_from_pointer(it->pointer);
+    size_t block;
+    uint8_t offset;
+    block_and_offset_from_subblock(subblock, &block, &offset);
+
+    uint8_t mask = 0;
+
+    DBG_MSG("Free: size: %d, us: %d, sb: %d, off: %d\n", it->size, used_subblocks, block, offset);
+
+    /* Wipe out any leading subblocks, stopping at the end of the first
+     * block; full and trailing blocks are handled below */
+    int c = (used_subblocks < (size_t) (8 - offset)) ? (int) used_subblocks : (8 - offset);
+    for(int i = 0; i < c; ++i) {
+        mask |= (1 << (7 - (offset + i)));
+        used_subblocks--;
+    }
+
+    if(mask) {
+        pool_header.block_usage[block++] &= ~mask;
+    }
+
+    /* Clear any full blocks in the middle of the allocation */
+    while(used_subblocks > 8) {
+        pool_header.block_usage[block++] = 0;
+        used_subblocks -= 8;
+    }
+
+    /* Wipe out any trailing subblocks */
+    mask = 0;
+    for(size_t i = 0; i < used_subblocks; ++i) {
+        mask |= (1 << (7 - i));
+    }
+
+    if(mask) {
+        pool_header.block_usage[block++] &= ~mask;
+    }
+}
+
+/* Unlink and free the AllocEntry that records the allocation at `p`,
+ * without releasing the blocks it covers. The defragmenter uses this
+ * to drop the duplicate entry that alloc_malloc() creates for the
+ * destination of a moved allocation */
+static void alloc_remove_entry(void* p) {
+    struct AllocEntry* it = pool_header.allocations;
+    struct AllocEntry* last = NULL;
+
+    while(it) {
+        if(it->pointer == p) {
+            if(last) {
+                last->next = it->next;
+            } else {
+                pool_header.allocations = it->next;
+            }
+            free(it);
+            return;
+        }
+        last = it;
+        it = it->next;
+    }
+}
+
 void alloc_free(void* pool, void* p) {
     (void) pool;
 
@@ -374,42 +447,7 @@ void alloc_free(void* pool, void* p) {
     struct AllocEntry* it = pool_header.allocations;
     struct AllocEntry* last = NULL;
     while(it) {
         if(it->pointer == p) {
-            size_t used_subblocks = size_to_subblock_count(it->size);
-            size_t subblock = subblock_from_pointer(p);
-            size_t block;
-            uint8_t offset;
-            block_and_offset_from_subblock(subblock, &block, &offset);
-
-            uint8_t mask = 0;
-
-            DBG_MSG("Free: size: %d, us: %d, sb: %d, off: %d\n", it->size, used_subblocks, block, offset);
-
-            /* Wipe out any leading subblocks */
-            int c = (used_subblocks < 8) ? used_subblocks : 8;
-            for(int i = 0; i < c; ++i) {
-                mask |= (1 << (7 - (offset + i)));
-                used_subblocks--;
-            }
-
-            if(mask) {
-                pool_header.block_usage[block++] &= ~mask;
-            }
-
-            /* Clear any full blocks in the middle of the allocation */
-            while(used_subblocks > 8) {
-                pool_header.block_usage[block++] = 0;
-                used_subblocks -= 8;
-            }
-
-            /* Wipe out any trailing subblocks */
-            mask = 0;
-            for(size_t i = 0; i < used_subblocks; ++i) {
-                mask |= (1 << (7 - i));
-            }
-
-            if(mask) {
-                pool_header.block_usage[block++] &= ~mask;
-            }
+            alloc_release_blocks(it);
 
             if(last) {
                 last->next = it->next;
@@ -430,24 +468,45 @@ void alloc_free(void* pool, void* p) {
     DBG_MSG("Free done\n");
 }
 
-void alloc_defrag_start(void* pool) {
-    (void) pool;
-    pool_header.defrag_in_progress = true;
-}
+void alloc_run_defrag(void* pool, defrag_address_move callback, int max_iterations, void* user_data) {
 
-void* alloc_defrag_address(void* pool, void* p) {
-    (void) pool;
-    return p;
-}
+    for(int i = 0; i < max_iterations; ++i) {
+        bool move_occurred = false;
 
-void alloc_defrag_commit(void* pool) {
-    (void) pool;
-    pool_header.defrag_in_progress = false;
-}
+        struct AllocEntry* it = pool_header.allocations;
 
-bool alloc_defrag_in_progress(void* pool) {
-    (void) pool;
-    return pool_header.defrag_in_progress;
+        if(!it) {
+            return;
+        }
+
+        while(it) {
+            void* potential_dest = alloc_next_available(pool, it->size);
+            if(potential_dest && potential_dest < it->pointer) {
+                potential_dest = alloc_malloc(pool, it->size);
+                memcpy(potential_dest, it->pointer, it->size);
+
+                /* Mark this block as now free, but don't fiddle with the
+                 * allocation list */
+                alloc_release_blocks(it);
+
+                /* alloc_malloc() recorded a second AllocEntry for the
+                 * destination; drop it so the moved allocation keeps
+                 * exactly one entry */
+                alloc_remove_entry(potential_dest);
+
+                callback(it->pointer, potential_dest, user_data);
+
+                it->pointer = potential_dest;
+                move_occurred = true;
+            }
+
+            it = it->next;
+        }
+
+        if(!move_occurred) {
+            return;
+        }
+    }
 }
 
 static inline uint8_t count_ones(uint8_t byte) {
@@ -459,6 +518,8 @@ static inline uint8_t count_ones(uint8_t byte) {
 }
 
 size_t alloc_count_free(void* pool) {
+    (void) pool;
+
     uint8_t* it = pool_header.block_usage;
     uint8_t* end = it + pool_header.block_count;
 
diff --git a/GL/alloc/alloc.h b/GL/alloc/alloc.h
index d69bb9f..49400f9 100644
--- a/GL/alloc/alloc.h
+++ b/GL/alloc/alloc.h
@@ -14,10 +14,8 @@ void alloc_shutdown(void* pool);
 void *alloc_malloc(void* pool, size_t size);
 void alloc_free(void* pool, void* p);
 
-void alloc_defrag_start(void* pool);
-void* alloc_defrag_address(void* pool, void* p);
-void alloc_defrag_commit(void* pool);
-bool alloc_defrag_in_progress(void* pool);
+typedef void (defrag_address_move)(void* src, void* dst, void* user_data);
+void alloc_run_defrag(void* pool, defrag_address_move callback, int max_iterations, void* user_data);
 
 size_t alloc_count_free(void* pool);
 size_t alloc_count_continuous(void* pool);
diff --git a/GL/texture.c b/GL/texture.c
index 5fcf473..4871e12 100644
--- a/GL/texture.c
+++ b/GL/texture.c
@@ -78,8 +78,8 @@ static const unsigned short MortonTable256[256] =
 /* Given a 0-based texel location, and an image width/height. Return the
  * new 0-based texel location */
 GL_FORCE_INLINE uint32_t twid_location(uint32_t i, uint32_t w, uint32_t h) {
-    uint16_t y = i % w;
-    uint16_t x = i / w;
+    uint16_t y = i / w;
+    uint16_t x = i % w;
 
     return MortonTable256[y >> 8]   << 17 |
            MortonTable256[x >> 8]   << 16 |
@@ -1611,14 +1611,21 @@ void APIENTRY glTexImage2D(GLenum target, GLint level, GLint internalFormat,
         for(uint32_t i = 0; i < (width * height); ++i) {
             uint32_t newLocation = twid_location(i, width, height);
 
+            assert(newLocation < (width * height));
+            assert((newLocation / 2) < destBytes);
+            assert((i / 2) < srcBytes);
+
             // This is the src/dest byte, but we need to figure
             // out which half based on the odd/even of i
             src = &((uint8_t*) data)[i / 2];
             dst = &conversionBuffer[newLocation / 2];
-            if(i % 2 == 0) {
-                *dst = (*dst & 0xF) | (*src & 0xF0);
+
+            uint8_t src_value = (i % 2) == 0 ? (*src >> 4) : (*src & 0xF);
+
+            if(newLocation % 2 == 0) {
+                *dst = (*dst & 0xF) | (src_value << 4);
             } else {
-                *dst = (*dst & 0xF0) | (*src & 0xF);
+                *dst = (*dst & 0xF0) | (src_value & 0xF);
             }
         }
     } else {
@@ -2044,21 +2051,22 @@ GLuint _glFreeContiguousTextureMemory() {
     return alloc_count_continuous(ALLOC_BASE);
 }
 
-GLAPI GLvoid APIENTRY glDefragmentTextureMemory_KOS(void) {
-    alloc_defrag_start(ALLOC_BASE);
-
-    GLuint id;
-
-    /* Replace all texture pointers */
-    for(id = 0; id < MAX_TEXTURE_COUNT; id++){
+static void update_data_pointer(void* src, void* dst, void* user_data) {
+    (void) user_data;
+
+    for(size_t id = 0; id < MAX_TEXTURE_COUNT; id++){
         TextureObject* txr = (TextureObject*) named_array_get(&TEXTURE_OBJECTS, id);
-        if(txr){
+        if(txr && txr->data == src) {
+            fprintf(stderr, "Defrag moved %p -> %p\n", src, dst);
             gl_assert(txr->index == id);
-            txr->data = alloc_defrag_address(ALLOC_BASE, txr->data);
+            txr->data = dst;
+            return;
         }
     }
+}
 
-    alloc_defrag_commit(ALLOC_BASE);
+GLAPI GLvoid APIENTRY glDefragmentTextureMemory_KOS(void) {
+    alloc_run_defrag(ALLOC_BASE, update_data_pointer, 5, NULL);
 }
 
 GLAPI void APIENTRY glGetTexImage(GLenum tex, GLint lod, GLenum format, GLenum type, GLvoid* img) {
diff --git a/tests/test_allocator.h b/tests/test_allocator.h
index 5117f52..5967983 100644
--- a/tests/test_allocator.h
+++ b/tests/test_allocator.h
@@ -1,7 +1,9 @@
 #include "tools/test.h"
 
-#include
-#include
+#include <cstdint>
+#include <malloc.h>
+#include <utility>
+#include <vector>
 
 #include
 #include
@@ -14,22 +16,52 @@ static inline int round_up(int n, int multiple)
     return ((n + multiple - 1) / multiple) * multiple;
 }
 
+#define POOL_SIZE (16 * 2048)
+
 class AllocatorTests : public test::TestCase {
 public:
-    uint8_t __attribute__((aligned(2048))) pool[16 * 2048];
+    uint8_t* pool = NULL;
+
+    std::vector<std::pair<void*, void*>> defrag_moves;
 
     void set_up() {
+        pool = (uint8_t*) memalign(2048, POOL_SIZE);
         assert(((intptr_t) pool) % 2048 == 0);
     }
 
     void tear_down() {
         alloc_shutdown(pool);
+        free(pool);
+    }
+
+    static void on_defrag(void* src, void* dst, void* user_data) {
+        AllocatorTests* self = (AllocatorTests*) user_data;
+        self->defrag_moves.push_back(std::make_pair(src, dst));
+    }
+
+    void test_defrag() {
+        alloc_init(pool, POOL_SIZE);
+
+        alloc_malloc(pool, 256);
+        void* a2 = alloc_malloc(pool, 256);
+        void* a3 = alloc_malloc(pool, 256);
+
+        alloc_free(pool, a2);
+
+        alloc_run_defrag(pool, &AllocatorTests::on_defrag, 5, this);
+
+        assert_equal(defrag_moves.size(), 1u); // Moved a3 -> a2
+
+        assert_equal(defrag_moves[0].first, a3);
+        assert_equal(defrag_moves[0].second, a2);
+
+        assert_equal(alloc_malloc(pool, 256), a3);
+    }
 
     void test_poor_alloc_aligned() {
         /* If we try to allocate and there are no suitable aligned
          * slots available, we fallback to any available unaligned slots */
-        alloc_init(pool, sizeof(pool));
+        alloc_init(pool, POOL_SIZE);
 
         // Leave only space for an unaligned block
         alloc_malloc(pool, (15 * 2048) - 256);
@@ -44,7 +76,7 @@ public:
         /*
         * If we try to allocate a small block, it should not
        * cross a 2048 boundary unless there is no other option */
-        alloc_init(pool, sizeof(pool));
+        alloc_init(pool, POOL_SIZE);
         alloc_malloc(pool, (15 * 2048) - 256);
         void* a1 = alloc_malloc(pool, 512);
         assert_true((uintptr_t(a1) % 2048) == 0); // Should've aligned to the last 2048 block
@@ -59,14 +91,14 @@ public:
     }
 
     void test_alloc_init() {
-        alloc_init(pool, sizeof(pool));
+        alloc_init(pool, POOL_SIZE);
 
         void* expected_base_address = (void*) round_up((uintptr_t) pool, 2048);
         assert_equal(alloc_next_available(pool, 16), expected_base_address);
         assert_equal(alloc_base_address(pool), expected_base_address);
 
         size_t expected_blocks = (
-            uintptr_t(pool + sizeof(pool)) -
+            uintptr_t(pool + POOL_SIZE) -
             uintptr_t(expected_base_address)
         ) / 2048;
 
@@ -109,7 +141,7 @@ public:
     }
 
     void test_alloc_malloc() {
-        alloc_init(pool, sizeof(pool));
+        alloc_init(pool, POOL_SIZE);
 
         uint8_t* base_address = (uint8_t*) alloc_base_address(pool);
         void* a1 = alloc_malloc(pool, 1024);
diff --git a/tools/test.h b/tools/test.h
index 6341a36..b4ef409 100644
--- a/tools/test.h
+++ b/tools/test.h
@@ -289,7 +289,13 @@ public:
         std::function<void()> func = std::bind(method, dynamic_cast<T*>(instance.get()));
 
         tests_.push_back([=]() {
             instance->set_up();
-            func();
+            try {
+                func();
+            } catch(...) {
+                instance->tear_down();
+                throw;
+            }
+            instance->tear_down();
         });
     }
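
Usage sketch (reviewer note): a minimal, self-contained example of how a
client is expected to drive the new defrag API, using only the signatures
this patch introduces. The pool buffer, the MyTexture record and the
fixup callback are hypothetical stand-ins for what GL/texture.c does with
TextureObject and update_data_pointer.

    #include <stdint.h>
    #include <stdio.h>

    #include "GL/alloc/alloc.h"

    /* Hypothetical client-side record that owns one pool allocation */
    typedef struct {
        void* data;
    } MyTexture;

    /* A defrag_address_move callback: alloc_run_defrag() reports every
     * move as (old address, new address, user_data) so the caller can
     * patch up any pointers it still holds into the pool */
    static void fixup(void* src, void* dst, void* user_data) {
        MyTexture* tex = (MyTexture*) user_data;
        if(tex->data == src) {
            tex->data = dst;
        }
    }

    int main(void) {
        static uint8_t __attribute__((aligned(2048))) pool[16 * 2048];

        alloc_init(pool, sizeof(pool));

        MyTexture tex;
        void* hole = alloc_malloc(pool, 256);
        tex.data = alloc_malloc(pool, 256);
        alloc_free(pool, hole); /* leaves a gap below tex.data */

        /* Walk the allocation list at most five times, sliding each
         * allocation as low in the pool as it fits; returns early once
         * a pass moves nothing */
        alloc_run_defrag(pool, fixup, 5, &tex);

        printf("tex.data now lives at %p\n", tex.data);

        alloc_shutdown(pool);
        return 0;
    }

glDefragmentTextureMemory_KOS() above follows the same pattern, passing
update_data_pointer so that each TextureObject whose backing allocation
moves has its data pointer rewritten.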