Implement memory defragmentation

Luke Benstead 2023-09-08 17:49:46 +01:00
parent db9e1cd424
commit 3308a57e59
5 changed files with 170 additions and 97 deletions
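
This commit replaces the old start/address/commit defrag API with a single callback-driven entry point, alloc_run_defrag, declared in alloc.h below. A minimal sketch of how a caller might drive it (the on_block_moved callback and compact_pool wrapper are illustrative, not part of this commit):

    #include <stdio.h>
    #include "alloc.h"

    /* Called once for every allocation the defragmenter relocates. */
    static void on_block_moved(void* src, void* dst, void* user_data) {
        /* A real caller would patch any pointers that still reference src. */
        (void) user_data;
        fprintf(stderr, "moved %p -> %p\n", src, dst);
    }

    void compact_pool(void* pool) {
        /* Run up to 5 passes; the defragmenter stops early once a pass
         * moves nothing (or there are no allocations at all). */
        alloc_run_defrag(pool, on_block_moved, 5, NULL);
    }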

View File

@@ -6,7 +6,7 @@
 #include "alloc.h"

-/* This allocator is designed so that all allocations larger
+/* This allocator is designed so that ideally all allocations larger
  * than 2k, fall on a 2k boundary. Smaller allocations will
  * never cross a 2k boundary.
  *
@@ -15,9 +15,9 @@
  * blocks anyway as they have to be 2k aligned (so you'd need to
  * store them in reverse or something)
  *
- * Defragmenting the pool will move allocations less than 2k
- * first, and then shift any full 2k blocks to the start of the
- * address space.
+ * Defragmenting the pool will move larger allocations first, then
+ * smaller ones, recursively until you tell it to stop, or until things
+ * stop moving.
  *
  * The maximum pool size is 8M, made up of:
  *
@@ -41,10 +41,7 @@
  *
  * FIXME:
  *
- * - Allocations < 2048 can still cross boundaries
  * - Only operates on one pool (ignores what you pass)
- * - If there are no 2048 aligned blocks, we should fall-back to unaligned
- * - Defrag not implemented!
  */

 #include <assert.h>
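
For reference, the granularity the comments above describe: the pool is tracked in 2048-byte blocks of eight 256-byte subblocks, and anything of 2048 bytes or more should ideally start on a block boundary. A small sketch of that mapping, with helper names that are illustrative rather than taken from alloc.c:

    #include <stddef.h>
    #include <stdbool.h>

    /* Illustrative helpers only: the pool is tracked as 2048-byte blocks,
     * each split into eight 256-byte subblocks (one usage bit per subblock). */
    static size_t size_to_subblocks(size_t size) {
        return (size + 255) / 256;   /* round up to whole 256-byte subblocks */
    }

    static bool needs_2k_alignment(size_t size) {
        return size >= 2048;         /* large allocations start on a block */
    }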
@@ -90,7 +87,6 @@ typedef struct {
     size_t pool_size;       // Size of the memory pool
     uint8_t* base_address;  // First 2k aligned address in the pool
     size_t block_count;     // Number of 2k blocks in the pool
-    bool defrag_in_progress;

     /* It's frustrating that we need to do this dynamically
      * but we need to know the size allocated when we free()...
@@ -104,7 +100,7 @@ typedef struct {
 static PoolHeader pool_header = {
-    {0}, NULL, 0, NULL, 0, false, NULL
+    {0}, NULL, 0, NULL, 0, NULL
 };

 void* alloc_base_address(void* pool) {
@@ -147,8 +143,6 @@ void* alloc_next_available_ex(void* pool, size_t required_size, size_t* start_su
     uint32_t required_subblocks = (required_size / 256);
     if(required_size % 256) required_subblocks += 1;

-    // uint8_t* end = pool_header.block_usage + pool_header.block_count;

     /* Anything gte to 2048 must be aligned to a 2048 boundary */
     bool requires_alignment = required_size >= 2048;
@@ -160,7 +154,6 @@ void* alloc_next_available_ex(void* pool, size_t required_size, size_t* start_su
      * but it's not aligned, or it's straddling a 2k boundary, then we store
      * it here and if we reach the end of the search and find nothing better
      * we use this instead */
     uint8_t* poor_option = NULL;
     size_t poor_start_subblock = 0;
@@ -180,7 +173,16 @@ void* alloc_next_available_ex(void* pool, size_t required_size, size_t* start_su
         /* Now let's see how many consecutive blocks we can find */
         for(int i = 0; i < 8; ++i) {
             if((t & 0x80) == 0) {
-                if(requires_alignment && found_subblocks == 0 && i != 0) {
+                bool block_overflow = (
+                    required_size < 2048 && found_subblocks > 0 && i == 0
+                );
+
+                bool reset_subblocks = (
+                    (requires_alignment && found_subblocks == 0 && i != 0) ||
+                    block_overflow
+                );
+
+                if(reset_subblocks) {
                     // Ignore this subblock, because we want the first subblock to be aligned
                     // at a 2048 boundary and this one isn't (i != 0)
                     found_subblocks = 0;
@@ -188,6 +190,12 @@ void* alloc_next_available_ex(void* pool, size_t required_size, size_t* start_su
                     found_subblocks++;
                 }

+                /* If we reset the subblocks due to an overflow, we still
+                 * want to count this free subblock in our count */
+                if(block_overflow) {
+                    found_subblocks++;
+                }
+
                 found_poor_subblocks++;

                 if(found_subblocks >= required_subblocks) {
@@ -234,7 +242,7 @@ int alloc_init(void* pool, size_t size) {
     uint8_t* p = (uint8_t*) pool;

-    memset(pool_header.block_usage, 0, sizeof(pool_header.block_usage));
+    memset(pool_header.block_usage, 0, BLOCK_COUNT);
     pool_header.pool = pool;
     pool_header.pool_size = size;

@@ -253,6 +261,10 @@ int alloc_init(void* pool, size_t size) {
 void alloc_shutdown(void* pool) {
     (void) pool;

+    if(!pool_header.pool) {
+        return;
+    }
+
     struct AllocEntry* it = pool_header.allocations;
     while(it) {
         struct AllocEntry* next = it->next;
@@ -261,6 +273,7 @@ void alloc_shutdown(void* pool) {
     }

     memset(&pool_header, 0, sizeof(pool_header));
+    pool_header.pool = NULL;
 }

 static inline uint32_t size_to_subblock_count(size_t size) {
@@ -285,10 +298,6 @@ void* alloc_malloc(void* pool, size_t size) {
     size_t start_subblock, required_subblocks;
     void* ret = alloc_next_available_ex(pool, size, &start_subblock, &required_subblocks);

-    if(size >= 2048) {
-        assert(((uintptr_t) ret) % 2048 == 0);
-    }
-
     if(ret) {
         size_t block;
         uint8_t offset;
@@ -367,6 +376,45 @@ void* alloc_malloc(void* pool, size_t size) {
     return ret;
 }

+static void alloc_release_blocks(struct AllocEntry* it) {
+    size_t used_subblocks = size_to_subblock_count(it->size);
+    size_t subblock = subblock_from_pointer(it->pointer);
+    size_t block;
+    uint8_t offset;
+    block_and_offset_from_subblock(subblock, &block, &offset);
+
+    uint8_t mask = 0;
+
+    DBG_MSG("Free: size: %d, us: %d, sb: %d, off: %d\n", it->size, used_subblocks, block, offset);
+
+    /* Wipe out any leading subblocks */
+    int c = (used_subblocks < 8) ? used_subblocks : 8;
+    for(int i = 0; i < c; ++i) {
+        mask |= (1 << (7 - (offset + i)));
+        used_subblocks--;
+    }
+
+    if(mask) {
+        pool_header.block_usage[block++] &= ~mask;
+    }
+
+    /* Clear any full blocks in the middle of the allocation */
+    while(used_subblocks > 8) {
+        pool_header.block_usage[block++] = 0;
+        used_subblocks -= 8;
+    }
+
+    /* Wipe out any trailing subblocks */
+    mask = 0;
+    for(size_t i = 0; i < used_subblocks; ++i) {
+        mask |= (1 << (7 - i));
+    }
+
+    if(mask) {
+        pool_header.block_usage[block++] &= ~mask;
+    }
+}
+
 void alloc_free(void* pool, void* p) {
     (void) pool;
@@ -374,42 +422,7 @@ void alloc_free(void* pool, void* p) {
     struct AllocEntry* last = NULL;
     while(it) {
         if(it->pointer == p) {
-            size_t used_subblocks = size_to_subblock_count(it->size);
-            size_t subblock = subblock_from_pointer(p);
-            size_t block;
-            uint8_t offset;
-            block_and_offset_from_subblock(subblock, &block, &offset);
-
-            uint8_t mask = 0;
-
-            DBG_MSG("Free: size: %d, us: %d, sb: %d, off: %d\n", it->size, used_subblocks, block, offset);
-
-            /* Wipe out any leading subblocks */
-            int c = (used_subblocks < 8) ? used_subblocks : 8;
-            for(int i = 0; i < c; ++i) {
-                mask |= (1 << (7 - (offset + i)));
-                used_subblocks--;
-            }
-
-            if(mask) {
-                pool_header.block_usage[block++] &= ~mask;
-            }
-
-            /* Clear any full blocks in the middle of the allocation */
-            while(used_subblocks > 8) {
-                pool_header.block_usage[block++] = 0;
-                used_subblocks -= 8;
-            }
-
-            /* Wipe out any trailing subblocks */
-            mask = 0;
-            for(size_t i = 0; i < used_subblocks; ++i) {
-                mask |= (1 << (7 - i));
-            }
-
-            if(mask) {
-                pool_header.block_usage[block++] &= ~mask;
-            }
+            alloc_release_blocks(it);

             if(last) {
                 last->next = it->next;
@@ -430,24 +443,40 @@ void alloc_free(void* pool, void* p) {
     DBG_MSG("Free done\n");
 }

-void alloc_defrag_start(void* pool) {
-    (void) pool;
-    pool_header.defrag_in_progress = true;
-}
-
-void* alloc_defrag_address(void* pool, void* p) {
-    (void) pool;
-    return p;
-}
-
-void alloc_defrag_commit(void* pool) {
-    (void) pool;
-    pool_header.defrag_in_progress = false;
-}
-
-bool alloc_defrag_in_progress(void* pool) {
-    (void) pool;
-    return pool_header.defrag_in_progress;
+void alloc_run_defrag(void* pool, defrag_address_move callback, int max_iterations, void* user_data) {
+
+    for(int i = 0; i < max_iterations; ++i) {
+        bool move_occurred = false;
+
+        struct AllocEntry* it = pool_header.allocations;
+
+        if(!it) {
+            return;
+        }
+
+        while(it) {
+            void* potential_dest = alloc_next_available(pool, it->size);
+            if(potential_dest < it->pointer) {
+                potential_dest = alloc_malloc(pool, it->size);
+                memcpy(potential_dest, it->pointer, it->size);
+
+                /* Mark this block as now free, but don't fiddle with the
+                 * allocation list */
+                alloc_release_blocks(it);
+
+                callback(it->pointer, potential_dest, user_data);
+
+                it->pointer = potential_dest;
+                move_occurred = true;
+            }
+
+            it = it->next;
+        }
+
+        if(!move_occurred) {
+            return;
+        }
+    }
 }

 static inline uint8_t count_ones(uint8_t byte) {
@@ -459,6 +488,8 @@ static inline uint8_t count_ones(uint8_t byte) {
 }

 size_t alloc_count_free(void* pool) {
+    (void) pool;
+
     uint8_t* it = pool_header.block_usage;
     uint8_t* end = it + pool_header.block_count;

View File

@@ -14,10 +14,8 @@ void alloc_shutdown(void* pool);
 void *alloc_malloc(void* pool, size_t size);
 void alloc_free(void* pool, void* p);

-void alloc_defrag_start(void* pool);
-void* alloc_defrag_address(void* pool, void* p);
-void alloc_defrag_commit(void* pool);
-bool alloc_defrag_in_progress(void* pool);
+typedef void (defrag_address_move)(void*, void*, void*);
+void alloc_run_defrag(void* pool, defrag_address_move callback, int max_iterations, void* user_data);

 size_t alloc_count_free(void* pool);
 size_t alloc_count_continuous(void* pool);

View File

@@ -78,8 +78,8 @@ static const unsigned short MortonTable256[256] =
 /* Given a 0-based texel location, and an image width/height. Return the
  * new 0-based texel location */
 GL_FORCE_INLINE uint32_t twid_location(uint32_t i, uint32_t w, uint32_t h) {
-    uint16_t y = i % w;
-    uint16_t x = i / w;
+    uint16_t y = i / w;
+    uint16_t x = i % w;

     return MortonTable256[y >> 8] << 17 |
            MortonTable256[x >> 8] << 16 |
@@ -1611,14 +1611,21 @@ void APIENTRY glTexImage2D(GLenum target, GLint level, GLint internalFormat,
             for(uint32_t i = 0; i < (width * height); ++i) {
                 uint32_t newLocation = twid_location(i, width, height);

+                assert(newLocation < (width * height));
+                assert((newLocation / 2) < destBytes);
+                assert((i / 2) < srcBytes);
+
                 // This is the src/dest byte, but we need to figure
                 // out which half based on the odd/even of i
                 src = &((uint8_t*) data)[i / 2];
                 dst = &conversionBuffer[newLocation / 2];

-                if(i % 2 == 0) {
-                    *dst = (*dst & 0xF) | (*src & 0xF0);
+                uint8_t src_value = (i % 2) == 0 ? (*src >> 4) : (*src & 0xF);
+
+                if(newLocation % 2 == 0) {
+                    *dst = (*dst & 0xF) | (src_value << 4);
                 } else {
-                    *dst = (*dst & 0xF0) | (*src & 0xF);
+                    *dst = (*dst & 0xF0) | (src_value & 0xF);
                 }
             }
         } else {
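
The repacking fix above reads the source nibble based on the parity of i, but writes it into the destination byte based on the parity of newLocation, rather than using i for both. A standalone sketch of that step, assuming 4bpp data packed two texels per byte (the function and variable names here are illustrative, not from the commit):

    #include <stdint.h>

    /* Repack one 4bpp texel from linear position i to twiddled position n.
     * src and dst point at the start of the source and destination buffers. */
    static void move_4bpp_texel(const uint8_t* src, uint8_t* dst, uint32_t i, uint32_t n) {
        /* Even linear indices live in the high nibble of the source byte. */
        uint8_t value = (i % 2 == 0) ? (src[i / 2] >> 4) : (src[i / 2] & 0xF);

        /* Even twiddled indices go to the high nibble of the destination byte. */
        if(n % 2 == 0) {
            dst[n / 2] = (dst[n / 2] & 0x0F) | (value << 4);
        } else {
            dst[n / 2] = (dst[n / 2] & 0xF0) | value;
        }
    }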
@@ -2044,21 +2051,20 @@ GLuint _glFreeContiguousTextureMemory() {
     return alloc_count_continuous(ALLOC_BASE);
 }

-GLAPI GLvoid APIENTRY glDefragmentTextureMemory_KOS(void) {
-    alloc_defrag_start(ALLOC_BASE);
-
-    GLuint id;
-
-    /* Replace all texture pointers */
-    for(id = 0; id < MAX_TEXTURE_COUNT; id++){
+static void update_data_pointer(void* src, void* dst, void*) {
+    for(size_t id = 0; id < MAX_TEXTURE_COUNT; id++){
         TextureObject* txr = (TextureObject*) named_array_get(&TEXTURE_OBJECTS, id);
-        if(txr){
+        if(txr && txr->data == src) {
+            fprintf(stderr, "Defrag moved 0x%x -> 0x%x\n", src, dst);
             gl_assert(txr->index == id);
-            txr->data = alloc_defrag_address(ALLOC_BASE, txr->data);
+            txr->data = dst;
+            return;
         }
     }
+}

-    alloc_defrag_commit(ALLOC_BASE);
+GLAPI GLvoid APIENTRY glDefragmentTextureMemory_KOS(void) {
+    alloc_run_defrag(ALLOC_BASE, update_data_pointer, 5, NULL);
 }

 GLAPI void APIENTRY glGetTexImage(GLenum tex, GLint lod, GLenum format, GLenum type, GLvoid* img) {

View File

@@ -1,7 +1,9 @@
 #include "tools/test.h"

-#include <stdint.h>
-#include <assert.h>
+#include <cstdint>
+#include <cassert>
+#include <malloc.h>
+#include <utility>

 #include <GL/gl.h>
 #include <GL/glkos.h>
@@ -14,22 +16,52 @@ static inline int round_up(int n, int multiple)
     return ((n + multiple - 1) / multiple) * multiple;
 }

+#define POOL_SIZE (16 * 2048)
+
 class AllocatorTests : public test::TestCase {
 public:
-    uint8_t __attribute__((aligned(2048))) pool[16 * 2048];
+    uint8_t* pool = NULL;
+    std::vector<std::pair<void*, void*>> defrag_moves;

     void set_up() {
+        pool = (uint8_t*) memalign(2048, POOL_SIZE);
         assert(((intptr_t) pool) % 2048 == 0);
     }

     void tear_down() {
         alloc_shutdown(pool);
+        free(pool);
+    }
+
+    static void on_defrag(void* src, void* dst, void* user_data) {
+        AllocatorTests* self = (AllocatorTests*) user_data;
+        self->defrag_moves.push_back(std::make_pair(src, dst));
+    }
+
+    void test_defrag() {
+        alloc_init(pool, POOL_SIZE);
+
+        alloc_malloc(pool, 256);
+        void* a2 = alloc_malloc(pool, 256);
+        void* a3 = alloc_malloc(pool, 256);
+
+        alloc_free(pool, a2);
+
+        alloc_run_defrag(pool, &AllocatorTests::on_defrag, 5, this);
+
+        assert_equal(defrag_moves.size(), 1u);  // Moved a3 -> a2
+        assert_equal(defrag_moves[0].first, a3);
+        assert_equal(defrag_moves[0].second, a2);
+
+        assert_equal(alloc_malloc(pool, 256), a3);
     }

     void test_poor_alloc_aligned() {
         /* If we try to allocate and there are no suitable aligned
          * slots available, we fallback to any available unaligned slots */
-        alloc_init(pool, sizeof(pool));
+        alloc_init(pool, POOL_SIZE);

         // Leave only space for an unaligned block
         alloc_malloc(pool, (15 * 2048) - 256);
@@ -44,7 +76,7 @@ public:
         /*
         * If we try to allocate a small block, it should not
         * cross a 2048 boundary unless there is no other option */
-        alloc_init(pool, sizeof(pool));
+        alloc_init(pool, POOL_SIZE);

         alloc_malloc(pool, (15 * 2048) - 256);
         void* a1 = alloc_malloc(pool, 512);
         assert_true((uintptr_t(a1) % 2048) == 0); // Should've aligned to the last 2048 block
@@ -59,14 +91,14 @@ public:
     }

     void test_alloc_init() {
-        alloc_init(pool, sizeof(pool));
+        alloc_init(pool, POOL_SIZE);

         void* expected_base_address = (void*) round_up((uintptr_t) pool, 2048);
         assert_equal(alloc_next_available(pool, 16), expected_base_address);
         assert_equal(alloc_base_address(pool), expected_base_address);

         size_t expected_blocks = (
-            uintptr_t(pool + sizeof(pool)) -
+            uintptr_t(pool + POOL_SIZE) -
             uintptr_t(expected_base_address)
         ) / 2048;
@@ -109,7 +141,7 @@ public:
     }

     void test_alloc_malloc() {
-        alloc_init(pool, sizeof(pool));
+        alloc_init(pool, POOL_SIZE);

         uint8_t* base_address = (uint8_t*) alloc_base_address(pool);

         void* a1 = alloc_malloc(pool, 1024);

View File

@@ -289,7 +289,13 @@ public:
         std::function<void()> func = std::bind(method, dynamic_cast<T*>(instance.get()));

         tests_.push_back([=]() {
             instance->set_up();
-            func();
+            try {
+                func();
+            } catch(...) {
+                instance->tear_down();
+                throw;
+            }
             instance->tear_down();
         });
     }