Merge commit 'f49a98ab543b1be0049e07456fb23022435ba450' into texture-refactor

2023-08-31 20:49:42 +01:00 · 2023-08-31 20:49:42 +01:00 · fd9a9d1c25
commit fd9a9d1c25
parent 77531ca347 f49a98ab54
5 changed files with 489 additions and 1 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -81,6 +81,7 @@ set(
    GL/texture.c
    GL/util.c
    GL/yalloc/yalloc.c
+    GL/alloc/alloc.c
    ${CMAKE_CURRENT_BINARY_DIR}/version.c
 )

--- a/GL/alloc/alloc.c
+++ b/GL/alloc/alloc.c
@ -0,0 +1,370 @@
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "alloc.h"
+
+
+/* This allocator is designed so that all allocations of 2k or
+ * more start on a 2k boundary. Smaller allocations are intended
+ * never to cross a 2k boundary (not yet enforced, see FIXME below).
+ *
+ * House keeping is stored in RAM to avoid reading back from the
+ * VRAM to check for usage. Headers can't be easily stored in the
+ * blocks anyway as they have to be 2k aligned (so you'd need to
+ * store them in reverse or something)
+ *
+ * Defragmenting the pool will move allocations less than 2k
+ * first, and then shift any full 2k blocks to the start of the
+ * address space.
+ *
+ * The maximum pool size is 8M, made up of:
+ *
+ * - 4096 blocks of 2k
+ * - each with 8 sub-blocks of 256 bytes
+ *
+ * Why?
+ *
+ * The PVR performs better if textures don't cross 2K memory
+ * addresses, so we try to avoid that. Obviously we can't
+ * if the allocation is > 2k, but in that case we can at least
+ * align with 2k and the VQ codebook (which is usually 2k) will
+ * be in its own page.
+ *
+ * The smallest PVR texture allowed is 8x8 at 16 bit (so 128 bytes)
+ * but we're unlikely to use too many of those, so having a min sub-block
+ * size of 256 should be OK (a 16x16 image is 512, so two sub-blocks).
+ *
+ * We could go down to 128 bytes if wastage is an issue, but then we have
+ * to store double the number of usage markers.
+ *
+ * FIXME:
+ *
+ *  - Allocations < 2048 can still cross boundaries
+ */
+
+#include <assert.h>
+
+/* Largest pool size accepted by alloc_init(), in bytes. */
+#define EIGHT_MEG (8 * 1024 * 1024)
+/* Size in bytes of one block of the pool. */
+#define TWO_KILOBYTES (2 * 1024)
+/* Number of 2k blocks in a maximum-size pool (4096); sizes the usage table. */
+#define BLOCK_COUNT (EIGHT_MEG / TWO_KILOBYTES)
+
+/* Round `n` up to the nearest multiple of `multiple`.
+ *
+ * Works on uintptr_t so that it can be applied to addresses: an `int`
+ * would truncate 64-bit pointers, and on 32-bit targets any address at or
+ * above 0x80000000 would become negative and be rounded incorrectly by the
+ * truncating signed division.
+ *
+ * `multiple` must be non-zero. A value that is already a multiple is
+ * returned unchanged.
+ */
+static inline uintptr_t round_up(uintptr_t n, uintptr_t multiple)
+{
+    assert(multiple);
+
+    /* Work from the remainder rather than (n + multiple - 1) so that the
+     * intermediate value cannot wrap for addresses near the top of memory */
+    const uintptr_t remainder = n % multiple;
+    return remainder ? n + (multiple - remainder) : n;
+}
+
+/* One node of the singly linked list of live allocations. Each node is
+ * heap-allocated by alloc_malloc() and released by alloc_free() or
+ * alloc_shutdown(). */
+struct AllocEntry {
+    void* pointer;            /* Address inside the pool handed to the caller */
+    size_t size;              /* Size in bytes as requested from alloc_malloc() */
+    struct AllocEntry* next;  /* Next node in the list, or NULL at the tail */
+};
+
+
+/* Bookkeeping for the pool managed by this file. There is a single
+ * file-level instance (pool_header) rather than one per pool. */
+typedef struct {
+    /* This is a usage bitmask for each block. A block
+     * is divided into 8 x 256 byte subblocks. If a block
+     * is entirely used, its value will be 255, if
+     * it's entirely free then it will be 0. The most significant
+     * bit (0x80) tracks the first subblock of the block.
+     */
+    uint8_t block_usage[BLOCK_COUNT];
+    uint8_t* pool;  // Pointer to the memory pool
+    size_t pool_size; // Size of the memory pool
+    uint8_t* base_address; // First 2k aligned address in the pool
+    size_t block_count;  // Number of 2k blocks in the pool
+
+    /* It's frustrating that we need to do this dynamically
+     * but we need to know the size allocated when we free()...
+     * we could store it statically but it would take 64k if we had
+     * an array of subblock index -> allocation size, where there would be
+     * 32768 (2 ** 15) entries of 16 bit sizes. The drawback (aside from
+     * the memory usage) would be that we wouldn't be able to order by size,
+     * so defragging would take much more time. */
+    struct AllocEntry* allocations;
+} PoolHeader;
+
+
+/* The one and only pool state. Every alloc_* function operates on this
+ * instance and ignores its `pool` argument, apart from alloc_init() which
+ * records the pool pointer here. A NULL `pool` member means "not
+ * initialised". */
+static PoolHeader pool_header = {
+    {0}, NULL, 0, NULL, 0, NULL
+};
+
+/* Return the first 2k-aligned address inside the pool, as recorded by
+ * alloc_init(). The `pool` argument is ignored because all state lives in
+ * the file-level pool_header. */
+void* alloc_base_address(void* pool) {
+    uint8_t* aligned_start = pool_header.base_address;
+
+    (void) pool;
+    return aligned_start;
+}
+
+/* Return how many whole 2k blocks the pool holds, as recorded by
+ * alloc_init(). The `pool` argument is ignored because all state lives in
+ * the file-level pool_header. */
+size_t alloc_block_count(void* pool) {
+    const size_t blocks = pool_header.block_count;
+
+    (void) pool;
+    return blocks;
+}
+
+/* Find where an allocation of `required_size` bytes would be placed,
+ * without reserving it.
+ *
+ * The usage table is scanned from the start of the pool for the first run
+ * of consecutive free 256 byte subblocks that is long enough. Requests of
+ * 2048 bytes or more only accept runs that begin on a 2k block boundary.
+ *
+ * pool          - ignored, all state lives in the file-level pool_header
+ * required_size - size of the request in bytes
+ *
+ * Returns the address of the run, or NULL if the request is zero sized, is
+ * larger than the pool, or no suitable run exists. The scan never looks
+ * beyond `block_count` blocks, so the returned range always lies inside
+ * the pool.
+ */
+void* alloc_next_available(void* pool, size_t required_size) {
+    (void) pool;
+
+    /* A zero sized request needs no subblocks; there is nothing to place */
+    if(required_size == 0) {
+        return NULL;
+    }
+
+    /* Kept as size_t so that very large requests are rejected below rather
+     * than being truncated into small ones */
+    const size_t required_subblocks =
+        (required_size / 256) + ((required_size % 256) ? 1 : 0);
+
+    if(required_subblocks > pool_header.block_count * 8) {
+        return NULL;
+    }
+
+    /* Anything gte to 2048 must be aligned to a 2048 boundary */
+    const bool requires_alignment = required_size >= 2048;
+
+    size_t run_start = 0;   /* Index of the first subblock of the current run */
+    size_t run_length = 0;  /* Consecutive free subblocks found so far */
+
+    for(size_t block = 0; block < pool_header.block_count; ++block) {
+        const uint8_t usage = pool_header.block_usage[block];
+
+        /* Optimisation only. A full block just breaks the current run */
+        if(usage == 255) {
+            run_length = 0;
+            continue;
+        }
+
+        for(size_t i = 0; i < 8; ++i) {
+            /* The most significant bit tracks the first subblock */
+            const bool in_use = (usage & (0x80 >> i)) != 0;
+
+            if(in_use) {
+                run_length = 0;
+                continue;
+            }
+
+            if(run_length == 0) {
+                if(requires_alignment && i != 0) {
+                    /* An aligned run may only begin on the first
+                     * subblock of a block, so skip this one */
+                    continue;
+                }
+
+                run_start = (block * 8) + i;
+            }
+
+            if(++run_length >= required_subblocks) {
+                /* We found space! */
+                return pool_header.base_address + (run_start * 256);
+            }
+        }
+    }
+
+    return NULL;
+}
+
+/* Start managing `size` bytes of memory beginning at `pool`.
+ *
+ * The usage table is cleared, the first 2k-aligned address inside the pool
+ * becomes the base address, and the number of whole 2k blocks between the
+ * base address and the end of the pool is recorded.
+ *
+ * pool - start of the memory to manage, must not be NULL
+ * size - size of that memory in bytes, at most 8M
+ *
+ * Returns 0 on success. Returns -1 if a pool is already initialised, if
+ * `pool` is NULL, if `size` exceeds 8M, or if the pool is too small to
+ * contain a 2k-aligned address.
+ */
+int alloc_init(void* pool, size_t size) {
+    if(pool_header.pool) {
+        return -1;
+    }
+
+    /* A NULL pool is indistinguishable from "not initialised" */
+    if(!pool) {
+        return -1;
+    }
+
+    if(size > EIGHT_MEG) {  // FIXME: >= ?
+        return -1;
+    }
+
+    uint8_t* p = (uint8_t*) pool;
+
+    /* Work out the alignment on uintptr_t; squeezing the address through
+     * an int would truncate it or turn it negative */
+    const uintptr_t misalignment = ((uintptr_t) p) % TWO_KILOBYTES;
+    const size_t padding = misalignment ? (size_t) (TWO_KILOBYTES - misalignment) : 0;
+
+    /* Without this the block count below would underflow */
+    if(padding > size) {
+        return -1;
+    }
+
+    memset(pool_header.block_usage, 0, sizeof(pool_header.block_usage));
+    pool_header.pool = p;
+    pool_header.pool_size = size;
+    pool_header.base_address = p + padding;
+    pool_header.block_count = (size - padding) / TWO_KILOBYTES;
+    pool_header.allocations = NULL;
+
+    assert(((uintptr_t) pool_header.base_address) % 2048 == 0);
+
+    return 0;
+}
+
+/* Stop managing the pool: release every bookkeeping node and zero the
+ * whole pool_header so that alloc_init() can be called again. The pool
+ * memory itself is not touched. The `pool` argument is ignored. */
+void alloc_shutdown(void* pool) {
+    (void) pool;
+
+    struct AllocEntry* entry = pool_header.allocations;
+    while(entry != NULL) {
+        /* Step forward before freeing so we never read a freed node */
+        struct AllocEntry* doomed = entry;
+        entry = entry->next;
+        free(doomed);
+    }
+
+    memset(&pool_header, 0, sizeof(pool_header));
+}
+
+/* Number of 256 byte subblocks needed to hold `size` bytes, rounding any
+ * partial subblock up to a whole one. */
+static inline uint32_t size_to_subblock_count(size_t size) {
+    const uint32_t whole = (size / 256);
+    const uint32_t partial = (size % 256) ? 1 : 0;
+    return whole + partial;
+}
+
+/* Index of the 256 byte subblock that contains `p`, counted from the
+ * pool's base address. */
+static inline uint32_t subblock_from_pointer(void* p) {
+    uint8_t* address = (uint8_t*) p;
+    ptrdiff_t byte_offset = address - pool_header.base_address;
+    return byte_offset / 256;
+}
+
+/* Reserve `size` bytes from the pool.
+ *
+ * The location is chosen by alloc_next_available(). Every 256 byte subblock
+ * covered by the allocation is then marked as used in the usage table and
+ * a bookkeeping node is added to the allocation list.
+ *
+ * pool - ignored here, passed on to alloc_next_available()
+ * size - number of bytes to reserve
+ *
+ * Returns the address of the allocation, or NULL if `size` is zero, no
+ * space is available, or the bookkeeping node cannot be allocated. When
+ * NULL is returned the pool state is left unchanged.
+ */
+void* alloc_malloc(void* pool, size_t size) {
+    if(size == 0) {
+        return NULL;
+    }
+
+    void* ret = alloc_next_available(pool, size);
+    if(!ret) {
+        return NULL;
+    }
+
+    if(size >= 2048) {
+        assert(((uintptr_t) ret) % 2048 == 0);
+    }
+
+    size_t subblock = subblock_from_pointer(ret);
+    size_t remaining = size_to_subblock_count(size);
+
+    /* Never mark subblocks that lie outside the pool */
+    if(subblock + remaining > pool_header.block_count * 8) {
+        return NULL;
+    }
+
+    /* Get the bookkeeping node first, so that a failure here leaves the
+     * usage table untouched */
+    struct AllocEntry* new_entry = malloc(sizeof *new_entry);
+    if(!new_entry) {
+        return NULL;
+    }
+
+    new_entry->pointer = ret;
+    new_entry->size = size;
+    new_entry->next = NULL;
+
+    /* Mark the subblocks as used. The most significant bit of a usage byte
+     * tracks the first subblock of its block, so subblock `n` of a block
+     * is bit (0x80 >> n). Whole blocks are filled in one go. */
+    while(remaining > 0) {
+        const size_t block = subblock / 8;
+        const size_t bit = subblock % 8;
+
+        if(bit == 0 && remaining >= 8) {
+            pool_header.block_usage[block] = 255;
+            subblock += 8;
+            remaining -= 8;
+        } else {
+            pool_header.block_usage[block] |= (uint8_t) (0x80 >> bit);
+            subblock += 1;
+            remaining -= 1;
+        }
+    }
+
+    /* Insert allocations in the list by size descending so that when we
+     * defrag we can move the larger blocks before the smaller ones without
+     * much effort. The new node goes in front of the first node that is
+     * strictly smaller, or at the tail if there is none. */
+    struct AllocEntry** link = &pool_header.allocations;
+    while(*link && (*link)->size >= size) {
+        link = &(*link)->next;
+    }
+
+    new_entry->next = *link;
+    *link = new_entry;
+
+    return ret;
+}
+
+/* Release an allocation previously returned by alloc_malloc().
+ *
+ * Every 256 byte subblock covered by the allocation is marked as free in
+ * the usage table, and the bookkeeping node is unlinked and freed.
+ *
+ * pool - ignored, all state lives in the file-level pool_header
+ * p    - address returned by alloc_malloc()
+ *
+ * Pointers that are not in the allocation list (including NULL) are
+ * ignored.
+ */
+void alloc_free(void* pool, void* p) {
+    (void) pool;
+
+    /* Walk the list through the link that points at each node, so that
+     * unlinking the head and unlinking a later node are the same operation */
+    struct AllocEntry** link = &pool_header.allocations;
+    while(*link && (*link)->pointer != p) {
+        link = &(*link)->next;
+    }
+
+    struct AllocEntry* entry = *link;
+    if(!entry) {
+        return;
+    }
+
+    size_t subblock = subblock_from_pointer(p);
+    size_t remaining = size_to_subblock_count(entry->size);
+
+    /* Mark the subblocks as free. The most significant bit of a usage byte
+     * tracks the first subblock of its block, so subblock `n` of a block
+     * is bit (0x80 >> n). Whole blocks are cleared in one go. */
+    while(remaining > 0) {
+        const size_t block = subblock / 8;
+        const size_t bit = subblock % 8;
+
+        if(bit == 0 && remaining >= 8) {
+            pool_header.block_usage[block] = 0;
+            subblock += 8;
+            remaining -= 8;
+        } else {
+            pool_header.block_usage[block] &= (uint8_t) ~(0x80 >> bit);
+            subblock += 1;
+            remaining -= 1;
+        }
+    }
+
+    *link = entry->next;
+    free(entry);
+}
+
+/* Placeholder: defragmentation is not implemented yet, so this does
+ * nothing. */
+void alloc_defrag_start(void* pool) {
+    (void) pool;
+}
+
+/* Placeholder: defragmentation is not implemented yet, so no allocation
+ * is ever moved and `p` is returned unchanged. Returning a value also
+ * avoids the undefined behaviour of a caller using the result of a
+ * non-void function that has no return statement. */
+void* alloc_defrag_address(void* pool, void* p) {
+    (void) pool;
+    return p;
+}
+
+/* Placeholder: defragmentation is not implemented yet, so this does
+ * nothing. */
+void alloc_defrag_commit(void* pool) {
+    (void) pool;
+}
+
+/* Placeholder: defragmentation is not implemented yet, so one can never
+ * be in progress. Returning a value also avoids the undefined behaviour of
+ * a caller using the result of a non-void function that has no return
+ * statement. */
+bool alloc_defrag_in_progress(void* pool) {
+    (void) pool;
+    return false;
+}
--- a/GL/alloc/alloc.h
+++ b/GL/alloc/alloc.h
@ -0,0 +1,28 @@
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Start managing `size` bytes of memory at `pool`. Returns 0 on success
+ * and -1 on failure, for example when a pool is already initialised or
+ * `size` is larger than 8M. */
+int alloc_init(void* pool, size_t size);
+/* Release all bookkeeping and reset the allocator so that alloc_init()
+ * can be called again. */
+void alloc_shutdown(void* pool);
+
+/* Reserve `size` bytes from the pool. Returns NULL if no space is
+ * available. Allocations of 2048 bytes or more start on a 2k boundary. */
+void *alloc_malloc(void* pool, size_t size);
+/* Release an allocation returned by alloc_malloc(). Pointers that are not
+ * known to the allocator are ignored. */
+void alloc_free(void* pool, void* p);
+
+/* Defragmentation interface. NOTE(review): the implementations in alloc.c
+ * are currently empty, so the intended contract is still to be defined. */
+void alloc_defrag_start(void* pool);
+void* alloc_defrag_address(void* pool, void* p);
+void alloc_defrag_commit(void* pool);
+bool alloc_defrag_in_progress(void* pool);
+
+/* Address at which an allocation of `required_size` bytes would be placed,
+ * or NULL if there is no room. Nothing is reserved. */
+void* alloc_next_available(void* pool, size_t required_size);
+/* First 2k-aligned address inside the pool. */
+void* alloc_base_address(void* pool);
+/* Number of whole 2k blocks in the pool. */
+size_t alloc_block_count(void* pool);
+
+#ifdef __cplusplus
+}
+#endif
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@ -16,7 +16,7 @@ ADD_CUSTOM_COMMAND(
 add_executable(gldc_tests ${TEST_FILES} ${TEST_SOURCES} ${TEST_MAIN_FILENAME})
 target_link_libraries(gldc_tests GLdc)

-if(!PLATFORM_DREAMCAST)
+if(NOT PLATFORM_DREAMCAST)
 set_target_properties(
    gldc_tests
    PROPERTIES
--- a/tests/test_allocator.h
+++ b/tests/test_allocator.h
@ -0,0 +1,89 @@
+#include "tools/test.h"
+
+#include <stdint.h>
+#include <assert.h>
+
+#include <GL/gl.h>
+#include <GL/glkos.h>
+
+#include "GL/alloc/alloc.h"
+
+/* Round `n` up to the nearest multiple of `multiple` (must be non-zero).
+ * Operates on uintptr_t because it is applied to the pool's address: an
+ * `int` would truncate the pointer on 64-bit hosts and the expected base
+ * address would then be wrong. */
+static inline uintptr_t round_up(uintptr_t n, uintptr_t multiple)
+{
+    assert(multiple);
+
+    /* Work from the remainder so the intermediate value cannot wrap */
+    const uintptr_t remainder = n % multiple;
+    return remainder ? n + (multiple - remainder) : n;
+}
+
+/* Tests for the 2k-block allocator in GL/alloc. Each test initialises the
+ * allocator over `pool`, and tear_down() shuts it down again. */
+class AllocatorTests : public test::TestCase {
+public:
+    uint8_t pool[16 * 2048];
+
+    void set_up() {
+    }
+
+    void tear_down() {
+        alloc_shutdown(pool);
+    }
+
+    /* Return `base` advanced by `bytes`. Arithmetic directly on a void* is
+     * not valid ISO C++ (it is a GNU extension), so step through uint8_t*
+     * and hand back a void* to keep the assertion operand types unchanged. */
+    static void* offset_by(void* base, size_t bytes) {
+        return static_cast<uint8_t*>(base) + bytes;
+    }
+
+    void test_alloc_init() {
+        alloc_init(pool, sizeof(pool));
+
+        void* expected_base_address = (void*) round_up((uintptr_t) pool, 2048);
+        assert_equal(alloc_next_available(pool, 16), expected_base_address);
+        assert_equal(alloc_base_address(pool), expected_base_address);
+
+        int expected_blocks = (
+            uintptr_t(pool + sizeof(pool)) -
+            uintptr_t(expected_base_address)
+        ) / 2048;
+
+        assert_equal(alloc_block_count(pool), expected_blocks);
+    }
+
+    void test_alloc_malloc() {
+        alloc_init(pool, sizeof(pool));
+
+        void* base_address = alloc_base_address(pool);
+        void* a1 = alloc_malloc(pool, 1024);
+
+        /* First alloc should always be the base address */
+        assert_equal(a1, base_address);
+
+        /* An allocation of <= 2048 (well 1024) will not necessarily be at
+         * a 2k boundary */
+        void* expected_next_available = offset_by(base_address, 1024);
+        assert_equal(alloc_next_available(pool, 1024), expected_next_available);
+
+        /* Requesting 2k though will force to a 2k boundary */
+        expected_next_available = offset_by(base_address, 2048);
+        assert_equal(alloc_next_available(pool, 2048), expected_next_available);
+
+        /* Now alloc 2048 bytes, this should be on the 2k boundary */
+        void* a2 = alloc_malloc(pool, 2048);
+        assert_equal(a2, expected_next_available);
+
+        /* If we try to allocate 1k, this should go in the second half of the
+         * first block */
+        expected_next_available = offset_by(base_address, 1024);
+        void* a3 = alloc_malloc(pool, 1024);
+        assert_equal(a3, expected_next_available);
+
+        alloc_free(pool, a1);
+
+        /* Next allocation would go in the just freed block */
+        expected_next_available = base_address;
+        assert_equal(alloc_next_available(pool, 64), expected_next_available);
+
+        /* Now allocate 14 more 2048 size blocks, the following one should
+         * return NULL */
+        for(int i = 0; i < 14; ++i) {
+            alloc_malloc(pool, 2048);
+        }
+
+        assert_is_null(alloc_malloc(pool, 2048));
+
+        /* But we should still have room in the first block for this, in
+         * the half that was released by freeing a1 */
+        assert_is_not_null(alloc_malloc(pool, 64));
+    }
+
+};