diff --git a/GL/platforms/sh4.h b/GL/platforms/sh4.h
index a2e3446..d84b25e 100644
--- a/GL/platforms/sh4.h
+++ b/GL/platforms/sh4.h
@@ -21,8 +21,8 @@
  * destination is properly aligned so we assert that. */
 #define FASTCPY(dst, src, bytes) \
     do { \
-        if(bytes % 32 == 0 && (uintptr_t) src % 32 == 0) { \
-            assert((uintptr_t) dst % 32 == 0); \
+        if((bytes) % 32 == 0 && ((uintptr_t) (src) % 4) == 0) { \
+            assert(((uintptr_t) (dst)) % 32 == 0); \
             sq_cpy(dst, src, bytes); \
         } else { \
             PERF_WARNING(); \
diff --git a/GL/texture.c b/GL/texture.c
index 6c5260a..4deb9d5 100644
--- a/GL/texture.c
+++ b/GL/texture.c
@@ -381,6 +381,10 @@ GLubyte _glInitTextures() {
     size_t vram_free = GPUMemoryAvailable();
     YALLOC_SIZE = vram_free - PVR_MEM_BUFFER_SIZE; /* Take all but 64kb VRAM */
     YALLOC_BASE = GPUMemoryAlloc(YALLOC_SIZE);
+
+    /* Ensure the yalloc base address is 32-byte aligned */
+    assert((uintptr_t) YALLOC_BASE % 32 == 0);
+
     yalloc_init(YALLOC_BASE, YALLOC_SIZE);
     return 1;
 }
diff --git a/GL/yalloc/yalloc_internals.h b/GL/yalloc/yalloc_internals.h
index b0ec62a..ffb70cb 100644
--- a/GL/yalloc/yalloc_internals.h
+++ b/GL/yalloc/yalloc_internals.h
@@ -7,6 +7,10 @@
 typedef struct {
     uint32_t prev; // low bit set if free
     uint32_t next; // for used blocks: low bit set if unused header at the end
+
+    /* We need user data to be 32-byte aligned, so the header needs
+     * to be 32 bytes in size (as user data follows the header) */
+    uint8_t padding[32 - (sizeof(uint32_t) * 2)];
 } Header;
 
 // NOTE: We have 32bit aligned data and 16bit offsets where the lowest bit is used as flag. So we remove the low bit and shift by 1 to address 128k bytes with the 15bit significant offset bits.