Optimisations

Luke Benstead 2021-09-12 15:04:52 +01:00
parent cb96769074
commit 4adc49cd40
8 changed files with 215 additions and 200 deletions

View File

@@ -25,7 +25,7 @@ if(NOT PLATFORM_DREAMCAST)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32")
endif()
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3 --fast-math")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3 --fast-math -fexpensive-optimizations -funroll-all-loops")
set(
SOURCES

View File

@@ -13,10 +13,12 @@ AttribPointer UV_POINTER;
AttribPointer ST_POINTER;
AttribPointer NORMAL_POINTER;
AttribPointer DIFFUSE_POINTER;
GLuint ENABLED_VERTEX_ATTRIBUTES = 0;
GLboolean FAST_PATH_ENABLED = GL_FALSE;
static GLuint ENABLED_VERTEX_ATTRIBUTES = 0;
static GLubyte ACTIVE_CLIENT_TEXTURE = 0;
static GLboolean FAST_PATH_ENABLED = GL_FALSE;
extern inline GLboolean _glRecalcFastPath();
#define ITERATE(count) \
GLuint i = count; \
@@ -52,53 +54,7 @@ void _glInitAttributePointers() {
NORMAL_POINTER.size = 3;
}
GL_FORCE_INLINE GLboolean _glIsVertexDataFastPathCompatible() {
/* The fast path is enabled when all enabled elements of the vertex
* match the output format. This means:
*
* xyz == 3f
* uv == 2f
* rgba == argb4444
* st == 2f
* normal == 3f
*
* When this happens we do inline straight copies of the enabled data
* and transforms for positions and normals happen while copying.
*/
if((ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG)) {
if(VERTEX_POINTER.size != 3 || VERTEX_POINTER.type != GL_FLOAT) {
return GL_FALSE;
}
}
if((ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG)) {
if(UV_POINTER.size != 2 || UV_POINTER.type != GL_FLOAT) {
return GL_FALSE;
}
}
if((ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG)) {
/* FIXME: Shouldn't this be a reversed format? */
if(DIFFUSE_POINTER.size != GL_BGRA || DIFFUSE_POINTER.type != GL_UNSIGNED_BYTE) {
return GL_FALSE;
}
}
if((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG)) {
if(ST_POINTER.size != 2 || ST_POINTER.type != GL_FLOAT) {
return GL_FALSE;
}
}
if((ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG)) {
if(NORMAL_POINTER.size != 3 || NORMAL_POINTER.type != GL_FLOAT) {
return GL_FALSE;
}
}
return GL_TRUE;
}
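For context, an application only hits this fast path when its client arrays already match the layouts listed in the comment above. A minimal sketch of a compatible setup (standard GL 1.x client-array calls; GL_BGRA as a glColorPointer size comes from the vertex_array_bgra extension, and the struct below is illustrative, not part of this commit):

    typedef struct {
        float xyz[3];          /* 3 x GL_FLOAT  -> position check passes */
        float uv[2];           /* 2 x GL_FLOAT  -> UV check passes       */
        unsigned char bgra[4]; /* GL_BGRA ubyte -> diffuse check passes  */
    } FastVertex;

    static FastVertex quad[4];

    static void enable_fast_path_arrays(void) {
        glEnableClientState(GL_VERTEX_ARRAY);
        glEnableClientState(GL_TEXTURE_COORD_ARRAY);
        glEnableClientState(GL_COLOR_ARRAY);
        glVertexPointer(3, GL_FLOAT, sizeof(FastVertex), quad[0].xyz);
        glTexCoordPointer(2, GL_FLOAT, sizeof(FastVertex), quad[0].uv);
        glColorPointer(GL_BGRA, GL_UNSIGNED_BYTE, sizeof(FastVertex), quad[0].bgra);
    }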
GL_FORCE_INLINE GLsizei byte_size(GLenum type) {
switch(type) {
@@ -1182,19 +1138,8 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
target->extras = &extras;
}
GLboolean doMultitexture, doTexture, doLighting;
GLint activeTexture;
glGetIntegerv(GL_ACTIVE_TEXTURE_ARB, &activeTexture);
glActiveTextureARB(GL_TEXTURE0);
glGetBooleanv(GL_TEXTURE_2D, &doTexture);
glActiveTextureARB(GL_TEXTURE1);
glGetBooleanv(GL_TEXTURE_2D, &doMultitexture);
doLighting = _glIsLightingEnabled();
glActiveTextureARB(activeTexture);
const GLboolean doLighting = LIGHTING_ENABLED;
const GLboolean doMultitexture = TEXTURES_ENABLED[1];
/* Polygons are treated as triangle fans, the only time this would be a
* problem is if we supported glPolygonMode(..., GL_LINE) but we don't.
@@ -1423,11 +1368,6 @@ void APIENTRY glClientActiveTextureARB(GLenum texture) {
ACTIVE_CLIENT_TEXTURE = (texture == GL_TEXTURE1_ARB) ? 1 : 0;
}
GLboolean _glRecalcFastPath() {
FAST_PATH_ENABLED = _glIsVertexDataFastPathCompatible();
return FAST_PATH_ENABLED;
}
void APIENTRY glTexCoordPointer(GLint size, GLenum type, GLsizei stride, const GLvoid * pointer) {
TRACE();

View File

@@ -12,7 +12,9 @@
#include "private.h"
static GLboolean IMMEDIATE_MODE_ACTIVE = GL_FALSE;
extern inline GLboolean _glRecalcFastPath();
GLboolean IMMEDIATE_MODE_ACTIVE = GL_FALSE;
static GLenum ACTIVE_POLYGON_MODE = GL_TRIANGLES;
static AlignedVector VERTICES;
@@ -39,7 +41,7 @@ extern AttribPointer DIFFUSE_POINTER;
/* We store the list of attributes that have been "enabled" by a call to
glColor, glNormal, glTexCoord etc. otherwise we already have defaults that
can be applied faster */
static GLuint ENABLED_VERTEX_ATTRIBUTES = 0;
static GLuint IM_ENABLED_VERTEX_ATTRIBUTES = 0;
static inline uint32_t pack_vertex_attribute_vec3_1i(float x, float y, float z) {
const float w = 0.0f;
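The body is truncated in this view, but the name suggests quantising a vec3 (with an implicit w) into a single 2:10:10:10-packed 32-bit integer, the layout used by GL_INT_2_10_10_10_REV normals. A rough sketch of that idea, assuming components in [-1, 1] (my reconstruction, not the commit's exact code):

    static inline uint32_t pack_2_10_10_10_sketch(float x, float y, float z, float w) {
        /* Scale each component to a signed 10-bit field (2 bits for w)
           and mask it into place. */
        const uint32_t xi = ((int32_t)(x * 511.0f)) & 0x3FF;
        const uint32_t yi = ((int32_t)(y * 511.0f)) & 0x3FF;
        const uint32_t zi = ((int32_t)(z * 511.0f)) & 0x3FF;
        const uint32_t wi = ((int32_t) w)           & 0x3;
        return (wi << 30) | (zi << 20) | (yi << 10) | xi;
    }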
@@ -95,17 +97,6 @@ void _glInitImmediateMode(GLuint initial_size) {
NORMAL = pack_vertex_attribute_vec3_1i(0.0f, 0.0f, 1.0f);
}
GLubyte _glCheckImmediateModeInactive(const char* func) {
/* Returns 1 on error */
if(IMMEDIATE_MODE_ACTIVE) {
_glKosThrowError(GL_INVALID_OPERATION, func);
_glKosPrintError();
return 1;
}
return 0;
}
void APIENTRY glBegin(GLenum mode) {
if(IMMEDIATE_MODE_ACTIVE) {
_glKosThrowError(GL_INVALID_OPERATION, __func__);
@@ -118,7 +109,7 @@ void APIENTRY glBegin(GLenum mode) {
}
void APIENTRY glColor4f(GLfloat r, GLfloat g, GLfloat b, GLfloat a) {
ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
COLOR[A8IDX] = (GLubyte)(a * 255.0f);
COLOR[R8IDX] = (GLubyte)(r * 255.0f);
@@ -127,7 +118,7 @@ void APIENTRY glColor4f(GLfloat r, GLfloat g, GLfloat b, GLfloat a) {
}
void APIENTRY glColor4ub(GLubyte r, GLubyte g, GLubyte b, GLubyte a) {
ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
COLOR[A8IDX] = a;
COLOR[R8IDX] = r;
@@ -136,7 +127,7 @@ void APIENTRY glColor4ub(GLubyte r, GLubyte g, GLubyte b, GLubyte a) {
}
void APIENTRY glColor4fv(const GLfloat* v) {
ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
COLOR[B8IDX] = (GLubyte)(v[2] * 255);
COLOR[G8IDX] = (GLubyte)(v[1] * 255);
@@ -145,7 +136,7 @@ void APIENTRY glColor4fv(const GLfloat* v) {
}
void APIENTRY glColor3f(GLfloat r, GLfloat g, GLfloat b) {
ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
COLOR[B8IDX] = (GLubyte)(b * 255);
COLOR[G8IDX] = (GLubyte)(g * 255);
@@ -154,7 +145,7 @@ void APIENTRY glColor3f(GLfloat r, GLfloat g, GLfloat b) {
}
void APIENTRY glColor3ub(GLubyte red, GLubyte green, GLubyte blue) {
ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
COLOR[A8IDX] = 255;
COLOR[R8IDX] = red;
@@ -163,7 +154,7 @@ void APIENTRY glColor3ub(GLubyte red, GLubyte green, GLubyte blue) {
}
void APIENTRY glColor3ubv(const GLubyte *v) {
ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
COLOR[A8IDX] = 255;
COLOR[R8IDX] = v[0];
@@ -172,7 +163,7 @@ void APIENTRY glColor3ubv(const GLubyte *v) {
}
void APIENTRY glColor3fv(const GLfloat* v) {
ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
COLOR[A8IDX] = 255;
COLOR[R8IDX] = (GLubyte)(v[0] * 255);
@@ -181,7 +172,7 @@ void APIENTRY glColor3fv(const GLfloat* v) {
}
void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) {
ENABLED_VERTEX_ATTRIBUTES |= VERTEX_ENABLED_FLAG;
IM_ENABLED_VERTEX_ATTRIBUTES |= VERTEX_ENABLED_FLAG;
GLVertexKOS* vert = aligned_vector_extend(&VERTICES, 1);
@@ -192,12 +183,12 @@ void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) {
vert->v = UV_COORD[1];
*((uint32_t*) vert->bgra) = *((uint32_t*) COLOR);
if(ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) {
if(IM_ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) {
GLuint* n = aligned_vector_extend(&NORMALS, 1);
*n = NORMAL;
}
if(ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) {
if(IM_ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) {
GLfloat* st = aligned_vector_extend(&ST_COORDS, 2);
st[0] = ST_COORD[0];
st[1] = ST_COORD[1];
@@ -227,11 +218,11 @@ void APIENTRY glVertex4fv(const GLfloat* v) {
void APIENTRY glMultiTexCoord2fARB(GLenum target, GLfloat s, GLfloat t) {
if(target == GL_TEXTURE0) {
ENABLED_VERTEX_ATTRIBUTES |= UV_ENABLED_FLAG;
IM_ENABLED_VERTEX_ATTRIBUTES |= UV_ENABLED_FLAG;
UV_COORD[0] = s;
UV_COORD[1] = t;
} else if(target == GL_TEXTURE1) {
ENABLED_VERTEX_ATTRIBUTES |= ST_ENABLED_FLAG;
IM_ENABLED_VERTEX_ATTRIBUTES |= ST_ENABLED_FLAG;
ST_COORD[0] = s;
ST_COORD[1] = t;
} else {
@@ -242,7 +233,7 @@ void APIENTRY glMultiTexCoord2fARB(GLenum target, GLfloat s, GLfloat t) {
}
void APIENTRY glTexCoord2f(GLfloat u, GLfloat v) {
ENABLED_VERTEX_ATTRIBUTES |= UV_ENABLED_FLAG;
IM_ENABLED_VERTEX_ATTRIBUTES |= UV_ENABLED_FLAG;
UV_COORD[0] = u;
UV_COORD[1] = v;
}
@@ -252,12 +243,12 @@ void APIENTRY glTexCoord2fv(const GLfloat* v) {
}
void APIENTRY glNormal3f(GLfloat x, GLfloat y, GLfloat z) {
ENABLED_VERTEX_ATTRIBUTES |= NORMAL_ENABLED_FLAG;
IM_ENABLED_VERTEX_ATTRIBUTES |= NORMAL_ENABLED_FLAG;
NORMAL = pack_vertex_attribute_vec3_1i(x, y, z);
}
void APIENTRY glNormal3fv(const GLfloat* v) {
ENABLED_VERTEX_ATTRIBUTES |= NORMAL_ENABLED_FLAG;
IM_ENABLED_VERTEX_ATTRIBUTES |= NORMAL_ENABLED_FLAG;
glNormal3f(v[0], v[1], v[2]);
}
@@ -272,7 +263,7 @@ void APIENTRY glEnd() {
NORMAL_ATTRIB.ptr = NORMALS.data;
ST_ATTRIB.ptr = ST_COORDS.data;
GLuint* attrs = _glGetEnabledAttributes();
GLuint* attrs = &ENABLED_VERTEX_ATTRIBUTES;
/* Stash existing values */
AttribPointer vptr = VERTEX_POINTER;
@@ -290,10 +281,11 @@
UV_POINTER = UV_ATTRIB;
ST_POINTER = ST_ATTRIB;
*attrs = ENABLED_VERTEX_ATTRIBUTES;
*attrs = IM_ENABLED_VERTEX_ATTRIBUTES;
_glRecalcFastPath();
#ifndef NDEBUG
// Immediate mode should always activate the fast path
GLboolean fastPathEnabled = _glRecalcFastPath();
#else
/* If we're not debugging, set to true - we assume we haven't broken it! */
FAST_PATH_ENABLED = GL_TRUE;
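With this change an ordinary glBegin/glEnd sequence should always end up on the fast path, since glEnd() points the attribute pointers at the internally packed arrays before submitting. Typical usage, shown for context only (standard GL 1.x, not part of the diff):

    glBegin(GL_TRIANGLES);
        glColor3f(1.0f, 0.0f, 0.0f);
        glTexCoord2f(0.0f, 0.0f);
        glVertex3f(-1.0f, -1.0f, 0.0f);

        glColor3f(0.0f, 1.0f, 0.0f);
        glTexCoord2f(1.0f, 0.0f);
        glVertex3f(1.0f, -1.0f, 0.0f);

        glColor3f(0.0f, 0.0f, 1.0f);
        glTexCoord2f(0.5f, 1.0f);
        glVertex3f(0.0f, 1.0f, 0.0f);
    glEnd(); /* stashes the user's attribute pointers, submits, restores them */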

View File

@@ -301,7 +301,6 @@ Matrix4x4* _glGetProjectionMatrix();
Matrix4x4* _glGetModelViewMatrix();
void _glWipeTextureOnFramebuffers(GLuint texture);
GLubyte _glCheckImmediateModeInactive(const char* func);
PolyContext* _glGetPVRContext();
GLubyte _glInitTextures();
@@ -329,7 +328,12 @@ GLenum _glGetShadeModel();
TextureObject* _glGetTexture0();
TextureObject* _glGetTexture1();
TextureObject* _glGetBoundTexture();
extern GLubyte ACTIVE_TEXTURE;
extern GLboolean TEXTURES_ENABLED[];
GLubyte _glGetActiveTexture();
GLuint _glGetActiveClientTexture();
TexturePalette* _glGetSharedPalette(GLshort bank);
void _glSetInternalPaletteFormat(GLenum val);
@@ -367,13 +371,92 @@ GLboolean _glIsMipmapComplete(const TextureObject* obj);
GLubyte* _glGetMipmapLocation(const TextureObject* obj, GLuint level);
GLuint _glGetMipmapLevelCount(const TextureObject* obj);
extern GLboolean LIGHTING_ENABLED;
GLboolean _glIsLightingEnabled();
void _glEnableLight(GLubyte light, unsigned char value);
GLboolean _glIsColorMaterialEnabled();
GLboolean _glIsNormalizeEnabled();
GLboolean _glRecalcFastPath();
extern AttribPointer VERTEX_POINTER;
extern AttribPointer UV_POINTER;
extern AttribPointer ST_POINTER;
extern AttribPointer NORMAL_POINTER;
extern AttribPointer DIFFUSE_POINTER;
extern GLuint ENABLED_VERTEX_ATTRIBUTES;
extern GLboolean FAST_PATH_ENABLED;
GL_FORCE_INLINE GLboolean _glIsVertexDataFastPathCompatible() {
/* The fast path is enabled when all enabled elements of the vertex
* match the output format. This means:
*
* xyz == 3f
* uv == 2f
* rgba == argb4444
* st == 2f
* normal == 3f
*
* When this happens we do inline straight copies of the enabled data
* and transforms for positions and normals happen while copying.
*/
if((ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG)) {
if(VERTEX_POINTER.size != 3 || VERTEX_POINTER.type != GL_FLOAT) {
return GL_FALSE;
}
}
if((ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG)) {
if(UV_POINTER.size != 2 || UV_POINTER.type != GL_FLOAT) {
return GL_FALSE;
}
}
if((ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG)) {
/* FIXME: Shouldn't this be a reversed format? */
if(DIFFUSE_POINTER.size != GL_BGRA || DIFFUSE_POINTER.type != GL_UNSIGNED_BYTE) {
return GL_FALSE;
}
}
if((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG)) {
if(ST_POINTER.size != 2 || ST_POINTER.type != GL_FLOAT) {
return GL_FALSE;
}
}
if((ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG)) {
if(NORMAL_POINTER.size != 3 || NORMAL_POINTER.type != GL_FLOAT) {
return GL_FALSE;
}
}
return GL_TRUE;
}
GL_FORCE_INLINE GLboolean _glRecalcFastPath() {
FAST_PATH_ENABLED = _glIsVertexDataFastPathCompatible();
return FAST_PATH_ENABLED;
}
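Moving these definitions into private.h relies on the C99 inline model: the header supplies inline definitions, and a .c file that needs a callable out-of-line copy declares them extern inline, which is what the extern inline GLboolean _glRecalcFastPath(); lines in draw.c and immediate.c above are doing. A minimal sketch of the pattern with illustrative names (exact linkage behaviour depends on the compiler's inline model):

    /* fast.h: the inline definition is visible to every includer */
    inline int fast_check(int v) { return v == 3; }

    /* fast.c: under C99 this emits the one external definition for the linker */
    extern inline int fast_check(int v);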
extern GLboolean IMMEDIATE_MODE_ACTIVE;
void _glKosThrowError(GLenum error, const char *function);
void _glKosPrintError();
GL_FORCE_INLINE GLboolean _glCheckImmediateModeInactive(const char* func) {
/* Returns GL_TRUE on error */
if(IMMEDIATE_MODE_ACTIVE) {
_glKosThrowError(GL_INVALID_OPERATION, func);
_glKosPrintError();
return GL_TRUE;
}
return GL_FALSE;
}
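As a usage illustration, an entry point that is illegal between glBegin and glEnd would guard itself like this (hypothetical caller, not part of this diff):

    void APIENTRY glEnableClientState(GLenum cap) {
        /* Raises GL_INVALID_OPERATION and returns GL_TRUE if we are
           currently inside a glBegin()/glEnd() pair. */
        if(_glCheckImmediateModeInactive(__func__)) {
            return;
        }
        /* ... normal processing ... */
    }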
typedef struct {
float n[3]; // 12 bytes

View File

@@ -18,7 +18,7 @@ static GLenum FRONT_FACE = GL_CCW;
static GLboolean CULLING_ENABLED = GL_FALSE;
static GLboolean COLOR_MATERIAL_ENABLED = GL_FALSE;
static GLboolean LIGHTING_ENABLED = GL_FALSE;
GLboolean LIGHTING_ENABLED = GL_FALSE;
/* Is the shared texture palette enabled? */
static GLboolean SHARED_PALETTE_ENABLED = GL_FALSE;

View File

@@ -20,7 +20,7 @@
static TextureObject* TEXTURE_UNITS[MAX_TEXTURE_UNITS] = {NULL, NULL};
static NamedArray TEXTURE_OBJECTS;
static GLubyte ACTIVE_TEXTURE = 0;
GLubyte ACTIVE_TEXTURE = 0;
static TexturePalette* SHARED_PALETTES[4] = {NULL, NULL, NULL, NULL};

View File

@@ -4,15 +4,6 @@
#include <assert.h>
#include <stdio.h>
#if defined(__APPLE__) || defined(__WIN32__)
/* Linux + Kos define this, OSX does not, so just use malloc there */
static inline void* memalign(size_t alignment, size_t size) {
return malloc(size);
}
#else
#include <malloc.h>
#endif
#ifdef _arch_dreamcast
#include "../GL/private.h"
#else
@@ -21,6 +12,11 @@ static inline void* memalign(size_t alignment, size_t size) {
#include "aligned_vector.h"
extern inline void* aligned_vector_resize(AlignedVector* vector, const unsigned int element_count);
extern inline void* aligned_vector_extend(AlignedVector* vector, const unsigned int additional_count);
extern inline void* aligned_vector_reserve(AlignedVector* vector, unsigned int element_count);
extern inline void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count);
void aligned_vector_init(AlignedVector* vector, unsigned int element_size) {
vector->size = vector->capacity = 0;
vector->element_size = element_size;
@@ -30,91 +26,6 @@ void aligned_vector_init(AlignedVector* vector, unsigned int element_size) {
aligned_vector_reserve(vector, ALIGNED_VECTOR_CHUNK_SIZE);
}
static inline unsigned int round_to_chunk_size(unsigned int val) {
const unsigned int n = val;
const unsigned int m = ALIGNED_VECTOR_CHUNK_SIZE;
return ((n + m - 1) / m) * m;
}
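round_to_chunk_size() is plain round-up-to-a-multiple arithmetic. Assuming ALIGNED_VECTOR_CHUNK_SIZE is 256 (the constant is defined outside this diff), a few worked cases:

    /* round_to_chunk_size(1)   == ((1   + 255) / 256) * 256 == 256 */
    /* round_to_chunk_size(256) == ((256 + 255) / 256) * 256 == 256 */
    /* round_to_chunk_size(257) == ((257 + 255) / 256) * 256 == 512 */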
void* aligned_vector_reserve(AlignedVector* vector, unsigned int element_count) {
if(element_count == 0) {
return NULL;
}
if(element_count <= vector->capacity) {
return NULL;
}
unsigned int original_byte_size = vector->size * vector->element_size;
/* We overallocate so that we don't make small allocations during push backs */
element_count = round_to_chunk_size(element_count);
unsigned int new_byte_size = element_count * vector->element_size;
unsigned char* original_data = vector->data;
vector->data = (unsigned char*) memalign(0x20, new_byte_size);
assert(vector->data);
if(original_data) {
FASTCPY(vector->data, original_data, original_byte_size);
free(original_data);
}
vector->capacity = element_count;
return vector->data + original_byte_size;
}
void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count) {
/* Resize enough room */
assert(count);
assert(vector->element_size);
unsigned int initial_size = vector->size;
aligned_vector_resize(vector, vector->size + count);
assert(vector->size == initial_size + count);
unsigned char* dest = vector->data + (vector->element_size * initial_size);
/* Copy the objects in */
FASTCPY(dest, objs, vector->element_size * count);
return dest;
}
void* aligned_vector_resize(AlignedVector* vector, const unsigned int element_count) {
void* ret = NULL;
unsigned int previousCount = vector->size;
/* Don't change memory when resizing downwards, just change the size */
if(element_count <= vector->size) {
vector->size = element_count;
return NULL;
}
if(vector->capacity < element_count) {
ret = aligned_vector_reserve(vector, element_count);
vector->size = element_count;
} else if(previousCount < element_count) {
vector->size = element_count;
ret = aligned_vector_at(vector, previousCount);
}
if(previousCount < vector->size) {
return ret;
} else {
return NULL;
}
}
void* aligned_vector_extend(AlignedVector* vector, const unsigned int additional_count) {
return aligned_vector_resize(vector, vector->size + additional_count);
}
void aligned_vector_shrink_to_fit(AlignedVector* vector) {
if(vector->size == 0) {
free(vector->data);

View File

@@ -2,11 +2,21 @@
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#ifdef __cplusplus
extern "C" {
#endif
#if defined(__APPLE__) || defined(__WIN32__)
/* Linux + Kos define this, OSX does not, so just use malloc there */
static inline void* memalign(size_t alignment, size_t size) {
return malloc(size);
}
#else
#include <malloc.h>
#endif
typedef struct {
unsigned int size;
unsigned int capacity;
@@ -24,16 +34,95 @@ typedef struct {
#define AV_FORCE_INLINE static AV_INLINE_DEBUG
#endif
#define ROUND_TO_CHUNK_SIZE(v) \
((((v) + ALIGNED_VECTOR_CHUNK_SIZE - 1) / ALIGNED_VECTOR_CHUNK_SIZE) * ALIGNED_VECTOR_CHUNK_SIZE)
void aligned_vector_init(AlignedVector* vector, unsigned int element_size);
void* aligned_vector_reserve(AlignedVector* vector, unsigned int element_count);
void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count);
void* aligned_vector_resize(AlignedVector* vector, const unsigned int element_count);
AV_FORCE_INLINE void* aligned_vector_reserve(AlignedVector* vector, unsigned int element_count) {
if(element_count == 0) {
return NULL;
}
if(element_count <= vector->capacity) {
return NULL;
}
unsigned int original_byte_size = vector->size * vector->element_size;
/* We overallocate so that we don't make small allocations during push backs */
element_count = ROUND_TO_CHUNK_SIZE(element_count);
unsigned int new_byte_size = element_count * vector->element_size;
unsigned char* original_data = vector->data;
vector->data = (unsigned char*) memalign(0x20, new_byte_size);
assert(vector->data);
if(original_data) {
memcpy(vector->data, original_data, original_byte_size);
free(original_data);
}
vector->capacity = element_count;
return vector->data + original_byte_size;
}
AV_FORCE_INLINE void* aligned_vector_at(const AlignedVector* vector, const unsigned int index) {
assert(index < vector->size);
return &vector->data[index * vector->element_size];
}
void* aligned_vector_extend(AlignedVector* vector, const unsigned int additional_count);
AV_FORCE_INLINE void* aligned_vector_resize(AlignedVector* vector, const unsigned int element_count) {
void* ret = NULL;
unsigned int previousCount = vector->size;
/* Don't change memory when resizing downwards, just change the size */
if(element_count <= vector->size) {
vector->size = element_count;
return NULL;
}
if(vector->capacity < element_count) {
ret = aligned_vector_reserve(vector, element_count);
vector->size = element_count;
} else if(previousCount < element_count) {
vector->size = element_count;
ret = aligned_vector_at(vector, previousCount);
}
if(previousCount < vector->size) {
return ret;
} else {
return NULL;
}
}
AV_FORCE_INLINE void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count) {
/* Resize enough room */
assert(count);
assert(vector->element_size);
unsigned int initial_size = vector->size;
aligned_vector_resize(vector, vector->size + count);
assert(vector->size == initial_size + count);
unsigned char* dest = vector->data + (vector->element_size * initial_size);
/* Copy the objects in */
memcpy(dest, objs, vector->element_size * count);
return dest;
}
AV_FORCE_INLINE void* aligned_vector_extend(AlignedVector* vector, const unsigned int additional_count) {
return aligned_vector_resize(vector, vector->size + additional_count);
}
AV_FORCE_INLINE void aligned_vector_clear(AlignedVector* vector){
vector->size = 0;
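Taken together, the header now exposes the whole hot-path API inline. A minimal standalone usage sketch, using only the functions shown above:

    #include "aligned_vector.h"

    int main(void) {
        AlignedVector v;
        aligned_vector_init(&v, sizeof(float));   /* pre-reserves one chunk */

        const float vals[3] = {1.0f, 2.0f, 3.0f};
        aligned_vector_push_back(&v, vals, 3);    /* copied into 32-byte aligned storage */

        float* second = (float*) aligned_vector_at(&v, 1); /* points at 2.0f */
        (void) second;

        aligned_vector_clear(&v);                 /* size back to 0, capacity kept */
        return 0;
    }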