Optimisations
This commit is contained in:
parent
cb96769074
commit
4adc49cd40
CMakeLists.txt

@@ -25,7 +25,7 @@ if(NOT PLATFORM_DREAMCAST)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32")
 endif()
 
-set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3 --fast-math")
+set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3 --fast-math -fexpensive-optimizations -funroll-all-loops")
 
 set(
     SOURCES

GL/draw.c (72 changed lines)
@@ -13,10 +13,12 @@ AttribPointer UV_POINTER;
 AttribPointer ST_POINTER;
 AttribPointer NORMAL_POINTER;
 AttribPointer DIFFUSE_POINTER;
+GLuint ENABLED_VERTEX_ATTRIBUTES = 0;
+GLboolean FAST_PATH_ENABLED = GL_FALSE;
 
-static GLuint ENABLED_VERTEX_ATTRIBUTES = 0;
 static GLubyte ACTIVE_CLIENT_TEXTURE = 0;
-static GLboolean FAST_PATH_ENABLED = GL_FALSE;
+
+extern inline GLboolean _glRecalcFastPath();
 
 #define ITERATE(count) \
     GLuint i = count; \

@@ -52,53 +54,7 @@ void _glInitAttributePointers() {
     NORMAL_POINTER.size = 3;
 }
 
-GL_FORCE_INLINE GLboolean _glIsVertexDataFastPathCompatible() {
-    /* The fast path is enabled when all enabled elements of the vertex
-     * match the output format. This means:
-     *
-     * xyz == 3f
-     * uv == 2f
-     * rgba == argb4444
-     * st == 2f
-     * normal == 3f
-     *
-     * When this happens we do inline straight copies of the enabled data
-     * and transforms for positions and normals happen while copying.
-     */
-
-    if((ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG)) {
-        if(VERTEX_POINTER.size != 3 || VERTEX_POINTER.type != GL_FLOAT) {
-            return GL_FALSE;
-        }
-    }
-
-    if((ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG)) {
-        if(UV_POINTER.size != 2 || UV_POINTER.type != GL_FLOAT) {
-            return GL_FALSE;
-        }
-    }
-
-    if((ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG)) {
-        /* FIXME: Shouldn't this be a reversed format? */
-        if(DIFFUSE_POINTER.size != GL_BGRA || DIFFUSE_POINTER.type != GL_UNSIGNED_BYTE) {
-            return GL_FALSE;
-        }
-    }
-
-    if((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG)) {
-        if(ST_POINTER.size != 2 || ST_POINTER.type != GL_FLOAT) {
-            return GL_FALSE;
-        }
-    }
-
-    if((ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG)) {
-        if(NORMAL_POINTER.size != 3 || NORMAL_POINTER.type != GL_FLOAT) {
-            return GL_FALSE;
-        }
-    }
-
-    return GL_TRUE;
-}
-
 GL_FORCE_INLINE GLsizei byte_size(GLenum type) {
     switch(type) {

@@ -1182,19 +1138,8 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
         target->extras = &extras;
     }
 
-    GLboolean doMultitexture, doTexture, doLighting;
-    GLint activeTexture;
-    glGetIntegerv(GL_ACTIVE_TEXTURE_ARB, &activeTexture);
-
-    glActiveTextureARB(GL_TEXTURE0);
-    glGetBooleanv(GL_TEXTURE_2D, &doTexture);
-
-    glActiveTextureARB(GL_TEXTURE1);
-    glGetBooleanv(GL_TEXTURE_2D, &doMultitexture);
-
-    doLighting = _glIsLightingEnabled();
-
-    glActiveTextureARB(activeTexture);
+    const GLboolean doLighting = LIGHTING_ENABLED;
+    const GLboolean doMultitexture = TEXTURES_ENABLED[1];
 
     /* Polygons are treated as triangle fans, the only time this would be a
      * problem is if we supported glPolygonMode(..., GL_LINE) but we don't.

@@ -1423,11 +1368,6 @@ void APIENTRY glClientActiveTextureARB(GLenum texture) {
     ACTIVE_CLIENT_TEXTURE = (texture == GL_TEXTURE1_ARB) ? 1 : 0;
 }
 
-GLboolean _glRecalcFastPath() {
-    FAST_PATH_ENABLED = _glIsVertexDataFastPathCompatible();
-    return FAST_PATH_ENABLED;
-}
-
 void APIENTRY glTexCoordPointer(GLint size, GLenum type, GLsizei stride, const GLvoid * pointer) {
     TRACE();
 
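Note: the fast path tested by _glIsVertexDataFastPathCompatible() (moved into
GL/private.h below) is driven entirely by client-array state, so an application
opts in simply by matching the formats listed in its comment. A minimal sketch
of a layout that passes every check (standard GL 1.x client-array calls; the
interleaved struct and names are illustrative and not part of this commit, and
GL_BGRA as a glColorPointer size needs GL_EXT_bgra / GL_ARB_vertex_array_bgra
on desktop GL):

    #include <GL/gl.h>

    /* Interleaved layout matching the fast-path checks:
     * xyz as 3 floats, uv as 2 floats, diffuse as 4 unsigned bytes (BGRA). */
    typedef struct {
        GLfloat xyz[3];
        GLfloat uv[2];
        GLubyte bgra[4];
    } FastVertex;

    static FastVertex vertices[3];

    static void setup_fast_path_arrays(void) {
        glEnableClientState(GL_VERTEX_ARRAY);
        glEnableClientState(GL_TEXTURE_COORD_ARRAY);
        glEnableClientState(GL_COLOR_ARRAY);

        /* size == 3, type == GL_FLOAT: passes the VERTEX_POINTER check */
        glVertexPointer(3, GL_FLOAT, sizeof(FastVertex), vertices[0].xyz);

        /* size == 2, type == GL_FLOAT: passes the UV_POINTER check */
        glTexCoordPointer(2, GL_FLOAT, sizeof(FastVertex), vertices[0].uv);

        /* size == GL_BGRA, type == GL_UNSIGNED_BYTE: passes the DIFFUSE_POINTER check */
        glColorPointer(GL_BGRA, GL_UNSIGNED_BYTE, sizeof(FastVertex), vertices[0].bgra);
    }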

GL/immediate.c
@@ -12,7 +12,9 @@
 
 #include "private.h"
 
-static GLboolean IMMEDIATE_MODE_ACTIVE = GL_FALSE;
+extern inline GLboolean _glRecalcFastPath();
+
+GLboolean IMMEDIATE_MODE_ACTIVE = GL_FALSE;
 static GLenum ACTIVE_POLYGON_MODE = GL_TRIANGLES;
 
 static AlignedVector VERTICES;

@@ -39,7 +41,7 @@ extern AttribPointer DIFFUSE_POINTER;
 /* We store the list of attributes that have been "enabled" by a call to
    glColor, glNormal, glTexCoord etc. otherwise we already have defaults that
    can be applied faster */
-static GLuint ENABLED_VERTEX_ATTRIBUTES = 0;
+static GLuint IM_ENABLED_VERTEX_ATTRIBUTES = 0;
 
 static inline uint32_t pack_vertex_attribute_vec3_1i(float x, float y, float z) {
     const float w = 0.0f;

@@ -95,17 +97,6 @@ void _glInitImmediateMode(GLuint initial_size) {
     NORMAL = pack_vertex_attribute_vec3_1i(0.0f, 0.0f, 1.0f);
 }
 
-GLubyte _glCheckImmediateModeInactive(const char* func) {
-    /* Returns 1 on error */
-    if(IMMEDIATE_MODE_ACTIVE) {
-        _glKosThrowError(GL_INVALID_OPERATION, func);
-        _glKosPrintError();
-        return 1;
-    }
-
-    return 0;
-}
-
 void APIENTRY glBegin(GLenum mode) {
     if(IMMEDIATE_MODE_ACTIVE) {
         _glKosThrowError(GL_INVALID_OPERATION, __func__);

@@ -118,7 +109,7 @@ void APIENTRY glBegin(GLenum mode) {
 }
 
 void APIENTRY glColor4f(GLfloat r, GLfloat g, GLfloat b, GLfloat a) {
-    ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
+    IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
 
     COLOR[A8IDX] = (GLubyte)(a * 255.0f);
     COLOR[R8IDX] = (GLubyte)(r * 255.0f);

@@ -127,7 +118,7 @@ void APIENTRY glColor4f(GLfloat r, GLfloat g, GLfloat b, GLfloat a) {
 }
 
 void APIENTRY glColor4ub(GLubyte r, GLubyte g, GLubyte b, GLubyte a) {
-    ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
+    IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
 
     COLOR[A8IDX] = a;
     COLOR[R8IDX] = r;

@@ -136,7 +127,7 @@ void APIENTRY glColor4ub(GLubyte r, GLubyte g, GLubyte b, GLubyte a) {
 }
 
 void APIENTRY glColor4fv(const GLfloat* v) {
-    ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
+    IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
 
     COLOR[B8IDX] = (GLubyte)(v[2] * 255);
     COLOR[G8IDX] = (GLubyte)(v[1] * 255);

@@ -145,7 +136,7 @@ void APIENTRY glColor4fv(const GLfloat* v) {
 }
 
 void APIENTRY glColor3f(GLfloat r, GLfloat g, GLfloat b) {
-    ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
+    IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
 
     COLOR[B8IDX] = (GLubyte)(b * 255);
     COLOR[G8IDX] = (GLubyte)(g * 255);

@@ -154,7 +145,7 @@ void APIENTRY glColor3f(GLfloat r, GLfloat g, GLfloat b) {
 }
 
 void APIENTRY glColor3ub(GLubyte red, GLubyte green, GLubyte blue) {
-    ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
+    IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
 
     COLOR[A8IDX] = 255;
     COLOR[R8IDX] = red;

@@ -163,7 +154,7 @@ void APIENTRY glColor3ub(GLubyte red, GLubyte green, GLubyte blue) {
 }
 
 void APIENTRY glColor3ubv(const GLubyte *v) {
-    ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
+    IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
 
     COLOR[A8IDX] = 255;
     COLOR[R8IDX] = v[0];

@@ -172,7 +163,7 @@ void APIENTRY glColor3ubv(const GLubyte *v) {
 }
 
 void APIENTRY glColor3fv(const GLfloat* v) {
-    ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
+    IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
 
     COLOR[A8IDX] = 255;
     COLOR[R8IDX] = (GLubyte)(v[0] * 255);

@@ -181,7 +172,7 @@ void APIENTRY glColor3fv(const GLfloat* v) {
 }
 
 void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) {
-    ENABLED_VERTEX_ATTRIBUTES |= VERTEX_ENABLED_FLAG;
+    IM_ENABLED_VERTEX_ATTRIBUTES |= VERTEX_ENABLED_FLAG;
 
     GLVertexKOS* vert = aligned_vector_extend(&VERTICES, 1);
 

@@ -192,12 +183,12 @@ void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) {
     vert->v = UV_COORD[1];
     *((uint32_t*) vert->bgra) = *((uint32_t*) COLOR);
 
-    if(ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) {
+    if(IM_ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) {
         GLuint* n = aligned_vector_extend(&NORMALS, 1);
         *n = NORMAL;
     }
 
-    if(ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) {
+    if(IM_ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) {
         GLfloat* st = aligned_vector_extend(&ST_COORDS, 2);
         st[0] = ST_COORD[0];
         st[1] = ST_COORD[1];

@@ -227,11 +218,11 @@ void APIENTRY glVertex4fv(const GLfloat* v) {
 
 void APIENTRY glMultiTexCoord2fARB(GLenum target, GLfloat s, GLfloat t) {
     if(target == GL_TEXTURE0) {
-        ENABLED_VERTEX_ATTRIBUTES |= UV_ENABLED_FLAG;
+        IM_ENABLED_VERTEX_ATTRIBUTES |= UV_ENABLED_FLAG;
         UV_COORD[0] = s;
         UV_COORD[1] = t;
     } else if(target == GL_TEXTURE1) {
-        ENABLED_VERTEX_ATTRIBUTES |= ST_ENABLED_FLAG;
+        IM_ENABLED_VERTEX_ATTRIBUTES |= ST_ENABLED_FLAG;
         ST_COORD[0] = s;
         ST_COORD[1] = t;
     } else {

@@ -242,7 +233,7 @@ void APIENTRY glMultiTexCoord2fARB(GLenum target, GLfloat s, GLfloat t) {
 }
 
 void APIENTRY glTexCoord2f(GLfloat u, GLfloat v) {
-    ENABLED_VERTEX_ATTRIBUTES |= UV_ENABLED_FLAG;
+    IM_ENABLED_VERTEX_ATTRIBUTES |= UV_ENABLED_FLAG;
     UV_COORD[0] = u;
     UV_COORD[1] = v;
 }

@@ -252,12 +243,12 @@ void APIENTRY glTexCoord2fv(const GLfloat* v) {
 }
 
 void APIENTRY glNormal3f(GLfloat x, GLfloat y, GLfloat z) {
-    ENABLED_VERTEX_ATTRIBUTES |= NORMAL_ENABLED_FLAG;
+    IM_ENABLED_VERTEX_ATTRIBUTES |= NORMAL_ENABLED_FLAG;
     NORMAL = pack_vertex_attribute_vec3_1i(x, y, z);
 }
 
 void APIENTRY glNormal3fv(const GLfloat* v) {
-    ENABLED_VERTEX_ATTRIBUTES |= NORMAL_ENABLED_FLAG;
+    IM_ENABLED_VERTEX_ATTRIBUTES |= NORMAL_ENABLED_FLAG;
     glNormal3f(v[0], v[1], v[2]);
 }
 

@@ -272,7 +263,7 @@ void APIENTRY glEnd() {
     NORMAL_ATTRIB.ptr = NORMALS.data;
     ST_ATTRIB.ptr = ST_COORDS.data;
 
-    GLuint* attrs = _glGetEnabledAttributes();
+    GLuint* attrs = &ENABLED_VERTEX_ATTRIBUTES;
 
     /* Stash existing values */
     AttribPointer vptr = VERTEX_POINTER;

@@ -290,10 +281,11 @@ void APIENTRY glEnd() {
     UV_POINTER = UV_ATTRIB;
     ST_POINTER = ST_ATTRIB;
 
-    *attrs = ENABLED_VERTEX_ATTRIBUTES;
+    *attrs = IM_ENABLED_VERTEX_ATTRIBUTES;
 
 #ifndef NDEBUG
     _glRecalcFastPath();
-    /* If we're not debugging, set to true - we assume we haven't broken it! */
-    FAST_PATH_ENABLED = GL_TRUE;
 #else
+    // Immediate mode should always activate the fast path
+    GLboolean fastPathEnabled = _glRecalcFastPath();
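Note: after the rename, only attributes actually submitted between glBegin() and
glEnd() accumulate in IM_ENABLED_VERTEX_ATTRIBUTES, and glEnd() copies that mask
over the shared ENABLED_VERTEX_ATTRIBUTES before drawing, which is why immediate
mode is expected to land on the fast path. A small usage sketch (plain
immediate-mode GL, no calls beyond those appearing in this diff):

    #include <GL/gl.h>

    static void draw_triangle(void) {
        glBegin(GL_TRIANGLES);
            /* sets DIFFUSE_ENABLED_FLAG in IM_ENABLED_VERTEX_ATTRIBUTES */
            glColor3f(1.0f, 0.0f, 0.0f);

            /* each call sets VERTEX_ENABLED_FLAG and appends one vertex */
            glVertex3f( 0.0f,  1.0f, 0.0f);
            glVertex3f(-1.0f, -1.0f, 0.0f);
            glVertex3f( 1.0f, -1.0f, 0.0f);
        glEnd(); /* copies the mask, then _glRecalcFastPath() runs */
    }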

GL/private.h (87 changed lines)
@@ -301,7 +301,6 @@ Matrix4x4* _glGetProjectionMatrix();
 Matrix4x4* _glGetModelViewMatrix();
 
 void _glWipeTextureOnFramebuffers(GLuint texture);
-GLubyte _glCheckImmediateModeInactive(const char* func);
 
 PolyContext* _glGetPVRContext();
 GLubyte _glInitTextures();

@@ -329,7 +328,12 @@ GLenum _glGetShadeModel();
 TextureObject* _glGetTexture0();
 TextureObject* _glGetTexture1();
 TextureObject* _glGetBoundTexture();
 
+extern GLubyte ACTIVE_TEXTURE;
+extern GLboolean TEXTURES_ENABLED[];
+
+GLubyte _glGetActiveTexture();
+
 GLuint _glGetActiveClientTexture();
 TexturePalette* _glGetSharedPalette(GLshort bank);
 void _glSetInternalPaletteFormat(GLenum val);

@@ -367,13 +371,92 @@ GLboolean _glIsMipmapComplete(const TextureObject* obj);
 GLubyte* _glGetMipmapLocation(const TextureObject* obj, GLuint level);
 GLuint _glGetMipmapLevelCount(const TextureObject* obj);
 
+extern GLboolean LIGHTING_ENABLED;
 GLboolean _glIsLightingEnabled();
+
 void _glEnableLight(GLubyte light, unsigned char value);
 GLboolean _glIsColorMaterialEnabled();
 
 GLboolean _glIsNormalizeEnabled();
 
-GLboolean _glRecalcFastPath();
+extern AttribPointer VERTEX_POINTER;
+extern AttribPointer UV_POINTER;
+extern AttribPointer ST_POINTER;
+extern AttribPointer NORMAL_POINTER;
+extern AttribPointer DIFFUSE_POINTER;
+extern GLuint ENABLED_VERTEX_ATTRIBUTES;
+extern GLboolean FAST_PATH_ENABLED;
+
+GL_FORCE_INLINE GLboolean _glIsVertexDataFastPathCompatible() {
+    /* The fast path is enabled when all enabled elements of the vertex
+     * match the output format. This means:
+     *
+     * xyz == 3f
+     * uv == 2f
+     * rgba == argb4444
+     * st == 2f
+     * normal == 3f
+     *
+     * When this happens we do inline straight copies of the enabled data
+     * and transforms for positions and normals happen while copying.
+     */
+
+    if((ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG)) {
+        if(VERTEX_POINTER.size != 3 || VERTEX_POINTER.type != GL_FLOAT) {
+            return GL_FALSE;
+        }
+    }
+
+    if((ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG)) {
+        if(UV_POINTER.size != 2 || UV_POINTER.type != GL_FLOAT) {
+            return GL_FALSE;
+        }
+    }
+
+    if((ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG)) {
+        /* FIXME: Shouldn't this be a reversed format? */
+        if(DIFFUSE_POINTER.size != GL_BGRA || DIFFUSE_POINTER.type != GL_UNSIGNED_BYTE) {
+            return GL_FALSE;
+        }
+    }
+
+    if((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG)) {
+        if(ST_POINTER.size != 2 || ST_POINTER.type != GL_FLOAT) {
+            return GL_FALSE;
+        }
+    }
+
+    if((ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG)) {
+        if(NORMAL_POINTER.size != 3 || NORMAL_POINTER.type != GL_FLOAT) {
+            return GL_FALSE;
+        }
+    }
+
+    return GL_TRUE;
+}
+
+GL_FORCE_INLINE GLboolean _glRecalcFastPath() {
+    FAST_PATH_ENABLED = _glIsVertexDataFastPathCompatible();
+    return FAST_PATH_ENABLED;
+}
+
+extern GLboolean IMMEDIATE_MODE_ACTIVE;
+
+void _glKosThrowError(GLenum error, const char *function);
+void _glKosPrintError();
+
+GL_FORCE_INLINE GLboolean _glCheckImmediateModeInactive(const char* func) {
+    /* Returns 1 on error */
+    if(IMMEDIATE_MODE_ACTIVE) {
+        _glKosThrowError(GL_INVALID_OPERATION, func);
+        _glKosPrintError();
+        return GL_TRUE;
+    }
+
+    return GL_FALSE;
+}
+
 typedef struct {
     float n[3]; // 12 bytes
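Note: with _glCheckImmediateModeInactive() now a GL_FORCE_INLINE in this header,
entry points can run the begin/end guard without a cross-module call. The call
pattern below is inferred from the function's contract (it raises
GL_INVALID_OPERATION and returns GL_TRUE when inside glBegin()/glEnd()); the
entry point itself is hypothetical and not part of this commit:

    /* Hypothetical API entry point showing the assumed guard usage. */
    void APIENTRY glSomethingKOS(GLint value) {
        if(_glCheckImmediateModeInactive(__func__)) {
            return; /* error already thrown via _glKosThrowError() */
        }

        /* ... normal processing ... */
        (void) value;
    }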

GL/state.c

@@ -18,7 +18,7 @@ static GLenum FRONT_FACE = GL_CCW;
 static GLboolean CULLING_ENABLED = GL_FALSE;
 static GLboolean COLOR_MATERIAL_ENABLED = GL_FALSE;
 
-static GLboolean LIGHTING_ENABLED = GL_FALSE;
+GLboolean LIGHTING_ENABLED = GL_FALSE;
 
 /* Is the shared texture palette enabled? */
 static GLboolean SHARED_PALETTE_ENABLED = GL_FALSE;

GL/texture.c

@@ -20,7 +20,7 @@
 
 static TextureObject* TEXTURE_UNITS[MAX_TEXTURE_UNITS] = {NULL, NULL};
 static NamedArray TEXTURE_OBJECTS;
-static GLubyte ACTIVE_TEXTURE = 0;
+GLubyte ACTIVE_TEXTURE = 0;
 
 static TexturePalette* SHARED_PALETTES[4] = {NULL, NULL, NULL, NULL};

containers/aligned_vector.c

@@ -4,15 +4,6 @@
 #include <assert.h>
 #include <stdio.h>
 
-#if defined(__APPLE__) || defined(__WIN32__)
-/* Linux + Kos define this, OSX does not, so just use malloc there */
-static inline void* memalign(size_t alignment, size_t size) {
-    return malloc(size);
-}
-#else
-#include <malloc.h>
-#endif
-
 #ifdef _arch_dreamcast
 #include "../GL/private.h"
 #else

@@ -21,6 +12,11 @@ static inline void* memalign(size_t alignment, size_t size) {
 
 #include "aligned_vector.h"
 
+extern inline void* aligned_vector_resize(AlignedVector* vector, const unsigned int element_count);
+extern inline void* aligned_vector_extend(AlignedVector* vector, const unsigned int additional_count);
+extern inline void* aligned_vector_reserve(AlignedVector* vector, unsigned int element_count);
+extern inline void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count);
+
 void aligned_vector_init(AlignedVector* vector, unsigned int element_size) {
     vector->size = vector->capacity = 0;
     vector->element_size = element_size;

@@ -30,91 +26,6 @@ void aligned_vector_init(AlignedVector* vector, unsigned int element_size) {
     aligned_vector_reserve(vector, ALIGNED_VECTOR_CHUNK_SIZE);
 }
 
-static inline unsigned int round_to_chunk_size(unsigned int val) {
-    const unsigned int n = val;
-    const unsigned int m = ALIGNED_VECTOR_CHUNK_SIZE;
-
-    return ((n + m - 1) / m) * m;
-}
-
-void* aligned_vector_reserve(AlignedVector* vector, unsigned int element_count) {
-    if(element_count == 0) {
-        return NULL;
-    }
-
-    if(element_count <= vector->capacity) {
-        return NULL;
-    }
-
-    unsigned int original_byte_size = vector->size * vector->element_size;
-
-    /* We overallocate so that we don't make small allocations during push backs */
-    element_count = round_to_chunk_size(element_count);
-
-    unsigned int new_byte_size = element_count * vector->element_size;
-    unsigned char* original_data = vector->data;
-
-    vector->data = (unsigned char*) memalign(0x20, new_byte_size);
-    assert(vector->data);
-
-    if(original_data) {
-        FASTCPY(vector->data, original_data, original_byte_size);
-        free(original_data);
-    }
-
-    vector->capacity = element_count;
-
-    return vector->data + original_byte_size;
-}
-
-void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count) {
-    /* Resize enough room */
-    assert(count);
-    assert(vector->element_size);
-
-    unsigned int initial_size = vector->size;
-    aligned_vector_resize(vector, vector->size + count);
-
-    assert(vector->size == initial_size + count);
-
-    unsigned char* dest = vector->data + (vector->element_size * initial_size);
-
-    /* Copy the objects in */
-    FASTCPY(dest, objs, vector->element_size * count);
-
-    return dest;
-}
-
-void* aligned_vector_resize(AlignedVector* vector, const unsigned int element_count) {
-    void* ret = NULL;
-
-    unsigned int previousCount = vector->size;
-
-    /* Don't change memory when resizing downwards, just change the size */
-    if(element_count <= vector->size) {
-        vector->size = element_count;
-        return NULL;
-    }
-
-    if(vector->capacity < element_count) {
-        ret = aligned_vector_reserve(vector, element_count);
-        vector->size = element_count;
-    } else if(previousCount < element_count) {
-        vector->size = element_count;
-        ret = aligned_vector_at(vector, previousCount);
-    }
-
-    if(previousCount < vector->size) {
-        return ret;
-    } else {
-        return NULL;
-    }
-}
-
-void* aligned_vector_extend(AlignedVector* vector, const unsigned int additional_count) {
-    return aligned_vector_resize(vector, vector->size + additional_count);
-}
-
 void aligned_vector_shrink_to_fit(AlignedVector* vector) {
     if(vector->size == 0) {
         free(vector->data);
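Note: the extern inline declarations added above follow the C99 inline model: a
function defined plain inline in a header does not, by itself, emit an
out-of-line definition, so exactly one translation unit must redeclare it
extern inline to provide the externally visible copy for any non-inlined calls.
A self-contained sketch of the idiom with generic names (not GLdc code):

    /* vec.h - definition visible to every includer */
    inline int vec_double(int x) {
        return x * 2;
    }

    /* vec.c - including the header and redeclaring the function extern inline
     * forces the compiler to emit the external definition in this one file. */
    extern inline int vec_double(int x);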

containers/aligned_vector.h

@@ -2,11 +2,21 @@
 
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+#if defined(__APPLE__) || defined(__WIN32__)
+/* Linux + Kos define this, OSX does not, so just use malloc there */
+static inline void* memalign(size_t alignment, size_t size) {
+    return malloc(size);
+}
+#else
+#include <malloc.h>
+#endif
+
 typedef struct {
     unsigned int size;
     unsigned int capacity;

@@ -24,16 +34,95 @@ typedef struct {
 #define AV_FORCE_INLINE static AV_INLINE_DEBUG
 #endif
 
+#define ROUND_TO_CHUNK_SIZE(v) \
+    ((((v) + ALIGNED_VECTOR_CHUNK_SIZE - 1) / ALIGNED_VECTOR_CHUNK_SIZE) * ALIGNED_VECTOR_CHUNK_SIZE)
+
+
 void aligned_vector_init(AlignedVector* vector, unsigned int element_size);
-void* aligned_vector_reserve(AlignedVector* vector, unsigned int element_count);
-void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count);
-void* aligned_vector_resize(AlignedVector* vector, const unsigned int element_count);
+
+AV_FORCE_INLINE void* aligned_vector_reserve(AlignedVector* vector, unsigned int element_count) {
+    if(element_count == 0) {
+        return NULL;
+    }
+
+    if(element_count <= vector->capacity) {
+        return NULL;
+    }
+
+    unsigned int original_byte_size = vector->size * vector->element_size;
+
+    /* We overallocate so that we don't make small allocations during push backs */
+    element_count = ROUND_TO_CHUNK_SIZE(element_count);
+
+    unsigned int new_byte_size = element_count * vector->element_size;
+    unsigned char* original_data = vector->data;
+
+    vector->data = (unsigned char*) memalign(0x20, new_byte_size);
+    assert(vector->data);
+
+    if(original_data) {
+        memcpy(vector->data, original_data, original_byte_size);
+        free(original_data);
+    }
+
+    vector->capacity = element_count;
+
+    return vector->data + original_byte_size;
+}
+
 AV_FORCE_INLINE void* aligned_vector_at(const AlignedVector* vector, const unsigned int index) {
     assert(index < vector->size);
     return &vector->data[index * vector->element_size];
 }
-void* aligned_vector_extend(AlignedVector* vector, const unsigned int additional_count);
+
+AV_FORCE_INLINE void* aligned_vector_resize(AlignedVector* vector, const unsigned int element_count) {
+    void* ret = NULL;
+
+    unsigned int previousCount = vector->size;
+
+    /* Don't change memory when resizing downwards, just change the size */
+    if(element_count <= vector->size) {
+        vector->size = element_count;
+        return NULL;
+    }
+
+    if(vector->capacity < element_count) {
+        ret = aligned_vector_reserve(vector, element_count);
+        vector->size = element_count;
+    } else if(previousCount < element_count) {
+        vector->size = element_count;
+        ret = aligned_vector_at(vector, previousCount);
+    }
+
+    if(previousCount < vector->size) {
+        return ret;
+    } else {
+        return NULL;
+    }
+}
+
+AV_FORCE_INLINE void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count) {
+    /* Resize enough room */
+    assert(count);
+    assert(vector->element_size);
+
+    unsigned int initial_size = vector->size;
+    aligned_vector_resize(vector, vector->size + count);
+
+    assert(vector->size == initial_size + count);
+
+    unsigned char* dest = vector->data + (vector->element_size * initial_size);
+
+    /* Copy the objects in */
+    memcpy(dest, objs, vector->element_size * count);
+
+    return dest;
+}
+
+
+AV_FORCE_INLINE void* aligned_vector_extend(AlignedVector* vector, const unsigned int additional_count) {
+    return aligned_vector_resize(vector, vector->size + additional_count);
+}
+
 AV_FORCE_INLINE void aligned_vector_clear(AlignedVector* vector){
     vector->size = 0;
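Note: with reserve/resize/push_back/extend now defined in this header, the hot
append path can inline completely into callers. A small usage sketch built only
from the functions shown above (the Point type and counts are illustrative;
capacity is rounded up in ALIGNED_VECTOR_CHUNK_SIZE multiples by
ROUND_TO_CHUNK_SIZE, so small vectors overallocate by design):

    #include "aligned_vector.h"

    typedef struct { float x, y, z; } Point;

    static void example(void) {
        AlignedVector points;
        aligned_vector_init(&points, sizeof(Point));

        Point p = {1.0f, 2.0f, 3.0f};
        aligned_vector_push_back(&points, &p, 1);   /* copies one element in */

        Point* first = (Point*) aligned_vector_at(&points, 0);
        first->x = 0.0f;                            /* aligned, in-place access */

        /* grow by two elements; returns a pointer to the new region */
        Point* extra = (Point*) aligned_vector_extend(&points, 2);
        extra[0] = p;
        extra[1] = p;

        aligned_vector_clear(&points);              /* size -> 0, storage kept */
    }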