Clean up aligned vector

This commit is contained in:
Luke Benstead 2023-05-12 20:51:36 +01:00
parent e683b8becb
commit 9cffe14ad6
11 changed files with 162 additions and 112 deletions

View File

@ -239,7 +239,7 @@ static void _fillWithNegZVE(const GLubyte* __restrict__ input, GLubyte* __restri
float x, y, z;
} V;
const static V NegZ = {0.0f, 0.0f, -1.0f};
static const V NegZ = {0.0f, 0.0f, -1.0f};
*((V*) out) = NegZ;
}
@ -391,12 +391,12 @@ GL_FORCE_INLINE void transformNormalToEyeSpace(GLfloat* normal) {
}
GL_FORCE_INLINE PolyHeader *_glSubmissionTargetHeader(SubmissionTarget* target) {
gl_assert(target->header_offset < target->output->vector.size);
gl_assert(target->header_offset < aligned_vector_size(&target->output->vector));
return aligned_vector_at(&target->output->vector, target->header_offset);
}
GL_INLINE_DEBUG Vertex* _glSubmissionTargetStart(SubmissionTarget* target) {
gl_assert(target->start_offset < target->output->vector.size);
gl_assert(target->start_offset < aligned_vector_size(&target->output->vector));
return aligned_vector_at(&target->output->vector, target->start_offset);
}
@ -1210,15 +1210,14 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
return;
}
GLboolean header_required = (target->output->vector.size == 0) || _glGPUStateIsDirty();
/* We don't handle this any further, so just make sure we never pass it down */
gl_assert(mode != GL_POLYGON);
target->output = _glActivePolyList();
GLboolean header_required = (aligned_vector_header(&target->output->vector)->size == 0) || _glGPUStateIsDirty();
target->count = (mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count;
target->header_offset = target->output->vector.size;
target->header_offset = aligned_vector_header(&target->output->vector)->size;
target->start_offset = target->header_offset + (header_required);
gl_assert(target->count);

View File

@ -24,7 +24,7 @@ MAKE_FUNC(POLYMODE)
VertexExtra* ve;
for(int min = 0; min < count; min += BATCH_SIZE) {
for(min = 0; min < count; min += BATCH_SIZE) {
const Vertex* start = ((Vertex*) _glSubmissionTargetStart(target)) + min;
const int_fast32_t loop = ((min + BATCH_SIZE) > count) ? count - min : BATCH_SIZE;
const int offset = (first + min);

View File

@ -93,21 +93,21 @@ void APIENTRY glKosSwapBuffers() {
TRACE();
SceneBegin();
if(OP_LIST.vector.size > 2) {
if(aligned_vector_header(&OP_LIST.vector)->size > 2) {
SceneListBegin(GPU_LIST_OP_POLY);
SceneListSubmit((Vertex*) OP_LIST.vector.data, OP_LIST.vector.size);
SceneListSubmit((Vertex*) aligned_vector_front(&OP_LIST.vector), aligned_vector_size(&OP_LIST.vector));
SceneListFinish();
}
if(PT_LIST.vector.size > 2) {
if(aligned_vector_header(&PT_LIST.vector)->size > 2) {
SceneListBegin(GPU_LIST_PT_POLY);
SceneListSubmit((Vertex*) PT_LIST.vector.data, PT_LIST.vector.size);
SceneListSubmit((Vertex*) aligned_vector_front(&PT_LIST.vector), aligned_vector_size(&PT_LIST.vector));
SceneListFinish();
}
if(TR_LIST.vector.size > 2) {
if(aligned_vector_header(&TR_LIST.vector)->size > 2) {
SceneListBegin(GPU_LIST_TR_POLY);
SceneListSubmit((Vertex*) TR_LIST.vector.data, TR_LIST.vector.size);
SceneListSubmit((Vertex*) aligned_vector_front(&TR_LIST.vector), aligned_vector_size(&TR_LIST.vector));
SceneListFinish();
}
SceneFinish();

View File

@ -50,7 +50,7 @@ void _glInitImmediateMode(GLuint initial_size) {
aligned_vector_init(&VERTICES, sizeof(IMVertex));
aligned_vector_reserve(&VERTICES, initial_size);
IM_ATTRIBS.vertex.ptr = VERTICES.data;
IM_ATTRIBS.vertex.ptr = aligned_vector_front(&VERTICES);
IM_ATTRIBS.vertex.size = 3;
IM_ATTRIBS.vertex.type = GL_FLOAT;
IM_ATTRIBS.vertex.stride = sizeof(IMVertex);
@ -161,12 +161,11 @@ void APIENTRY glColor3fv(const GLfloat* v) {
void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) {
IM_ENABLED_VERTEX_ATTRIBUTES |= VERTEX_ENABLED_FLAG;
unsigned int cap = VERTICES.capacity;
uint32_t cap = aligned_vector_capacity(&VERTICES);
IMVertex* vert = aligned_vector_extend(&VERTICES, 1);
if(cap != VERTICES.capacity) {
if(cap != aligned_vector_capacity(&VERTICES)) {
/* Resizing could've invalidated the pointers */
IM_ATTRIBS.vertex.ptr = VERTICES.data;
IM_ATTRIBS.vertex.ptr = aligned_vector_front(&VERTICES);
IM_ATTRIBS.uv.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 3);
IM_ATTRIBS.st.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 5);
IM_ATTRIBS.colour.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 7);
@ -281,7 +280,7 @@ void APIENTRY glEnd() {
FAST_PATH_ENABLED = GL_TRUE;
#endif
glDrawArrays(ACTIVE_POLYGON_MODE, 0, VERTICES.size);
glDrawArrays(ACTIVE_POLYGON_MODE, 0, aligned_vector_header(&VERTICES)->size);
ATTRIB_POINTERS = stashed_attrib_pointers;

View File

@ -50,9 +50,8 @@ GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) {
const float f = _glFastInvert(vertex->w);
/* Convert to NDC and apply viewport */
vertex->xyz[0] = ((vertex->xyz[0] * f) * 320) + 320;
vertex->xyz[1] = ((vertex->xyz[1] * f) * -240) + 240;
vertex->xyz[2] = f;
vertex->xyz[0] = (vertex->xyz[0] * f * 320) + 320;
vertex->xyz[1] = (vertex->xyz[1] * f * -240) + 240;
/* Orthographic projections need to use invZ otherwise we lose
the depth information. As w == 1, and clip-space range is -w to +w
@ -61,6 +60,8 @@ GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) {
*/
if(vertex->w == 1.0f) {
vertex->xyz[2] = _glFastInvert(1.0001f + vertex->xyz[2]);
} else {
vertex->xyz[2] = f;
}
}
@ -137,12 +138,13 @@ void SceneListSubmit(Vertex* v2, int n) {
fprintf(stderr, "----\n");
#endif
uint8_t __attribute__((aligned(32))) visible_mask = 0;
uint8_t __attribute__((aligned(32))) counter = 0;
uint8_t visible_mask = 0;
uint8_t counter = 0;
sq = SQ_BASE_ADDRESS;
for(int i = 0; i < n; ++i, ++v2) {
PREFETCH(v2 + 1);
switch(v2->flags) {
case GPU_CMD_VERTEX_EOL:
if(counter < 2) {

View File

@ -255,7 +255,7 @@ GL_FORCE_INLINE void ShiftRotateTriangle() {
tri_count--;
}
void SceneListSubmit(void* src, int n) {
void SceneListSubmit(Vertex* src, int n) {
/* Perform perspective divide on each vertex */
Vertex* vertex = (Vertex*) src;

View File

@ -12,36 +12,44 @@
#include "aligned_vector.h"
extern inline void* aligned_vector_resize(AlignedVector* vector, const unsigned int element_count);
extern inline void* aligned_vector_extend(AlignedVector* vector, const unsigned int additional_count);
extern inline void* aligned_vector_reserve(AlignedVector* vector, unsigned int element_count);
extern inline void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count);
extern inline void* aligned_vector_resize(AlignedVector* vector, const uint32_t element_count);
extern inline void* aligned_vector_extend(AlignedVector* vector, const uint32_t additional_count);
extern inline void* aligned_vector_reserve(AlignedVector* vector, uint32_t element_count);
extern inline void* aligned_vector_push_back(AlignedVector* vector, const void* objs, uint32_t count);
void aligned_vector_init(AlignedVector* vector, unsigned int element_size) {
vector->size = vector->capacity = 0;
vector->element_size = element_size;
vector->data = NULL;
void aligned_vector_init(AlignedVector* vector, uint32_t element_size) {
/* Now initialize the header*/
AlignedVectorHeader* const hdr = &vector->hdr;
hdr->size = 0;
hdr->capacity = ALIGNED_VECTOR_CHUNK_SIZE;
hdr->element_size = element_size;
/* Reserve some initial capacity */
aligned_vector_reserve(vector, ALIGNED_VECTOR_CHUNK_SIZE);
/* Reserve some initial capacity. This will do the allocation but not set up the header */
void* ptr = aligned_vector_reserve(vector, ALIGNED_VECTOR_CHUNK_SIZE);
assert(ptr);
(void) ptr;
}
void aligned_vector_shrink_to_fit(AlignedVector* vector) {
if(vector->size == 0) {
AlignedVectorHeader* const hdr = &vector->hdr;
if(hdr->size == 0) {
uint32_t element_size = hdr->element_size;
free(vector->data);
vector->data = NULL;
vector->capacity = 0;
/* Reallocate the header */
vector->data = memalign(0x20, sizeof(AlignedVectorHeader));
hdr->size = hdr->capacity = 0;
hdr->element_size = element_size;
} else {
unsigned int new_byte_size = vector->size * vector->element_size;
unsigned char* original_data = vector->data;
uint32_t new_byte_size = (hdr->size * hdr->element_size);
uint8_t* original_data = vector->data;
vector->data = (unsigned char*) memalign(0x20, new_byte_size);
if(original_data) {
FASTCPY(vector->data, original_data, new_byte_size);
free(original_data);
}
vector->capacity = vector->size;
hdr->capacity = hdr->size;
}
}

View File

@ -67,10 +67,14 @@ AV_FORCE_INLINE void *AV_MEMCPY4(void *dest, const void *src, size_t len)
#endif
typedef struct {
uint8_t* __attribute__((aligned(32))) data;
uint32_t size;
uint32_t capacity;
uint32_t element_size;
} __attribute__((aligned(32))) AlignedVectorHeader;
typedef struct {
AlignedVectorHeader hdr;
uint8_t* data;
} AlignedVector;
#define ALIGNED_VECTOR_CHUNK_SIZE 256u
@ -80,91 +84,129 @@ typedef struct {
((((v) + ALIGNED_VECTOR_CHUNK_SIZE - 1) / ALIGNED_VECTOR_CHUNK_SIZE) * ALIGNED_VECTOR_CHUNK_SIZE)
void aligned_vector_init(AlignedVector* vector, unsigned int element_size);
void aligned_vector_init(AlignedVector* vector, uint32_t element_size);
AV_FORCE_INLINE void* aligned_vector_reserve(AlignedVector* vector, unsigned int element_count) {
if(element_count <= vector->capacity) {
return NULL;
/* Returns a pointer to the element stored at `index`.
 *
 * The index is checked against the header's current size with assert(), so
 * out-of-range access is only caught when assertions are enabled. The byte
 * offset is index * element_size from the start of the data buffer. */
AV_FORCE_INLINE void* aligned_vector_at(const AlignedVector* vector, const uint32_t index) {
    assert(index < vector->hdr.size);
    return &vector->data[index * vector->hdr.element_size];
}
AV_FORCE_INLINE void* aligned_vector_reserve(AlignedVector* vector, uint32_t element_count) {
AlignedVectorHeader* hdr = &vector->hdr;
if(element_count < hdr->capacity) {
return aligned_vector_at(vector, element_count);
}
unsigned int original_byte_size = vector->size * vector->element_size;
uint32_t original_byte_size = (hdr->size * hdr->element_size);
/* We overallocate so that we don't make small allocations during push backs */
element_count = ROUND_TO_CHUNK_SIZE(element_count);
unsigned int new_byte_size = element_count * vector->element_size;
unsigned char* original_data = vector->data;
uint32_t new_byte_size = (element_count * hdr->element_size);
uint8_t* original_data = vector->data;
vector->data = (unsigned char*) memalign(0x20, new_byte_size);
vector->data = (uint8_t*) memalign(0x20, new_byte_size);
assert(vector->data);
if(original_data) {
AV_MEMCPY4(vector->data, original_data, original_byte_size);
free(original_data);
}
vector->capacity = element_count;
hdr->capacity = element_count;
return vector->data + original_byte_size;
}
AV_FORCE_INLINE void* aligned_vector_at(const AlignedVector* vector, const unsigned int index) {
assert(index < vector->size);
return &vector->data[index * vector->element_size];
/* Returns a pointer to the header embedded in `vector` (size, capacity,
 * element_size). Note that the cast drops the const qualifier of the
 * argument, so callers receive a writable header pointer even though the
 * vector is passed as const. */
AV_FORCE_INLINE AlignedVectorHeader* aligned_vector_header(const AlignedVector* vector) {
return (AlignedVectorHeader*) &vector->hdr;
}
AV_FORCE_INLINE void* aligned_vector_resize(AlignedVector* vector, const unsigned int element_count) {
/* Returns the `size` field recorded in the vector's header. */
AV_FORCE_INLINE uint32_t aligned_vector_size(const AlignedVector* vector) {
    return vector->hdr.size;
}
/* Returns the `capacity` field recorded in the vector's header. */
AV_FORCE_INLINE uint32_t aligned_vector_capacity(const AlignedVector* vector) {
    return vector->hdr.capacity;
}
/* Returns the vector's data pointer (the start of the element storage).
 * No size check is performed here. */
AV_FORCE_INLINE void* aligned_vector_front(const AlignedVector* vector) {
return vector->data;
}
/* Resizes the array and returns a pointer to the first new element (if upsizing) or NULL (if downsizing) */
AV_FORCE_INLINE void* aligned_vector_resize(AlignedVector* vector, const uint32_t element_count) {
void* ret = NULL;
unsigned int previousCount = vector->size;
if(vector->capacity < element_count) {
AlignedVectorHeader* hdr = &vector->hdr;
uint32_t previous_count = hdr->size;
if(hdr->capacity <= element_count) {
/* If we didn't have capacity, increase capacity (slow) */
vector->size = element_count;
ret = aligned_vector_reserve(vector, element_count);
} else if(previousCount < element_count) {
aligned_vector_reserve(vector, element_count);
hdr->size = element_count;
ret = aligned_vector_at(vector, previous_count);
assert(hdr->size == element_count);
assert(hdr->size <= hdr->capacity);
} else if(previous_count < element_count) {
/* So we grew, but had the capacity, just get a pointer to
* where we were */
vector->size = element_count;
ret = aligned_vector_at(vector, previousCount);
} else {
vector->size = element_count;
ret = aligned_vector_at(vector, previousCount);
hdr->size = element_count;
assert(hdr->size < hdr->capacity);
ret = aligned_vector_at(vector, previous_count);
} else if(hdr->size != element_count) {
hdr->size = element_count;
assert(hdr->size < hdr->capacity);
}
return ret;
}
AV_FORCE_INLINE void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count) {
AV_FORCE_INLINE void* aligned_vector_push_back(AlignedVector* vector, const void* objs, uint32_t count) {
/* Resize enough room */
AlignedVectorHeader* hdr = &vector->hdr;
assert(count);
assert(vector->element_size);
assert(hdr->element_size);
unsigned int initial_size = vector->size;
aligned_vector_resize(vector, vector->size + count);
#ifndef NDEBUG
uint32_t element_size = hdr->element_size;
uint32_t initial_size = hdr->size;
#endif
assert(vector->size == initial_size + count);
unsigned char* dest = vector->data + (vector->element_size * initial_size);
uint8_t* dest = (uint8_t*) aligned_vector_resize(vector, hdr->size + count);
assert(dest);
/* Copy the objects in */
AV_MEMCPY4(dest, objs, vector->element_size * count);
AV_MEMCPY4(dest, objs, hdr->element_size * count);
assert(hdr->element_size == element_size);
assert(hdr->size == initial_size + count);
return dest;
}
AV_FORCE_INLINE void* aligned_vector_extend(AlignedVector* vector, const unsigned int additional_count) {
return aligned_vector_resize(vector, vector->size + additional_count);
/* Grows the vector by `additional_count` elements and returns whatever
 * aligned_vector_resize() returns for the new total size.
 *
 * The result is asserted to be non-NULL.
 * NOTE(review): aligned_vector_resize appears to return NULL when the size
 * does not grow, so additional_count == 0 would trip this assert - confirm
 * against the resize implementation. */
AV_FORCE_INLINE void* aligned_vector_extend(AlignedVector* vector, const uint32_t additional_count) {
    const uint32_t grown_count = vector->hdr.size + additional_count;
    void* first_new = aligned_vector_resize(vector, grown_count);
    assert(first_new); // Should always return something
    return first_new;
}
AV_FORCE_INLINE void aligned_vector_clear(AlignedVector* vector){
vector->size = 0;
AlignedVectorHeader* hdr = &vector->hdr;
hdr->size = 0;
}
void aligned_vector_shrink_to_fit(AlignedVector* vector);
void aligned_vector_cleanup(AlignedVector* vector);
static inline void* aligned_vector_back(AlignedVector* vector){
return aligned_vector_at(vector, vector->size - 1);
/* Returns a pointer to the last element, i.e. the element at index size - 1.
 *
 * On an empty vector the unsigned subtraction wraps, and the bounds assert
 * inside aligned_vector_at() is the only thing that catches it. */
AV_FORCE_INLINE void* aligned_vector_back(AlignedVector* vector){
    const uint32_t last_index = vector->hdr.size - 1;
    return aligned_vector_at(vector, last_index);
}
#ifdef __cplusplus

View File

@ -1,18 +1,19 @@
#include <cstdio>
#include <stdbool.h>
#include <stdlib.h>
#include <time.h>
#include "GL/gl.h"
#include "GL/glu.h"
#ifdef __DREAMCAST__
#include <kos.h>
#include "GL/glext.h"
#include "GL/glkos.h"
float avgfps = -1;
#endif
#include "GL/gl.h"
#include "GL/glkos.h"
#include "GL/glu.h"
#include "GL/glext.h"
#define PI 3.14159265358979323846264338327950288f
#define RAD_TO_DEG 57.295779513082320876798154814105f
#define MAX_CUBES 350
@ -251,9 +252,7 @@ float rnd(float Min, float Max)
void initialize()
{
debugLog("Initialize video output");
#ifdef __DREAMCAST__
glKosInit();
#endif
glClearDepth(1.0);
glDepthFunc(GL_LEQUAL);

View File

@ -68,21 +68,16 @@ int check_start() {
}
void setup() {
//PVR needs to warm up for a frame, or results will be low
glKosInit();
GLdcConfig cfg;
glKosInitConfig(&cfg);
cfg.initial_immediate_capacity = 14000;
glKosInitEx(&cfg);
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
glOrtho(0, 640, 0, 480, -100, 100);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
glDisable(GL_NEARZ_CLIPPING_KOS);
#ifdef __DREAMCAST__
pvr_wait_ready();
pvr_scene_begin();
pvr_scene_finish();
#endif
}
void do_frame() {
@ -116,6 +111,8 @@ time_t begin;
void switch_tests(int ppf) {
printf("Beginning new test: %d polys per frame (%d per second at 60fps)\n",
ppf * 2, ppf * 2 * 60);
fflush(stdout);
avgfps = -1;
polycnt = ppf;
}
@ -128,7 +125,6 @@ void check_switch() {
if(now >= (begin + 5)) {
begin = time(NULL);
printf(" Average Frame Rate: ~%f fps (%d pps)\n", avgfps, (int)(polycnt * avgfps * 2));
switch(phase) {
case PHASE_HALVE:
@ -169,19 +165,24 @@ void check_switch() {
case PHASE_FINAL:
break;
}
fflush(stdout);
}
}
#define PROFILE 0
int main(int argc, char **argv) {
#ifndef NDEBUG
#ifdef __DREAMCAST__
#if PROFILE
profiler_init("/pc/gmon.out");
profiler_start();
#endif
#endif
setup();
#if PROFILE
profiler_start();
#endif
/* Start off with something obscene */
switch_tests(200000 / 60);
begin = time(NULL);
@ -200,11 +201,9 @@ int main(int argc, char **argv) {
stats();
#ifdef __DREAMCAST__
#ifndef NDEBUG
#if PROFILE
profiler_stop();
profiler_clean_up();
#endif
#endif
return 0;

View File

@ -28,6 +28,8 @@ void InitGL(int Width, int Height) // We call this right after our OpenG
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
glEnable(GL_CULL_FACE);
}
/* The function called when our window is resized (which shouldn't happen, because we're fullscreen) */