From a5891056db642af1a4f1092da0534aeddb53ea51 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Tue, 16 May 2023 13:31:44 +0100 Subject: [PATCH 01/12] Many bug fixes and optimisations --- CMakeLists.txt | 22 +- GL/immediate.c | 49 ++-- GL/matrix.c | 34 +-- GL/platforms/sh4.c | 469 ++++++++++++++++----------------- GL/platforms/software.c | 559 ++++++++++++++++++++-------------------- GL/state.c | 3 +- GL/texture.c | 51 ++-- samples/cubes/main.cpp | 2 +- samples/nehe02/main.c | 4 +- samples/nehe10/main.c | 53 +++- tests/zclip/main.cpp | 2 +- 11 files changed, 650 insertions(+), 598 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f9aadb8..41a2b07 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,8 @@ -cmake_minimum_required(VERSION 3.0) +cmake_minimum_required(VERSION 3.9) project(GLdc) +set(CMAKE_VERBOSE_MAKEFILE ON) + # set the default backend if(PLATFORM_DREAMCAST) set(BACKEND "kospvr" CACHE STRING "Backend to use") @@ -8,6 +10,9 @@ else() set(BACKEND "software" CACHE STRING "Backend to use") endif() +include(CheckIPOSupported) +check_ipo_supported(RESULT FLTO_SUPPORTED OUTPUT FLTO_ERROR) + # List of possible backends set_property(CACHE BACKEND PROPERTY STRINGS kospvr software) @@ -49,10 +54,10 @@ else() set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -ffast-math") endif() -set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3 -fexpensive-optimizations -fomit-frame-pointer -finline-functions") +set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3 -mpretend-cmove -fexpensive-optimizations -fomit-frame-pointer -finline-functions") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -std=c++14 -O3 -g0 -s -fomit-frame-pointer -fstrict-aliasing") -set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 -fexpensive-optimizations -fomit-frame-pointer -finline-functions") +set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 -mpretend-cmove -fexpensive-optimizations -fomit-frame-pointer -finline-functions") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -std=c++14 -O3 -fomit-frame-pointer -fstrict-aliasing") set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g -Wall -Wextra") @@ -106,6 +111,10 @@ endif() add_library(GLdc STATIC ${SOURCES}) +if(FLTO_SUPPORTED) + set_property(TARGET GLdc PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) +endif() + if(NOT PLATFORM_DREAMCAST) set_target_properties(GLdc PROPERTIES COMPILE_OPTIONS "-m32" @@ -129,6 +138,13 @@ function(gen_sample sample) add_executable(${sample} ${SAMPLE_SRCS}) + if(FLTO_SUPPORTED) + # FIXME: Cubes + LTO causes an ICE + if(NOT ${sample} MATCHES "cubes") + set_property(TARGET ${sample} PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) + endif() + endif() + if(PLATFORM_DREAMCAST) if(EXISTS "${CMAKE_SOURCE_DIR}/samples/${sample}/romdisk") message("Generating romdisk for sample: ${sample}") diff --git a/GL/immediate.c b/GL/immediate.c index 69dd7a4..afe80c5 100644 --- a/GL/immediate.c +++ b/GL/immediate.c @@ -17,10 +17,10 @@ extern inline GLuint _glRecalcFastPath(); GLboolean IMMEDIATE_MODE_ACTIVE = GL_FALSE; static GLenum ACTIVE_POLYGON_MODE = GL_TRIANGLES; -static GLfloat NORMAL[3] = {0.0f, 0.0f, 1.0f}; -static GLubyte COLOR[4] = {255, 255, 255, 255}; /* ARGB order for speed */ -static GLfloat UV_COORD[2] = {0.0f, 0.0f}; -static GLfloat ST_COORD[2] = {0.0f, 0.0f}; +static GLfloat __attribute__((aligned(32))) NORMAL[3] = {0.0f, 0.0f, 1.0f}; +static GLubyte __attribute__((aligned(32))) COLOR[4] = {255, 255, 255, 255}; /* ARGB order for speed */ +static GLfloat __attribute__((aligned(32))) UV_COORD[2] = {0.0f, 0.0f}; +static GLfloat __attribute__((aligned(32))) ST_COORD[2] = {0.0f, 0.0f}; static AlignedVector VERTICES; static AttribPointerList IM_ATTRIBS; @@ -30,7 +30,7 @@ static AttribPointerList IM_ATTRIBS; can be applied faster */ static GLuint IM_ENABLED_VERTEX_ATTRIBUTES = 0; -typedef struct { +typedef struct __attribute__((aligned(32))) { GLfloat x; GLfloat y; GLfloat z; @@ -161,30 +161,27 @@ void APIENTRY glColor3fv(const GLfloat* v) { void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) { IM_ENABLED_VERTEX_ATTRIBUTES |= VERTEX_ENABLED_FLAG; - uint32_t cap = aligned_vector_capacity(&VERTICES); IMVertex* vert = aligned_vector_extend(&VERTICES, 1); - if(cap != aligned_vector_capacity(&VERTICES)) { - /* Resizing could've invalidated the pointers */ - IM_ATTRIBS.vertex.ptr = aligned_vector_front(&VERTICES); - IM_ATTRIBS.uv.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 3); - IM_ATTRIBS.st.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 5); - IM_ATTRIBS.colour.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 7); - IM_ATTRIBS.normal.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 7) + sizeof(uint32_t); - } - vert->x = x; - vert->y = y; - vert->z = z; - vert->u = UV_COORD[0]; - vert->v = UV_COORD[1]; - vert->s = ST_COORD[0]; - vert->t = ST_COORD[1]; + /* Resizing could've invalidated the pointers */ + IM_ATTRIBS.vertex.ptr = VERTICES.data; + IM_ATTRIBS.uv.ptr = IM_ATTRIBS.vertex.ptr + 12; + IM_ATTRIBS.st.ptr = IM_ATTRIBS.uv.ptr + 8; + IM_ATTRIBS.colour.ptr = IM_ATTRIBS.st.ptr + 8; + IM_ATTRIBS.normal.ptr = IM_ATTRIBS.colour.ptr + 4; - *((uint32_t*) vert->bgra) = *((uint32_t*) COLOR); - - vert->nx = NORMAL[0]; - vert->ny = NORMAL[1]; - vert->nz = NORMAL[2]; + uint32_t* dest = (uint32_t*) &vert->x; + *(dest++) = *((uint32_t*) &x); + *(dest++) = *((uint32_t*) &y); + *(dest++) = *((uint32_t*) &z); + *(dest++) = *((uint32_t*) &UV_COORD[0]); + *(dest++) = *((uint32_t*) &UV_COORD[1]); + *(dest++) = *((uint32_t*) &ST_COORD[0]); + *(dest++) = *((uint32_t*) &ST_COORD[1]); + *(dest++) = *((uint32_t*) COLOR); + *(dest++) = *((uint32_t*) &NORMAL[0]); + *(dest++) = *((uint32_t*) &NORMAL[1]); + *(dest++) = *((uint32_t*) &NORMAL[2]); } void APIENTRY glVertex3fv(const GLfloat* v) { diff --git a/GL/matrix.c b/GL/matrix.c index 73ca6fd..32afff4 100644 --- a/GL/matrix.c +++ b/GL/matrix.c @@ -13,8 +13,8 @@ GLfloat DEPTH_RANGE_MULTIPLIER_L = (1 - 0) / 2; GLfloat DEPTH_RANGE_MULTIPLIER_H = (0 + 1) / 2; -static Stack MATRIX_STACKS[3]; // modelview, projection, texture -static Matrix4x4 NORMAL_MATRIX __attribute__((aligned(32))); +static Stack __attribute__((aligned(32))) MATRIX_STACKS[4]; // modelview, projection, texture +static Matrix4x4 __attribute__((aligned(32))) NORMAL_MATRIX; Viewport VIEWPORT = { 0, 0, 640, 480, 320.0f, 240.0f, 320.0f, 240.0f @@ -23,7 +23,7 @@ Viewport VIEWPORT = { static GLenum MATRIX_MODE = GL_MODELVIEW; static GLubyte MATRIX_IDX = 0; -static const Matrix4x4 IDENTITY = { +static const Matrix4x4 __attribute__((aligned(32))) IDENTITY = { 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, @@ -106,7 +106,11 @@ void APIENTRY glMatrixMode(GLenum mode) { } void APIENTRY glPushMatrix() { - stack_push(MATRIX_STACKS + MATRIX_IDX, stack_top(MATRIX_STACKS + MATRIX_IDX)); + void* top = stack_top(MATRIX_STACKS + MATRIX_IDX); + assert(top); + void* ret = stack_push(MATRIX_STACKS + MATRIX_IDX, top); + (void) ret; + assert(ret); } void APIENTRY glPopMatrix() { @@ -127,10 +131,16 @@ void APIENTRY glTranslatef(GLfloat x, GLfloat y, GLfloat z) { 0.0f, 0.0f, 1.0f, 0.0f, x, y, z, 1.0f }; + void* top = stack_top(MATRIX_STACKS + MATRIX_IDX); + assert(top); - UploadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX)); + UploadMatrix4x4(top); MultiplyMatrix4x4(&trn); - DownloadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX)); + + top = stack_top(MATRIX_STACKS + MATRIX_IDX); + assert(top); + + DownloadMatrix4x4(top); if(MATRIX_MODE == GL_MODELVIEW) { recalculateNormalMatrix(); @@ -270,18 +280,10 @@ void APIENTRY glFrustum(GLfloat left, GLfloat right, /* Multiply the current matrix by an arbitrary matrix */ void glMultMatrixf(const GLfloat *m) { Matrix4x4 TEMP __attribute__((aligned(32))); - const Matrix4x4 *pMatrix; - - if (((GLint)m)&0xf){ /* Unaligned matrix */ - pMatrix = &TEMP; - MEMCPY4(TEMP, m, sizeof(Matrix4x4)); - } - else{ - pMatrix = (const Matrix4x4*) m; - } + MEMCPY4(TEMP, m, sizeof(Matrix4x4)); UploadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX)); - MultiplyMatrix4x4(pMatrix); + MultiplyMatrix4x4(&TEMP); DownloadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX)); if(MATRIX_MODE == GL_MODELVIEW) { diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index addc6fe..a69f286 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -146,23 +146,22 @@ void SceneListSubmit(Vertex* v2, int n) { for(int i = 0; i < n; ++i, ++v2) { PREFETCH(v2 + 1); switch(v2->flags) { - case GPU_CMD_VERTEX_EOL: - if(counter < 2) { - continue; - } - - counter = 0; - break; - case GPU_CMD_VERTEX: - ++counter; - if(counter < 3) { - continue; - } - break; - default: - _glPushHeaderOrVertex(v2); - counter = 0; + case GPU_CMD_VERTEX_EOL: + if(counter < 2) { continue; + } + counter = 0; + break; + case GPU_CMD_VERTEX: + ++counter; + if(counter < 3) { + continue; + } + break; + default: + _glPushHeaderOrVertex(v2); + counter = 0; + continue; }; Vertex* const v0 = v2 - 2; @@ -176,252 +175,254 @@ void SceneListSubmit(Vertex* v2, int n) { ); switch(visible_mask) { - case 15: /* All visible, but final vertex in strip */ + case 15: /* All visible, but final vertex in strip */ + { + _glPerspectiveDivideVertex(v0, h); + _glPushHeaderOrVertex(v0); + + _glPerspectiveDivideVertex(v1, h); + _glPushHeaderOrVertex(v1); + + _glPerspectiveDivideVertex(v2, h); + _glPushHeaderOrVertex(v2); + } + break; + case 7: + /* All visible, push the first vertex and move on */ + _glPerspectiveDivideVertex(v0, h); + _glPushHeaderOrVertex(v0); + break; + case 9: + /* First vertex was visible, last in strip */ { + Vertex __attribute__((aligned(32))) scratch[2]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; + + _glClipEdge(v2, v0, b); + b->flags = GPU_CMD_VERTEX_EOL; + _glPerspectiveDivideVertex(v0, h); _glPushHeaderOrVertex(v0); - _glPerspectiveDivideVertex(v1, h); - _glPushHeaderOrVertex(v1); + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); - _glPerspectiveDivideVertex(v2, h); - _glPushHeaderOrVertex(v2); + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); } break; - case 7: - /* All visible, push the first vertex and move on */ + case 1: + /* First vertex was visible, but not last in strip */ + { + Vertex __attribute__((aligned(32))) scratch[2]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; + + _glClipEdge(v2, v0, b); + b->flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(v0, h); _glPushHeaderOrVertex(v0); + + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); + _glPushHeaderOrVertex(b); + } break; - case 9: - /* First vertex was visible, last in strip */ - { - Vertex __attribute__((aligned(32))) scratch[2]; - Vertex* a = &scratch[0]; - Vertex* b = &scratch[1]; - - _glClipEdge(v0, v1, a); - a->flags = GPU_CMD_VERTEX; - - _glClipEdge(v2, v0, b); - b->flags = GPU_CMD_VERTEX_EOL; - - _glPerspectiveDivideVertex(v0, h); - _glPushHeaderOrVertex(v0); - - _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(a); - - _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); - } - break; - case 1: - /* First vertex was visible, but not last in strip */ - { - Vertex __attribute__((aligned(32))) scratch[2]; - Vertex* a = &scratch[0]; - Vertex* b = &scratch[1]; - - _glClipEdge(v0, v1, a); - a->flags = GPU_CMD_VERTEX; - - _glClipEdge(v2, v0, b); - b->flags = GPU_CMD_VERTEX; - - _glPerspectiveDivideVertex(v0, h); - _glPushHeaderOrVertex(v0); - - _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(a); - - _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); - _glPushHeaderOrVertex(b); - } - break; - case 10: - case 2: - /* Second vertex was visible. In self case we need to create a triangle and produce + case 10: + case 2: + /* Second vertex was visible. In self case we need to create a triangle and produce two new vertices: 1-2, and 2-3. */ - { - Vertex __attribute__((aligned(32))) scratch[2]; - Vertex* a = &scratch[0]; - Vertex* c = &scratch[1]; + { + Vertex __attribute__((aligned(32))) scratch[3]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; - memcpy_vertex(c, v1); + memcpy_vertex(c, v1); - _glClipEdge(v0, c, a); - a->flags = GPU_CMD_VERTEX; + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(a); + _glClipEdge(v1, v2, b); + b->flags = v2->flags; - _glClipEdge(c, v2, a); - a->flags = v2->flags; + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); - _glPerspectiveDivideVertex(c, h); - _glPushHeaderOrVertex(c); + _glPerspectiveDivideVertex(c, h); + _glPushHeaderOrVertex(c); - _glPerspectiveDivideVertex(a, h); + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); + } + break; + case 11: + case 3: /* First and second vertex were visible */ + { + Vertex __attribute__((aligned(32))) scratch[3]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; + + memcpy_vertex(c, v1); + + _glClipEdge(v2, v0, b); + b->flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(v0, h); + _glPushHeaderOrVertex(v0); + + _glClipEdge(v1, v2, a); + a->flags = v2->flags; + + _glPerspectiveDivideVertex(c, h); + _glPushHeaderOrVertex(c); + + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); + + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(c); + _glPushHeaderOrVertex(a); + } + break; + case 12: + case 4: + /* Third vertex was visible. */ + { + Vertex __attribute__((aligned(32))) scratch[3]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; + + memcpy_vertex(c, v2); + + _glClipEdge(v2, v0, a); + a->flags = GPU_CMD_VERTEX; + + _glClipEdge(v1, v2, b); + b->flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); + + if(counter % 2 == 1) { _glPushHeaderOrVertex(a); } + + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); + + _glPerspectiveDivideVertex(c, h); + _glPushHeaderOrVertex(c); + } break; - case 11: - case 3: /* First and second vertex were visible */ - { - Vertex __attribute__((aligned(32))) scratch[3]; - Vertex* a = &scratch[0]; - Vertex* b = &scratch[1]; - Vertex* c = &scratch[2]; + case 13: + { + Vertex __attribute__((aligned(32))) scratch[3]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; - memcpy_vertex(c, v1); + memcpy_vertex(c, v2); + c->flags = GPU_CMD_VERTEX; - _glClipEdge(v2, v0, b); - b->flags = GPU_CMD_VERTEX; + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(v0, h); - _glPushHeaderOrVertex(v0); + _glClipEdge(v1, v2, b); + b->flags = GPU_CMD_VERTEX; - _glClipEdge(v1, v2, a); - a->flags = v2->flags; + _glPerspectiveDivideVertex(v0, h); + _glPushHeaderOrVertex(v0); - _glPerspectiveDivideVertex(c, h); - _glPushHeaderOrVertex(c); + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); - _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); + _glPerspectiveDivideVertex(c, h); + _glPushHeaderOrVertex(c); + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); - _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(c); - _glPushHeaderOrVertex(a); - } + c->flags = GPU_CMD_VERTEX_EOL; + _glPushHeaderOrVertex(c); + } + break; + case 5: /* First and third vertex were visible */ + { + Vertex __attribute__((aligned(32))) scratch[3]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; + + memcpy_vertex(c, v2); + c->flags = GPU_CMD_VERTEX; + + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; + + _glClipEdge(v1, v2, b); + b->flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(v0, h); + _glPushHeaderOrVertex(v0); + + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(c, h); + _glPushHeaderOrVertex(c); + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); + _glPushHeaderOrVertex(c); + } + break; + case 14: + case 6: /* Second and third vertex were visible */ + { + Vertex __attribute__((aligned(32))) scratch[4]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; + Vertex* d = &scratch[3]; + + memcpy_vertex(c, v1); + memcpy_vertex(d, v2); + + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; + + _glClipEdge(v2, v0, b); + b->flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(c, h); + _glPushHeaderOrVertex(c); + + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); + _glPushHeaderOrVertex(c); + + _glPerspectiveDivideVertex(d, h); + _glPushHeaderOrVertex(d); + } + break; + case 8: + default: break; - case 12: - case 4: - /* Third vertex was visible. */ - { - Vertex __attribute__((aligned(32))) scratch[3]; - Vertex* a = &scratch[0]; - Vertex* b = &scratch[1]; - Vertex* c = &scratch[2]; - - memcpy_vertex(c, v2); - - _glClipEdge(v2, v0, a); - a->flags = GPU_CMD_VERTEX; - - _glClipEdge(v1, v2, b); - b->flags = GPU_CMD_VERTEX; - - _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(a); - - if(counter % 2 == 1) { - _glPushHeaderOrVertex(a); - } - - _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); - - _glPerspectiveDivideVertex(c, h); - _glPushHeaderOrVertex(c); - } - break; - case 13: - { - Vertex __attribute__((aligned(32))) scratch[3]; - Vertex* a = &scratch[0]; - Vertex* b = &scratch[1]; - Vertex* c = &scratch[2]; - - memcpy_vertex(c, v2); - c->flags = GPU_CMD_VERTEX; - - _glClipEdge(v0, v1, a); - a->flags = GPU_CMD_VERTEX; - - _glClipEdge(v1, v2, b); - b->flags = GPU_CMD_VERTEX; - - _glPerspectiveDivideVertex(v0, h); - _glPushHeaderOrVertex(v0); - - _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(a); - - _glPerspectiveDivideVertex(c, h); - _glPushHeaderOrVertex(c); - _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); - - c->flags = GPU_CMD_VERTEX_EOL; - _glPushHeaderOrVertex(c); - } - break; - case 5: /* First and third vertex were visible */ - { - Vertex __attribute__((aligned(32))) scratch[3]; - Vertex* a = &scratch[0]; - Vertex* b = &scratch[1]; - Vertex* c = &scratch[2]; - - memcpy_vertex(c, v2); - c->flags = GPU_CMD_VERTEX; - - _glClipEdge(v0, v1, a); - a->flags = GPU_CMD_VERTEX; - - _glClipEdge(v1, v2, b); - b->flags = GPU_CMD_VERTEX; - - _glPerspectiveDivideVertex(v0, h); - _glPushHeaderOrVertex(v0); - - _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(a); - - _glPerspectiveDivideVertex(c, h); - _glPushHeaderOrVertex(c); - _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); - _glPushHeaderOrVertex(c); - } - break; - case 14: - case 6: /* Second and third vertex were visible */ - { - Vertex __attribute__((aligned(32))) scratch[4]; - Vertex* a = &scratch[0]; - Vertex* b = &scratch[1]; - Vertex* c = &scratch[2]; - Vertex* d = &scratch[3]; - - memcpy_vertex(c, v1); - memcpy_vertex(d, v2); - - _glClipEdge(v0, v1, a); - a->flags = GPU_CMD_VERTEX; - - _glClipEdge(v2, v0, b); - b->flags = GPU_CMD_VERTEX; - - _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(a); - - _glPerspectiveDivideVertex(c, h); - _glPushHeaderOrVertex(c); - - _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); - _glPushHeaderOrVertex(c); - - _glPerspectiveDivideVertex(d, h); - _glPushHeaderOrVertex(d); - } - break; - default: - break; } } diff --git a/GL/platforms/software.c b/GL/platforms/software.c index 6b5a9ee..9a27c46 100644 --- a/GL/platforms/software.c +++ b/GL/platforms/software.c @@ -162,7 +162,7 @@ GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) { } } -GL_FORCE_INLINE void _glSubmitHeaderOrVertex(const Vertex* v) { +GL_FORCE_INLINE void _glPushHeaderOrVertex(const Vertex* v) { #ifndef NDEBUG if(glIsVertex(v->flags)) { gl_assert(!isnan(v->xyz[2])); @@ -177,335 +177,329 @@ GL_FORCE_INLINE void _glSubmitHeaderOrVertex(const Vertex* v) { BUFFER[vertex_counter++] = *v; } -static struct { - Vertex* v; - int visible; -} triangle[3]; +static inline void _glFlushBuffer() {} -static int tri_count = 0; -static int strip_count = 0; - -GL_FORCE_INLINE void interpolateColour(const uint8_t* v1, const uint8_t* v2, const float t, uint8_t* out) { - const int MASK1 = 0x00FF00FF; - const int MASK2 = 0xFF00FF00; - - const int f2 = 256 * t; - const int f1 = 256 - f2; - - const uint32_t a = *(uint32_t*) v1; - const uint32_t b = *(uint32_t*) v2; - - *((uint32_t*) out) = (((((a & MASK1) * f1) + ((b & MASK1) * f2)) >> 8) & MASK1) | - (((((a & MASK2) * f1) + ((b & MASK2) * f2)) >> 8) & MASK2); -} GL_FORCE_INLINE void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) { - /* Clipping time! */ + const static float o = 0.003921569f; // 1 / 255 const float d0 = v1->w + v1->xyz[2]; const float d1 = v2->w + v2->xyz[2]; + const float t = (fabs(d0) * (1.0f / sqrtf((d1 - d0) * (d1 - d0)))) + 0.000001f; + const float invt = 1.0f - t; - const float epsilon = (d0 < d1) ? -0.00001f : 0.00001f; + vout->xyz[0] = invt * v1->xyz[0] + t * v2->xyz[0]; + vout->xyz[1] = invt * v1->xyz[1] + t * v2->xyz[1]; + vout->xyz[2] = invt * v1->xyz[2] + t * v2->xyz[2]; - float t = (d0 / (d0 - d1)) + epsilon; + vout->uv[0] = invt * v1->uv[0] + t * v2->uv[0]; + vout->uv[1] = invt * v1->uv[1] + t * v2->uv[1]; - t = (t > 1.0f) ? 1.0f : t; - t = (t < 0.0f) ? 0.0f : t; + vout->w = invt * v1->w + t * v2->w; - vout->xyz[0] = __builtin_fmaf(v2->xyz[0] - v1->xyz[0], t, v1->xyz[0]); - vout->xyz[1] = __builtin_fmaf(v2->xyz[1] - v1->xyz[1], t, v1->xyz[1]); - vout->xyz[2] = __builtin_fmaf(v2->xyz[2] - v1->xyz[2], t, v1->xyz[2]); - vout->w = __builtin_fmaf(v2->w - v1->w, t, v1->w); + const float m = 255 * t; + const float n = 255 - m; - vout->uv[0] = __builtin_fmaf(v2->uv[0] - v1->uv[0], t, v1->uv[0]); - vout->uv[1] = __builtin_fmaf(v2->uv[1] - v1->uv[1], t, v1->uv[1]); - - interpolateColour(v1->bgra, v2->bgra, t, vout->bgra); + vout->bgra[0] = (v1->bgra[0] * n + v2->bgra[0] * m) * o; + vout->bgra[1] = (v1->bgra[1] * n + v2->bgra[1] * m) * o; + vout->bgra[2] = (v1->bgra[2] * n + v2->bgra[2] * m) * o; + vout->bgra[3] = (v1->bgra[3] * n + v2->bgra[3] * m) * o; } -GL_FORCE_INLINE void ClearTriangle() { - tri_count = 0; -} - -GL_FORCE_INLINE void ShiftTriangle() { - if(!tri_count) { +void SceneListSubmit(Vertex* v2, int n) { + /* You need at least a header, and 3 vertices to render anything */ + if(n < 4) { return; } - tri_count--; - triangle[0] = triangle[1]; - triangle[1] = triangle[2]; - -#ifndef NDEBUG - triangle[2].v = NULL; - triangle[2].visible = false; -#endif -} - -GL_FORCE_INLINE void ShiftRotateTriangle() { - if(!tri_count) { - return; - } - - if(triangle[0].v < triangle[1].v) { - triangle[0] = triangle[2]; - } else { - triangle[1] = triangle[2]; - } - - tri_count--; -} - -void SceneListSubmit(Vertex* src, int n) { - /* Perform perspective divide on each vertex */ - Vertex* vertex = (Vertex*) src; - const float h = GetVideoMode()->height; - /* If Z-clipping is disabled, just fire everything over to the buffer */ - if(!ZNEAR_CLIPPING_ENABLED) { - for(int i = 0; i < n; ++i, ++vertex) { - PREFETCH(vertex + 1); - if(glIsVertex(vertex->flags)) { - _glPerspectiveDivideVertex(vertex, h); - } - _glSubmitHeaderOrVertex(vertex); - } + uint8_t visible_mask = 0; + uint8_t counter = 0; - return; - } - - tri_count = 0; - strip_count = 0; - -#if CLIP_DEBUG - printf("----\n"); -#endif - - for(int i = 0; i < n; ++i, ++vertex) { - PREFETCH(vertex + 1); - - bool is_last_in_strip = glIsLastVertex(vertex->flags); - - /* Wait until we fill the triangle */ - if(tri_count < 3) { - if(glIsVertex(vertex->flags)) { - triangle[tri_count].v = vertex; - triangle[tri_count].visible = vertex->xyz[2] >= -vertex->w; - tri_count++; - strip_count++; - } else { - /* We hit a header */ - tri_count = 0; - strip_count = 0; - _glSubmitHeaderOrVertex(vertex); - } - - if(tri_count < 3) { + for(int i = 0; i < n; ++i, ++v2) { + PREFETCH(v2 + 1); + switch(v2->flags) { + case GPU_CMD_VERTEX_EOL: + if(counter < 2) { + continue; + } + counter = 0; + break; + case GPU_CMD_VERTEX: + ++counter; + if(counter < 3) { + continue; + } + break; + default: + _glPushHeaderOrVertex(v2); + counter = 0; continue; - } + }; + + Vertex* const v0 = v2 - 2; + Vertex* const v1 = v2 - 1; + + visible_mask = ( + (v0->xyz[2] > -v0->w) << 0 | + (v1->xyz[2] > -v1->w) << 1 | + (v2->xyz[2] > -v2->w) << 2 | + (counter == 0) << 3 + ); + + switch(visible_mask) { + case 15: /* All visible, but final vertex in strip */ + { + _glPerspectiveDivideVertex(v0, h); + _glPushHeaderOrVertex(v0); + + _glPerspectiveDivideVertex(v1, h); + _glPushHeaderOrVertex(v1); + + _glPerspectiveDivideVertex(v2, h); + _glPushHeaderOrVertex(v2); } + break; + case 7: + /* All visible, push the first vertex and move on */ + _glPerspectiveDivideVertex(v0, h); + _glPushHeaderOrVertex(v0); + break; + case 9: + /* First vertex was visible, last in strip */ + { + Vertex __attribute__((aligned(32))) scratch[2]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; -#if CLIP_DEBUG - printf("SC: %d\n", strip_count); -#endif + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; - /* If we got here, then triangle contains 3 vertices */ - int visible_mask = triangle[0].visible | (triangle[1].visible << 1) | (triangle[2].visible << 2); - if(visible_mask == 7) { -#if CLIP_DEBUG - printf("Visible\n"); -#endif - /* All the vertices are visible! We divide and submit v0, then shift */ - _glPerspectiveDivideVertex(vertex - 2, h); - _glSubmitHeaderOrVertex(vertex - 2); + _glClipEdge(v2, v0, b); + b->flags = GPU_CMD_VERTEX_EOL; - if(is_last_in_strip) { - _glPerspectiveDivideVertex(vertex - 1, h); - _glSubmitHeaderOrVertex(vertex - 1); - _glPerspectiveDivideVertex(vertex, h); - _glSubmitHeaderOrVertex(vertex); - tri_count = 0; - strip_count = 0; + _glPerspectiveDivideVertex(v0, h); + _glPushHeaderOrVertex(v0); + + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); + } + break; + case 1: + /* First vertex was visible, but not last in strip */ + { + Vertex __attribute__((aligned(32))) scratch[2]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; + + _glClipEdge(v2, v0, b); + b->flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(v0, h); + _glPushHeaderOrVertex(v0); + + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); + _glPushHeaderOrVertex(b); + } + break; + case 10: + case 2: + /* Second vertex was visible. In self case we need to create a triangle and produce + two new vertices: 1-2, and 2-3. */ + { + Vertex __attribute__((aligned(32))) scratch[3]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; + + memcpy_vertex(c, v1); + + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; + + _glClipEdge(v1, v2, b); + b->flags = v2->flags; + + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(c, h); + _glPushHeaderOrVertex(c); + + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); + } + break; + case 11: + case 3: /* First and second vertex were visible */ + { + Vertex __attribute__((aligned(32))) scratch[3]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; + + memcpy_vertex(c, v1); + + _glClipEdge(v2, v0, b); + b->flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(v0, h); + _glPushHeaderOrVertex(v0); + + _glClipEdge(v1, v2, a); + a->flags = v2->flags; + + _glPerspectiveDivideVertex(c, h); + _glPushHeaderOrVertex(c); + + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); + + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(c); + _glPushHeaderOrVertex(a); + } + break; + case 12: + case 4: + /* Third vertex was visible. */ + { + Vertex __attribute__((aligned(32))) scratch[3]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; + + memcpy_vertex(c, v2); + + _glClipEdge(v2, v0, a); + a->flags = GPU_CMD_VERTEX; + + _glClipEdge(v1, v2, b); + b->flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); + + if(counter % 2 == 1) { + _glPushHeaderOrVertex(a); } - ShiftRotateTriangle(); + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); - } else if(visible_mask) { - /* Clipping time! + _glPerspectiveDivideVertex(c, h); + _glPushHeaderOrVertex(c); + } + break; + case 13: + { + Vertex __attribute__((aligned(32))) scratch[3]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; - There are 6 distinct possibilities when clipping a triangle. 3 of them result - in another triangle, 3 of them result in a quadrilateral. + memcpy_vertex(c, v2); + c->flags = GPU_CMD_VERTEX; - Assuming you iterate the edges of the triangle in order, and create a new *visible* - vertex when you cross the plane, and discard vertices behind the plane, then the only - difference between the two cases is that the final two vertices that need submitting have - to be reversed. + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; - Unfortunately we have to copy vertices here, because if we persp-divide a vertex it may - be used in a subsequent triangle in the strip and would end up being double divided. - */ -#if CLIP_DEBUG - printf("Clip: %d, SC: %d\n", visible_mask, strip_count); - printf("%d, %d, %d\n", triangle[0].v - (Vertex*) src - 1, triangle[1].v - (Vertex*) src - 1, triangle[2].v - (Vertex*) src - 1); -#endif - Vertex tmp; - if(strip_count > 3) { -#if CLIP_DEBUG - printf("Flush\n"); -#endif - tmp = *(vertex - 2); - /* If we had triangles ahead of this one, submit and finalize */ - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glClipEdge(v1, v2, b); + b->flags = GPU_CMD_VERTEX; - tmp = *(vertex - 1); - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); - } + _glPerspectiveDivideVertex(v0, h); + _glPushHeaderOrVertex(v0); - switch(visible_mask) { - case 1: { - /* 0, 0a, 2a */ - tmp = *triangle[0].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); - _glClipEdge(triangle[0].v, triangle[1].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glPerspectiveDivideVertex(c, h); + _glPushHeaderOrVertex(c); + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); - _glClipEdge(triangle[2].v, triangle[0].v, &tmp); - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); - } break; - case 2: { - /* 0a, 1, 1a */ - _glClipEdge(triangle[0].v, triangle[1].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + c->flags = GPU_CMD_VERTEX_EOL; + _glPushHeaderOrVertex(c); + } + break; + case 5: /* First and third vertex were visible */ + { + Vertex __attribute__((aligned(32))) scratch[3]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; - tmp = *triangle[1].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + memcpy_vertex(c, v2); + c->flags = GPU_CMD_VERTEX; - _glClipEdge(triangle[1].v, triangle[2].v, &tmp); - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); - } break; - case 3: { - /* 0, 1, 2a, 1a */ - tmp = *triangle[0].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; - tmp = *triangle[1].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glClipEdge(v1, v2, b); + b->flags = GPU_CMD_VERTEX; - _glClipEdge(triangle[2].v, triangle[0].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glPerspectiveDivideVertex(v0, h); + _glPushHeaderOrVertex(v0); - _glClipEdge(triangle[1].v, triangle[2].v, &tmp); - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); - } break; - case 4: { - /* 1a, 2, 2a */ - _glClipEdge(triangle[1].v, triangle[2].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); - tmp = *triangle[2].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glPerspectiveDivideVertex(c, h); + _glPushHeaderOrVertex(c); + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); + _glPushHeaderOrVertex(c); + } + break; + case 14: + case 6: /* Second and third vertex were visible */ + { + Vertex __attribute__((aligned(32))) scratch[4]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; + Vertex* d = &scratch[3]; - _glClipEdge(triangle[2].v, triangle[0].v, &tmp); - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); - } break; - case 5: { - /* 0, 0a, 2, 1a */ - tmp = *triangle[0].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + memcpy_vertex(c, v1); + memcpy_vertex(d, v2); - _glClipEdge(triangle[0].v, triangle[1].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; - tmp = *triangle[2].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glClipEdge(v2, v0, b); + b->flags = GPU_CMD_VERTEX; - _glClipEdge(triangle[1].v, triangle[2].v, &tmp); - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); - } break; - case 6: { - /* 0a, 1, 2a, 2 */ - _glClipEdge(triangle[0].v, triangle[1].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); - tmp = *triangle[1].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glPerspectiveDivideVertex(c, h); + _glPushHeaderOrVertex(c); - _glClipEdge(triangle[2].v, triangle[0].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); + _glPushHeaderOrVertex(c); - tmp = *triangle[2].v; - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); - } break; - default: - break; - } - - /* If this was the last in the strip, we don't need to - submit anything else, we just wipe the tri_count */ - if(is_last_in_strip) { - tri_count = 0; - strip_count = 0; - } else { - ShiftRotateTriangle(); - strip_count = 2; - } - } else { - /* Invisible? Move to the next in the strip */ - - if(is_last_in_strip) { - tri_count = 0; - strip_count = 0; - } - strip_count = 2; - ShiftRotateTriangle(); + _glPerspectiveDivideVertex(d, h); + _glPushHeaderOrVertex(d); + } + break; + case 8: + default: + break; } } + + _glFlushBuffer(); } void SceneListFinish() { @@ -548,7 +542,6 @@ void SceneListFinish() { void SceneFinish() { SDL_RenderPresent(RENDERER); - return; /* Only sensible place to hook the quit signal */ SDL_Event e; while (SDL_PollEvent(&e)) { diff --git a/GL/state.c b/GL/state.c index e1bfe14..2a16367 100644 --- a/GL/state.c +++ b/GL/state.c @@ -255,7 +255,8 @@ void _glUpdatePVRTextureContext(PolyContext *context, GLshort textureUnit) { context->txr2.enable = GPU_TEXTURE_DISABLE; context->txr2.alpha = GPU_TXRALPHA_DISABLE; - if(!TEXTURES_ENABLED[textureUnit] || !tx1) { + if(!TEXTURES_ENABLED[textureUnit] || !tx1 || !tx1->data) { + context->txr.base = NULL; return; } diff --git a/GL/texture.c b/GL/texture.c index 016be45..3d2dd77 100644 --- a/GL/texture.c +++ b/GL/texture.c @@ -39,6 +39,7 @@ static void* yalloc_alloc_and_defrag(size_t size) { if(!ret) { /* Tried to allocate, but out of room, let's try defragging * and repeating the alloc */ + fprintf(stderr, "Ran out of memory, defragmenting\n"); glDefragmentTextureMemory_KOS(); ret = yalloc_alloc(YALLOC_BASE, size); } @@ -537,6 +538,7 @@ void APIENTRY glGenTextures(GLsizei n, GLuint *textures) { GLuint id = 0; TextureObject* txr = (TextureObject*) named_array_alloc(&TEXTURE_OBJECTS, &id); + gl_assert(txr); gl_assert(id); // Generated IDs must never be zero _glInitializeTextureObject(txr, id); @@ -553,31 +555,32 @@ void APIENTRY glDeleteTextures(GLsizei n, GLuint *textures) { while(n--) { TextureObject* txr = (TextureObject*) named_array_get(&TEXTURE_OBJECTS, *textures); - /* Make sure we update framebuffer objects that have this texture attached */ - _glWipeTextureOnFramebuffers(*textures); + if(txr) { + /* Make sure we update framebuffer objects that have this texture attached */ + _glWipeTextureOnFramebuffers(*textures); - if(txr == TEXTURE_UNITS[ACTIVE_TEXTURE]) { - TEXTURE_UNITS[ACTIVE_TEXTURE] = NULL; - } - - if(txr->data) { - yalloc_free(YALLOC_BASE, txr->data); - txr->data = NULL; - } - - if(txr->palette && txr->palette->data) { - - if (txr->palette->bank > -1) { - _glReleasePaletteSlot(txr->palette->bank, txr->palette->size); - txr->palette->bank = -1; + if(txr == TEXTURE_UNITS[ACTIVE_TEXTURE]) { + TEXTURE_UNITS[ACTIVE_TEXTURE] = NULL; } - free(txr->palette->data); - txr->palette->data = NULL; - } - if(txr->palette) { - free(txr->palette); - txr->palette = NULL; + if(txr->data) { + yalloc_free(YALLOC_BASE, txr->data); + txr->data = NULL; + } + + if(txr->palette && txr->palette->data) { + if (txr->palette->bank > -1) { + _glReleasePaletteSlot(txr->palette->bank, txr->palette->size); + txr->palette->bank = -1; + } + free(txr->palette->data); + txr->palette->data = NULL; + } + + if(txr->palette) { + free(txr->palette); + txr->palette = NULL; + } } named_array_release(&TEXTURE_OBJECTS, *textures); @@ -820,6 +823,8 @@ void APIENTRY glCompressedTexImage2DARB(GLenum target, if(data) { FASTCPY(active->data, data, imageSize); } + + _glGPUStateMarkDirty(); } static GLint _cleanInternalFormat(GLint internalFormat) { @@ -1555,6 +1560,8 @@ void APIENTRY glTexImage2D(GLenum target, GLint level, GLint internalFormat, free(conversionBuffer); conversionBuffer = NULL; } + + _glGPUStateMarkDirty(); } void APIENTRY glTexParameteri(GLenum target, GLenum pname, GLint param) { diff --git a/samples/cubes/main.cpp b/samples/cubes/main.cpp index 59a9f1b..1599466 100644 --- a/samples/cubes/main.cpp +++ b/samples/cubes/main.cpp @@ -328,7 +328,7 @@ void updateLogic() glTranslatef(0.0f, 0.0f, -cameraDistance + zoomVal); // Apply cube transformation (identity matrix) - glMultMatrixf(cubeTransformationMatrix); + glLoadIdentity(); updateCubes(dt); diff --git a/samples/nehe02/main.c b/samples/nehe02/main.c index 1b2e3e0..7775721 100644 --- a/samples/nehe02/main.c +++ b/samples/nehe02/main.c @@ -9,7 +9,7 @@ /* A general OpenGL initialization function. Sets all of the initial parameters. */ void InitGL(int Width, int Height) // We call this right after our OpenGL window is created. { - glClearColor(0.0f, 0.0f, 0.0f, 0.0f); // This Will Clear The Background Color To Black + glClearColor(0.0f, 0.0f, 1.0f, 0.0f); // This Will Clear The Background Color To Black glClearDepth(1.0); // Enables Clearing Of The Depth Buffer glDepthFunc(GL_LEQUAL); // The Type Of Depth Test To Do glEnable(GL_DEPTH_TEST); // Enables Depth Testing @@ -20,7 +20,7 @@ void InitGL(int Width, int Height) // We call this right after our OpenG gluPerspective(45.0f,(GLfloat)Width/(GLfloat)Height,0.1f,100.0f); // Calculate The Aspect Ratio Of The Window - glMatrixMode(GL_MODELVIEW); + glMatrixMode(GL_MODELVIEW); } /* The function called when our window is resized (which shouldn't happen, because we're fullscreen) */ diff --git a/samples/nehe10/main.c b/samples/nehe10/main.c index faa3bc0..aa6c87f 100644 --- a/samples/nehe10/main.c +++ b/samples/nehe10/main.c @@ -10,6 +10,8 @@ #ifdef __DREAMCAST__ #include +#else +#include #endif #include @@ -17,7 +19,9 @@ #include #include +#include #include +#include #include "../loadbmp.h" @@ -84,7 +88,16 @@ void SetupWorld() int numtriangles; FILE *filein; char oneline[255]; +#ifdef __DREAMCAST__ filein = fopen("/rd/world.txt", "rt"); // File To Load World Data From +#else + filein = fopen("../samples/nehe10/romdisk/world.txt", "rt"); +#endif + + if(!filein) { + fprintf(stderr, "Failed to load world file\n"); + exit(1); + } readstr(filein,oneline); sscanf(oneline, "NUMPOLLIES %d\n", &numtriangles); @@ -228,6 +241,13 @@ void DrawGLScene(void) { } int ReadController(void) { + bool start = false; + bool up = false; + bool down = false; + bool left = false; + bool right = false; + + #ifdef __DREAMCAST__ maple_device_t *cont; cont_state_t *state; @@ -241,10 +261,27 @@ int ReadController(void) { return 0; } - if(state->buttons & CONT_START) - return 0; + start = (state->buttons & CONT_START); + up = (state->buttons & CONT_DPAD_UP); + down = (state->buttons & CONT_DPAD_DOWN); + left = (state->buttons & CONT_DPAD_LEFT); + right = (state->buttons & CONT_DPAD_RIGHT); - if(state->buttons & CONT_DPAD_UP) { +#else + int num_keys = 0; + uint8_t* state = SDL_GetKeyboardState(&num_keys); + start = state[SDL_SCANCODE_RETURN]; + up = state[SDL_SCANCODE_UP]; + down = state[SDL_SCANCODE_DOWN]; + left = state[SDL_SCANCODE_LEFT]; + right = state[SDL_SCANCODE_RIGHT]; +#endif + + if(start) { + return 0; + } + + if(up) { xpos -= (float)sin(heading*piover180) * 0.05f; zpos -= (float)cos(heading*piover180) * 0.05f; if (walkbiasangle >= 359.0f) @@ -258,8 +295,7 @@ int ReadController(void) { walkbias = (float)sin(walkbiasangle * piover180)/20.0f; } - - if(state->buttons & CONT_DPAD_DOWN) { + if(down) { xpos += (float)sin(heading*piover180) * 0.05f; zpos += (float)cos(heading*piover180) * 0.05f; if (walkbiasangle <= 1.0f) @@ -273,18 +309,17 @@ int ReadController(void) { walkbias = (float)sin(walkbiasangle * piover180)/20.0f; } - - if(state->buttons & CONT_DPAD_LEFT) { + if(left) { heading += 1.0f; yrot = heading; } - if(state->buttons & CONT_DPAD_RIGHT) { + if(right) { heading -= 1.0f; yrot = heading; } -#endif + /* Switch to the blended polygon list if needed */ if(blend) { diff --git a/tests/zclip/main.cpp b/tests/zclip/main.cpp index 7b7e316..adada72 100644 --- a/tests/zclip/main.cpp +++ b/tests/zclip/main.cpp @@ -177,7 +177,7 @@ void SceneListSubmit(void* src, int n) { _glSubmitHeaderOrVertex(d, &queue[queue_tail]); break; case GPU_CMD_VERTEX_EOL: - last_vertex = true; + last_vertex = true; // fallthru case GPU_CMD_VERTEX: visible_mask = (visible_mask >> 1) | (queue[queue_tail].xyz[2] >= -queue[queue_tail].w) << 2; assert(visible_mask < 15); From 52a0215ed8664c3cc941e247b6bafd63d754d8f1 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Wed, 17 May 2023 20:36:59 +0100 Subject: [PATCH 02/12] Make sure we initialize texture 0. We don't actually use it yet (binding zero disables texturing) but I believe the spec says that texture 0 is the "default texture" and is an actual texture object. --- GL/texture.c | 48 ++++++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/GL/texture.c b/GL/texture.c index 3d2dd77..68f40ae 100644 --- a/GL/texture.c +++ b/GL/texture.c @@ -451,6 +451,29 @@ void _glResetSharedPalettes() memset((void*) SUBBANKS_USED, 0x0, sizeof(SUBBANKS_USED)); } + +static void _glInitializeTextureObject(TextureObject* txr, unsigned int id) { + txr->index = id; + txr->width = txr->height = 0; + txr->mipmap = 0; + txr->uv_clamp = 0; + txr->env = GPU_TXRENV_MODULATEALPHA; + txr->data = NULL; + txr->mipmapCount = 0; + txr->minFilter = GL_NEAREST; + txr->magFilter = GL_NEAREST; + txr->palette = NULL; + txr->isCompressed = GL_FALSE; + txr->isPaletted = GL_FALSE; + txr->mipmap_bias = GL_KOS_INTERNAL_DEFAULT_MIPMAP_LOD_BIAS; + + /* Not mipmapped by default */ + txr->baseDataOffset = 0; + + /* Always default to the first shared bank */ + txr->shared_bank = 0; +} + GLubyte _glInitTextures() { uint32_t i; @@ -460,6 +483,9 @@ GLubyte _glInitTextures() { // Reserve zero so that it is never given to anyone as an ID! named_array_reserve(&TEXTURE_OBJECTS, 0); + // Initialize zero as an actual texture object though because apparently it is! + _glInitializeTextureObject((TextureObject*) named_array_get(&TEXTURE_OBJECTS, 0), 0); + for (i=0; i < MAX_GLDC_SHARED_PALETTES;i++){ SHARED_PALETTES[i] = _initTexturePalette(); } @@ -509,28 +535,6 @@ GLboolean APIENTRY glIsTexture(GLuint texture) { return (named_array_used(&TEXTURE_OBJECTS, texture)) ? GL_TRUE : GL_FALSE; } -static void _glInitializeTextureObject(TextureObject* txr, unsigned int id) { - txr->index = id; - txr->width = txr->height = 0; - txr->mipmap = 0; - txr->uv_clamp = 0; - txr->env = GPU_TXRENV_MODULATEALPHA; - txr->data = NULL; - txr->mipmapCount = 0; - txr->minFilter = GL_NEAREST; - txr->magFilter = GL_NEAREST; - txr->palette = NULL; - txr->isCompressed = GL_FALSE; - txr->isPaletted = GL_FALSE; - txr->mipmap_bias = GL_KOS_INTERNAL_DEFAULT_MIPMAP_LOD_BIAS; - - /* Not mipmapped by default */ - txr->baseDataOffset = 0; - - /* Always default to the first shared bank */ - txr->shared_bank = 0; -} - void APIENTRY glGenTextures(GLsizei n, GLuint *textures) { TRACE(); From 9037d157d53e986bc7d84a69b7ac5c8f6fdbe4d0 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Wed, 17 May 2023 20:38:21 +0100 Subject: [PATCH 03/12] Clean up --- GL/texture.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/GL/texture.c b/GL/texture.c index 68f40ae..8b92d96 100644 --- a/GL/texture.c +++ b/GL/texture.c @@ -1929,8 +1929,9 @@ GLAPI GLvoid APIENTRY glDefragmentTextureMemory_KOS(void) { /* Replace all texture pointers */ for(id = 0; id < MAX_TEXTURE_COUNT; id++){ - if(glIsTexture(id)){ - TextureObject* txr = (TextureObject*) named_array_get(&TEXTURE_OBJECTS, id); + TextureObject* txr = (TextureObject*) named_array_get(&TEXTURE_OBJECTS, id); + if(txr){ + gl_assert(txr->index == id); txr->data = yalloc_defrag_address(YALLOC_BASE, txr->data); } } From c4c0bf4239d7fc1de8e2f6078be2a33777014179 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Wed, 17 May 2023 20:39:27 +0100 Subject: [PATCH 04/12] Fix an off-by-one error --- GL/texture.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/GL/texture.c b/GL/texture.c index 8b92d96..3720045 100644 --- a/GL/texture.c +++ b/GL/texture.c @@ -523,12 +523,15 @@ TextureObject* _glGetBoundTexture() { void APIENTRY glActiveTextureARB(GLenum texture) { TRACE(); - if(texture < GL_TEXTURE0_ARB || texture > GL_TEXTURE0_ARB + MAX_GLDC_TEXTURE_UNITS) { + if(texture < GL_TEXTURE0_ARB || texture >= GL_TEXTURE0_ARB + MAX_GLDC_TEXTURE_UNITS) { _glKosThrowError(GL_INVALID_ENUM, "glActiveTextureARB"); return; } ACTIVE_TEXTURE = texture & 0xF; + gl_assert(ACTIVE_TEXTURE < MAX_GLDC_TEXTURE_UNITS); + + gl_assert(TEXTURE_OBJECTS.element_size > 0); } GLboolean APIENTRY glIsTexture(GLuint texture) { From 462eb40d7acd12ac0492a4588319f34281a9be27 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Wed, 17 May 2023 20:39:49 +0100 Subject: [PATCH 05/12] Fix bugs in texture deletion --- GL/texture.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/GL/texture.c b/GL/texture.c index 3720045..ef18722 100644 --- a/GL/texture.c +++ b/GL/texture.c @@ -559,15 +559,27 @@ void APIENTRY glGenTextures(GLsizei n, GLuint *textures) { void APIENTRY glDeleteTextures(GLsizei n, GLuint *textures) { TRACE(); - while(n--) { - TextureObject* txr = (TextureObject*) named_array_get(&TEXTURE_OBJECTS, *textures); + gl_assert(TEXTURE_OBJECTS.element_size > 0); + + for(GLsizei i = 0; i < n; ++i) { + GLuint id = textures[i]; + if(id == 0) { + /* Zero is the "default texture" and we never allow deletion of it */ + continue; + } + + TextureObject* txr = (TextureObject*) named_array_get(&TEXTURE_OBJECTS, id); if(txr) { - /* Make sure we update framebuffer objects that have this texture attached */ - _glWipeTextureOnFramebuffers(*textures); + gl_assert(txr->index == id); - if(txr == TEXTURE_UNITS[ACTIVE_TEXTURE]) { - TEXTURE_UNITS[ACTIVE_TEXTURE] = NULL; + /* Make sure we update framebuffer objects that have this texture attached */ + _glWipeTextureOnFramebuffers(id); + + for(GLuint j = 0; j < MAX_GLDC_TEXTURE_UNITS; ++j) { + if(txr == TEXTURE_UNITS[j]) { + TEXTURE_UNITS[j] = NULL; + } } if(txr->data) { @@ -588,12 +600,14 @@ void APIENTRY glDeleteTextures(GLsizei n, GLuint *textures) { free(txr->palette); txr->palette = NULL; } - } - named_array_release(&TEXTURE_OBJECTS, *textures); - *textures = 0; - textures++; + named_array_release(&TEXTURE_OBJECTS, id); + textures[i] = 0; + txr->index = 0; + } } + + gl_assert(TEXTURE_OBJECTS.element_size > 0); } void APIENTRY glBindTexture(GLenum target, GLuint texture) { From d81472ef5788995958fdc24250b59f1e4f7a104f Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Wed, 17 May 2023 20:39:58 +0100 Subject: [PATCH 06/12] Liberally assert stuff --- GL/texture.c | 63 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/GL/texture.c b/GL/texture.c index ef18722..b3126f6 100644 --- a/GL/texture.c +++ b/GL/texture.c @@ -505,6 +505,8 @@ GLubyte _glInitTextures() { #endif yalloc_init(YALLOC_BASE, YALLOC_SIZE); + + gl_assert(TEXTURE_OBJECTS.element_size > 0); return 1; } @@ -513,10 +515,12 @@ TextureObject* _glGetTexture0() { } TextureObject* _glGetTexture1() { + gl_assert(1 < MAX_GLDC_TEXTURE_UNITS); return TEXTURE_UNITS[1]; } TextureObject* _glGetBoundTexture() { + gl_assert(ACTIVE_TEXTURE < MAX_GLDC_TEXTURE_UNITS); return TEXTURE_UNITS[ACTIVE_TEXTURE]; } @@ -541,6 +545,8 @@ GLboolean APIENTRY glIsTexture(GLuint texture) { void APIENTRY glGenTextures(GLsizei n, GLuint *textures) { TRACE(); + gl_assert(TEXTURE_OBJECTS.element_size > 0); + while(n--) { GLuint id = 0; TextureObject* txr = (TextureObject*) named_array_alloc(&TEXTURE_OBJECTS, &id); @@ -550,10 +556,14 @@ void APIENTRY glGenTextures(GLsizei n, GLuint *textures) { _glInitializeTextureObject(txr, id); - *textures = id; + gl_assert(txr->index == id); + + *textures = id; textures++; } + + gl_assert(TEXTURE_OBJECTS.element_size > 0); } void APIENTRY glDeleteTextures(GLsizei n, GLuint *textures) { @@ -619,19 +629,21 @@ void APIENTRY glBindTexture(GLenum target, GLuint texture) { return; } - if(texture) { - /* If this didn't come from glGenTextures, then we should initialize the - * texture the first time it's bound */ - if(!named_array_used(&TEXTURE_OBJECTS, texture)) { - TextureObject* txr = named_array_reserve(&TEXTURE_OBJECTS, texture); - _glInitializeTextureObject(txr, texture); - } + TextureObject* txr = (TextureObject*) named_array_get(&TEXTURE_OBJECTS, texture); - TEXTURE_UNITS[ACTIVE_TEXTURE] = (TextureObject*) named_array_get(&TEXTURE_OBJECTS, texture); - } else { - TEXTURE_UNITS[ACTIVE_TEXTURE] = NULL; + /* If this didn't come from glGenTextures, then we should initialize the + * texture the first time it's bound */ + if(!txr) { + TextureObject* txr = named_array_reserve(&TEXTURE_OBJECTS, texture); + _glInitializeTextureObject(txr, texture); } + gl_assert(ACTIVE_TEXTURE < MAX_GLDC_TEXTURE_UNITS); + TEXTURE_UNITS[ACTIVE_TEXTURE] = txr; + gl_assert(TEXTURE_UNITS[ACTIVE_TEXTURE]->index == texture); + + gl_assert(TEXTURE_OBJECTS.element_size > 0); + _glGPUStateMarkDirty(); } @@ -643,9 +655,11 @@ void APIENTRY glTexEnvi(GLenum target, GLenum pname, GLint param) { GLint target_values [] = {GL_TEXTURE_ENV, GL_TEXTURE_FILTER_CONTROL_EXT, 0}; failures += _glCheckValidEnum(target, target_values, __func__); + gl_assert(ACTIVE_TEXTURE < MAX_GLDC_TEXTURE_UNITS); TextureObject* active = TEXTURE_UNITS[ACTIVE_TEXTURE]; if(!active) { + _glKosThrowError(GL_INVALID_OPERATION, __func__); return; } @@ -809,13 +823,15 @@ void APIENTRY glCompressedTexImage2DARB(GLenum target, } } - if(TEXTURE_UNITS[ACTIVE_TEXTURE] == NULL) { + gl_assert(ACTIVE_TEXTURE < MAX_GLDC_TEXTURE_UNITS); + TextureObject* active = TEXTURE_UNITS[ACTIVE_TEXTURE]; + GLuint original_id = active->index; + + if(!active) { _glKosThrowError(GL_INVALID_OPERATION, __func__); return; } - TextureObject* active = TEXTURE_UNITS[ACTIVE_TEXTURE]; - /* Set the required mipmap count */ active->width = width; active->height = height; @@ -845,6 +861,8 @@ void APIENTRY glCompressedTexImage2DARB(GLenum target, FASTCPY(active->data, data, imageSize); } + gl_assert(original_id == active->index); + _glGPUStateMarkDirty(); } @@ -1345,6 +1363,7 @@ void APIENTRY glTexImage2D(GLenum target, GLint level, GLint internalFormat, } else { /* Mipmap Errors, kos crashes if 1x1 */ if((h < 2) || (w < 2)){ + gl_assert(ACTIVE_TEXTURE < MAX_GLDC_TEXTURE_UNITS); gl_assert(TEXTURE_UNITS[ACTIVE_TEXTURE]); TEXTURE_UNITS[ACTIVE_TEXTURE]->mipmap |= (1 << level); return; @@ -1370,21 +1389,23 @@ void APIENTRY glTexImage2D(GLenum target, GLint level, GLint internalFormat, return; } - if(!TEXTURE_UNITS[ACTIVE_TEXTURE]) { + gl_assert(ACTIVE_TEXTURE < MAX_GLDC_TEXTURE_UNITS); + TextureObject* active = TEXTURE_UNITS[ACTIVE_TEXTURE]; + + if(!active) { INFO_MSG("Called glTexImage2D on unbound texture"); _glKosThrowError(GL_INVALID_OPERATION, __func__); return; } + gl_assert(active); + GLuint original_id = active->index; + GLboolean isPaletted = (internalFormat == GL_COLOR_INDEX8_EXT || internalFormat == GL_COLOR_INDEX4_EXT) ? GL_TRUE : GL_FALSE; /* Calculate the format that we need to convert the data to */ GLuint pvr_format = _determinePVRFormat(internalFormat, type); - TextureObject* active = TEXTURE_UNITS[ACTIVE_TEXTURE]; - - gl_assert(active); - if(active->data && (level == 0)) { /* pre-existing texture - check if changed */ if(active->width != width || @@ -1453,6 +1474,7 @@ void APIENTRY glTexImage2D(GLenum target, GLint level, GLint internalFormat, /* If we run out of PVR memory just return */ if(!active->data) { _glKosThrowError(GL_OUT_OF_MEMORY, __func__); + gl_assert(active->index == original_id); return; } @@ -1500,6 +1522,7 @@ void APIENTRY glTexImage2D(GLenum target, GLint level, GLint internalFormat, if(!data) { /* No data? Do nothing! */ + gl_assert(active->index == original_id); return; } else if(!needsConversion && !needsTwiddling) { gl_assert(targetData); @@ -1508,6 +1531,7 @@ void APIENTRY glTexImage2D(GLenum target, GLint level, GLint internalFormat, /* No conversion? Just copy the data, and the pvr_format is correct */ FASTCPY(targetData, data, bytes); + gl_assert(active->index == original_id); return; } else if(needsConversion) { TextureConversionFunc convert = _determineConversion( @@ -1582,6 +1606,7 @@ void APIENTRY glTexImage2D(GLenum target, GLint level, GLint internalFormat, conversionBuffer = NULL; } + gl_assert(active->index == original_id); _glGPUStateMarkDirty(); } From e57b5033559e51d8931daea5fb9fbeb027d8261f Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Thu, 18 May 2023 16:44:11 +0100 Subject: [PATCH 07/12] Fix memory errors --- containers/aligned_vector.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/containers/aligned_vector.c b/containers/aligned_vector.c index 15729ae..f75459a 100644 --- a/containers/aligned_vector.c +++ b/containers/aligned_vector.c @@ -23,6 +23,7 @@ void aligned_vector_init(AlignedVector* vector, uint32_t element_size) { hdr->size = 0; hdr->capacity = ALIGNED_VECTOR_CHUNK_SIZE; hdr->element_size = element_size; + vector->data = NULL; /* Reserve some initial capacity. This will do the allocation but not set up the header */ void* ptr = aligned_vector_reserve(vector, ALIGNED_VECTOR_CHUNK_SIZE); @@ -37,7 +38,7 @@ void aligned_vector_shrink_to_fit(AlignedVector* vector) { free(vector->data); /* Reallocate the header */ - vector->data = memalign(0x20, sizeof(AlignedVectorHeader)); + vector->data = NULL; hdr->size = hdr->capacity = 0; hdr->element_size = element_size; } else { From bd47f333d6557ebd34d72127718da0f2ddc583c2 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Sat, 20 May 2023 07:43:57 +0100 Subject: [PATCH 08/12] Add more assertions --- containers/aligned_vector.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/containers/aligned_vector.h b/containers/aligned_vector.h index 5109b8b..d700b86 100644 --- a/containers/aligned_vector.h +++ b/containers/aligned_vector.h @@ -135,6 +135,14 @@ AV_FORCE_INLINE void* aligned_vector_front(const AlignedVector* vector) { return vector->data; } +#define av_assert(x) \ + do {\ + if(!(x)) {\ + fprintf(stderr, "Assertion failed at %s:%d\n", __FILE__, __LINE__);\ + exit(1);\ + }\ + } while(0); \ + /* Resizes the array and returns a pointer to the first new element (if upsizing) or NULL (if downsizing) */ AV_FORCE_INLINE void* aligned_vector_resize(AlignedVector* vector, const uint32_t element_count) { void* ret = NULL; @@ -149,17 +157,17 @@ AV_FORCE_INLINE void* aligned_vector_resize(AlignedVector* vector, const uint32_ ret = aligned_vector_at(vector, previous_count); - assert(hdr->size == element_count); - assert(hdr->size <= hdr->capacity); + av_assert(hdr->size == element_count); + av_assert(hdr->size <= hdr->capacity); } else if(previous_count < element_count) { /* So we grew, but had the capacity, just get a pointer to * where we were */ hdr->size = element_count; - assert(hdr->size < hdr->capacity); + av_assert(hdr->size < hdr->capacity); ret = aligned_vector_at(vector, previous_count); } else if(hdr->size != element_count) { hdr->size = element_count; - assert(hdr->size < hdr->capacity); + av_assert(hdr->size < hdr->capacity); } return ret; @@ -206,7 +214,7 @@ void aligned_vector_cleanup(AlignedVector* vector); AV_FORCE_INLINE void* aligned_vector_back(AlignedVector* vector){ AlignedVectorHeader* hdr = &vector->hdr; - return aligned_vector_at(vector, hdr->size - 1); + return aligned_vector_at(vector, hdr->size ? hdr->size - 1 : 0); } #ifdef __cplusplus From 1e3896e699ab37b5b7b9a3dd74fa133cfb6d72a2 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Sat, 20 May 2023 07:44:55 +0100 Subject: [PATCH 09/12] Clean up --- GL/draw.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/GL/draw.c b/GL/draw.c index 3548bd1..3c722e3 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -1170,6 +1170,7 @@ void _glInitSubmissionTarget() { GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type, const GLvoid* indices) { + SubmissionTarget* const target = &SUBMISSION_TARGET; AlignedVector* const extras = target->extras; @@ -1214,12 +1215,19 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL gl_assert(mode != GL_POLYGON); target->output = _glActivePolyList(); - GLboolean header_required = (aligned_vector_header(&target->output->vector)->size == 0) || _glGPUStateIsDirty(); + gl_assert(target->output); + gl_assert(extras); + + uint32_t vector_size = aligned_vector_size(&target->output->vector); + + GLboolean header_required = (vector_size == 0) || _glGPUStateIsDirty(); target->count = (mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count; - target->header_offset = aligned_vector_header(&target->output->vector)->size; - target->start_offset = target->header_offset + (header_required); + target->header_offset = vector_size; + target->start_offset = target->header_offset + (header_required ? 1 : 0); + gl_assert(target->header_offset >= 0); + gl_assert(target->start_offset >= target->header_offset); gl_assert(target->count); /* Make sure we have enough room for all the "extra" data */ From 5865d573841dae6627ef90f01a8c9b9b6fe8141c Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Sat, 20 May 2023 07:45:16 +0100 Subject: [PATCH 10/12] Wait for the store queues to finish when we've uploaded everything --- GL/platforms/sh4.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index a69f286..e375772 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -47,6 +47,8 @@ GL_FORCE_INLINE float _glFastInvert(float x) { } GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) { + TRACE(); + const float f = _glFastInvert(vertex->w); /* Convert to NDC and apply viewport */ @@ -68,8 +70,17 @@ GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) { volatile uint32_t *sq = SQ_BASE_ADDRESS; -static inline void _glFlushBuffer() {} +static inline void _glFlushBuffer() { + TRACE(); + + /* Wait for both store queues to complete */ + sq = (uint32_t*) 0xe0000000; + sq[0] = sq[8] = 0; +} + static inline void _glPushHeaderOrVertex(Vertex* v) { + TRACE(); + uint32_t* s = (uint32_t*) v; sq[0] = *(s++); sq[1] = *(s++); @@ -114,6 +125,8 @@ static volatile uint32_t *PVR_LMMODE1 = (uint32_t*) 0xA05F6888; static volatile uint32_t *QACR = (uint32_t*) 0xFF000038; void SceneListSubmit(Vertex* v2, int n) { + TRACE(); + /* You need at least a header, and 3 vertices to render anything */ if(n < 4) { return; From f6713bc7787c29799b90463e9d58091eed6b166b Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Sat, 20 May 2023 07:45:45 +0100 Subject: [PATCH 11/12] Speed up the software renderer --- GL/platforms/software.c | 104 +++++++++++----------------------------- 1 file changed, 28 insertions(+), 76 deletions(-) diff --git a/GL/platforms/software.c b/GL/platforms/software.c index 9a27c46..59d3859 100644 --- a/GL/platforms/software.c +++ b/GL/platforms/software.c @@ -30,81 +30,7 @@ static VideoMode vid_mode = { #define MIN(x, y) ((x) < (y) ? (x) : (y)) #define MAX(x, y) ((x) > (y) ? (x) : (y)) -static void DrawTriangle(Vertex* v0, Vertex* v1, Vertex* v2) { - // Compute triangle bounding box. - - int minX = MIN(MIN(v0->xyz[0], v1->xyz[0]), v2->xyz[0]); - int maxX = MAX(MAX(v0->xyz[0], v1->xyz[0]), v2->xyz[0]); - int minY = MIN(MIN(v0->xyz[1], v1->xyz[1]), v2->xyz[1]); - int maxY = MAX(MAX(v0->xyz[1], v1->xyz[1]), v2->xyz[1]); - - // Clip to scissor rect. - - minX = MAX(minX, 0); - maxX = MIN(maxX, vid_mode.width); - minY = MAX(minY, 0); - maxY = MIN(maxY, vid_mode.height); - - // Compute edge equations. - - EdgeEquation e0, e1, e2; - EdgeEquationInit(&e0, &v0->xyz[0], &v1->xyz[0]); - EdgeEquationInit(&e1, &v1->xyz[0], &v2->xyz[0]); - EdgeEquationInit(&e2, &v2->xyz[0], &v0->xyz[0]); - - float area = 0.5 * (e0.c + e1.c + e2.c); - - /* This is very ugly. I don't understand the math properly - * so I just swap the vertex order if something is back-facing - * and we want to render it. Patches welcome! */ -#define REVERSE_WINDING() \ - Vertex* tv = v0; \ - v0 = v1; \ - v1 = tv; \ - EdgeEquationInit(&e0, &v0->xyz[0], &v1->xyz[0]); \ - EdgeEquationInit(&e1, &v1->xyz[0], &v2->xyz[0]); \ - EdgeEquationInit(&e2, &v2->xyz[0], &v0->xyz[0]); \ - area = 0.5f * (e0.c + e1.c + e2.c) \ - - // Check if triangle is backfacing. - if(CULL_MODE == GPU_CULLING_CCW) { - if(area < 0) { - return; - } - } else if(CULL_MODE == GPU_CULLING_CW) { - if(area < 0) { - // We only draw front-facing polygons, so swap - // the back to front and draw - REVERSE_WINDING(); - } else { - // Front facing, so bail - return; - } - } else if(area < 0) { - /* We're not culling, but this is backfacing, so swap vertices and edges */ - REVERSE_WINDING(); - } - - ParameterEquation r, g, b; - - ParameterEquationInit(&r, v0->bgra[2], v1->bgra[2], v2->bgra[2], &e0, &e1, &e2, area); - ParameterEquationInit(&g, v0->bgra[1], v1->bgra[1], v2->bgra[1], &e0, &e1, &e2, area); - ParameterEquationInit(&b, v0->bgra[0], v1->bgra[0], v2->bgra[0], &e0, &e1, &e2, area); - - // Add 0.5 to sample at pixel centers. - for (float x = minX + 0.5f, xm = maxX + 0.5f; x <= xm; x += 1.0f) - for (float y = minY + 0.5f, ym = maxY + 0.5f; y <= ym; y += 1.0f) - { - if (EdgeEquationTestPoint(&e0, x, y) && EdgeEquationTestPoint(&e1, x, y) && EdgeEquationTestPoint(&e2, x, y)) { - int rint = ParameterEquationEvaluate(&r, x, y); - int gint = ParameterEquationEvaluate(&g, x, y); - int bint = ParameterEquationEvaluate(&b, x, y); - SDL_SetRenderDrawColor(RENDERER, rint, gint, bint, 255); - SDL_RenderDrawPoint(RENDERER, x, y); - } - } -} - +AlignedVector vbuffer; void InitGPU(_Bool autosort, _Bool fsaa) { SDL_Init(SDL_INIT_VIDEO | SDL_INIT_EVENTS); @@ -120,6 +46,8 @@ void InitGPU(_Bool autosort, _Bool fsaa) { RENDERER = SDL_CreateRenderer( WINDOW, -1, SDL_RENDERER_ACCELERATED ); + + aligned_vector_init(&vbuffer, sizeof(SDL_Vertex)); } void SceneBegin() { @@ -531,13 +459,37 @@ void SceneListFinish() { Vertex* v0 = (Vertex*) (flags - step - step); Vertex* v1 = (Vertex*) (flags - step); Vertex* v2 = (Vertex*) (flags); - (vidx % 2 == 0) ? DrawTriangle(v0, v1, v2) : DrawTriangle(v1, v0, v2); + + SDL_Vertex sv0 = { + {v0->xyz[0], v0->xyz[1]}, + {v0->bgra[2], v0->bgra[1], v0->bgra[0], v0->bgra[3]}, + {v0->uv[0], v0->uv[1]} + }; + + SDL_Vertex sv1 = { + {v1->xyz[0], v1->xyz[1]}, + {v1->bgra[2], v1->bgra[1], v1->bgra[0], v1->bgra[3]}, + {v1->uv[0], v1->uv[1]} + }; + + SDL_Vertex sv2 = { + {v2->xyz[0], v2->xyz[1]}, + {v2->bgra[2], v2->bgra[1], v2->bgra[0], v2->bgra[3]}, + {v2->uv[0], v2->uv[1]} + }; + + aligned_vector_push_back(&vbuffer, &sv0, 1); + aligned_vector_push_back(&vbuffer, &sv1, 1); + aligned_vector_push_back(&vbuffer, &sv2, 1); } if((*flags) == GPU_CMD_VERTEX_EOL) { vidx = 0; } } + + SDL_SetRenderDrawColor(RENDERER, 255, 255, 255, 255); + SDL_RenderGeometry(RENDERER, NULL, aligned_vector_front(&vbuffer), aligned_vector_size(&vbuffer), NULL, 0); } void SceneFinish() { From 026bdeff098ac1018b84864718c1c340ddd70356 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Sat, 20 May 2023 07:47:39 +0100 Subject: [PATCH 12/12] Fix infuriating memory corruption bug --- GL/private.h | 3 ++- GL/texture.c | 32 +++++++++++++++++--------------- containers/named_array.c | 1 - 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/GL/private.h b/GL/private.h index f945ccb..b0fe41a 100644 --- a/GL/private.h +++ b/GL/private.h @@ -164,7 +164,8 @@ typedef struct { GLboolean isCompressed; GLboolean isPaletted; //50 -} TextureObject; + GLubyte padding[14]; // Pad to 64-bytes +} __attribute__((aligned(32))) TextureObject; typedef struct { GLfloat emissive[4]; diff --git a/GL/texture.c b/GL/texture.c index b3126f6..e18c2e4 100644 --- a/GL/texture.c +++ b/GL/texture.c @@ -21,7 +21,7 @@ static TextureObject* TEXTURE_UNITS[MAX_GLDC_TEXTURE_UNITS] = {NULL, NULL}; static NamedArray TEXTURE_OBJECTS; GLubyte ACTIVE_TEXTURE = 0; -static TexturePalette* SHARED_PALETTES[MAX_GLDC_SHARED_PALETTES]; +static TexturePalette* SHARED_PALETTES[MAX_GLDC_SHARED_PALETTES] = {NULL, NULL, NULL, NULL}; static GLuint _determinePVRFormat(GLint internalFormat, GLenum type); @@ -122,8 +122,11 @@ static void _glReleasePaletteSlot(GLshort slot, GLushort size) gl_assert(size == 16 || size == 256); if (size == 16) { - GLushort bank = slot / MAX_GLDC_PALETTE_SLOTS; - GLushort subbank = slot % MAX_GLDC_PALETTE_SLOTS; + GLushort bank = slot / MAX_GLDC_4BPP_PALETTE_SLOTS; + GLushort subbank = slot % MAX_GLDC_4BPP_PALETTE_SLOTS; + + gl_assert(bank < MAX_GLDC_PALETTE_SLOTS); + gl_assert(subbank < MAX_GLDC_4BPP_PALETTE_SLOTS); SUBBANKS_USED[bank][subbank] = GL_FALSE; @@ -135,6 +138,7 @@ static void _glReleasePaletteSlot(GLshort slot, GLushort size) BANKS_USED[bank] = GL_FALSE; } else { + gl_assert(slot < MAX_GLDC_PALETTE_SLOTS); BANKS_USED[slot] = GL_FALSE; for (i = 0; i < MAX_GLDC_4BPP_PALETTE_SLOTS; ++i) { SUBBANKS_USED[slot][i] = GL_FALSE; @@ -475,18 +479,18 @@ static void _glInitializeTextureObject(TextureObject* txr, unsigned int id) { } GLubyte _glInitTextures() { - - uint32_t i; - named_array_init(&TEXTURE_OBJECTS, sizeof(TextureObject), MAX_TEXTURE_COUNT); // Reserve zero so that it is never given to anyone as an ID! named_array_reserve(&TEXTURE_OBJECTS, 0); // Initialize zero as an actual texture object though because apparently it is! - _glInitializeTextureObject((TextureObject*) named_array_get(&TEXTURE_OBJECTS, 0), 0); + TextureObject* default_tex = (TextureObject*) named_array_get(&TEXTURE_OBJECTS, 0); + _glInitializeTextureObject(default_tex, 0); + TEXTURE_UNITS[0] = default_tex; + TEXTURE_UNITS[1] = default_tex; - for (i=0; i < MAX_GLDC_SHARED_PALETTES;i++){ + for(int i = 0; i < MAX_GLDC_SHARED_PALETTES; i++){ SHARED_PALETTES[i] = _initTexturePalette(); } @@ -534,6 +538,7 @@ void APIENTRY glActiveTextureARB(GLenum texture) { ACTIVE_TEXTURE = texture & 0xF; gl_assert(ACTIVE_TEXTURE < MAX_GLDC_TEXTURE_UNITS); + gl_assert(ACTIVE_TEXTURE >= 0); gl_assert(TEXTURE_OBJECTS.element_size > 0); } @@ -547,10 +552,9 @@ void APIENTRY glGenTextures(GLsizei n, GLuint *textures) { gl_assert(TEXTURE_OBJECTS.element_size > 0); - while(n--) { + for(GLsizei i = 0; i < n; ++i) { GLuint id = 0; TextureObject* txr = (TextureObject*) named_array_alloc(&TEXTURE_OBJECTS, &id); - gl_assert(txr); gl_assert(id); // Generated IDs must never be zero @@ -559,8 +563,7 @@ void APIENTRY glGenTextures(GLsizei n, GLuint *textures) { gl_assert(txr->index == id); - *textures = id; - textures++; + textures[i] = id; } gl_assert(TEXTURE_OBJECTS.element_size > 0); @@ -588,7 +591,8 @@ void APIENTRY glDeleteTextures(GLsizei n, GLuint *textures) { for(GLuint j = 0; j < MAX_GLDC_TEXTURE_UNITS; ++j) { if(txr == TEXTURE_UNITS[j]) { - TEXTURE_UNITS[j] = NULL; + // Reset to the default texture + TEXTURE_UNITS[j] = (TextureObject*) named_array_get(&TEXTURE_OBJECTS, 0); } } @@ -612,8 +616,6 @@ void APIENTRY glDeleteTextures(GLsizei n, GLuint *textures) { } named_array_release(&TEXTURE_OBJECTS, id); - textures[i] = 0; - txr->index = 0; } } diff --git a/containers/named_array.c b/containers/named_array.c index 1bf06c9..4f6fad3 100644 --- a/containers/named_array.c +++ b/containers/named_array.c @@ -68,7 +68,6 @@ void* named_array_reserve(NamedArray* array, unsigned int id) { void named_array_release(NamedArray* array, unsigned int new_id) { unsigned int i = new_id / 8; unsigned int j = new_id % 8; - array->used_markers[i] &= (unsigned char) ~(1 << j); }