From a5891056db642af1a4f1092da0534aeddb53ea51 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Tue, 16 May 2023 13:31:44 +0100 Subject: [PATCH] Many bug fixes and optimisations --- CMakeLists.txt | 22 +- GL/immediate.c | 49 ++-- GL/matrix.c | 34 +-- GL/platforms/sh4.c | 469 ++++++++++++++++----------------- GL/platforms/software.c | 559 ++++++++++++++++++++-------------------- GL/state.c | 3 +- GL/texture.c | 51 ++-- samples/cubes/main.cpp | 2 +- samples/nehe02/main.c | 4 +- samples/nehe10/main.c | 53 +++- tests/zclip/main.cpp | 2 +- 11 files changed, 650 insertions(+), 598 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f9aadb8..41a2b07 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,8 @@ -cmake_minimum_required(VERSION 3.0) +cmake_minimum_required(VERSION 3.9) project(GLdc) +set(CMAKE_VERBOSE_MAKEFILE ON) + # set the default backend if(PLATFORM_DREAMCAST) set(BACKEND "kospvr" CACHE STRING "Backend to use") @@ -8,6 +10,9 @@ else() set(BACKEND "software" CACHE STRING "Backend to use") endif() +include(CheckIPOSupported) +check_ipo_supported(RESULT FLTO_SUPPORTED OUTPUT FLTO_ERROR) + # List of possible backends set_property(CACHE BACKEND PROPERTY STRINGS kospvr software) @@ -49,10 +54,10 @@ else() set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -ffast-math") endif() -set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3 -fexpensive-optimizations -fomit-frame-pointer -finline-functions") +set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3 -mpretend-cmove -fexpensive-optimizations -fomit-frame-pointer -finline-functions") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -std=c++14 -O3 -g0 -s -fomit-frame-pointer -fstrict-aliasing") -set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 -fexpensive-optimizations -fomit-frame-pointer -finline-functions") +set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 -mpretend-cmove -fexpensive-optimizations -fomit-frame-pointer -finline-functions") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -std=c++14 -O3 -fomit-frame-pointer -fstrict-aliasing") set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g -Wall -Wextra") @@ -106,6 +111,10 @@ endif() add_library(GLdc STATIC ${SOURCES}) +if(FLTO_SUPPORTED) + set_property(TARGET GLdc PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) +endif() + if(NOT PLATFORM_DREAMCAST) set_target_properties(GLdc PROPERTIES COMPILE_OPTIONS "-m32" @@ -129,6 +138,13 @@ function(gen_sample sample) add_executable(${sample} ${SAMPLE_SRCS}) + if(FLTO_SUPPORTED) + # FIXME: Cubes + LTO causes an ICE + if(NOT ${sample} MATCHES "cubes") + set_property(TARGET ${sample} PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) + endif() + endif() + if(PLATFORM_DREAMCAST) if(EXISTS "${CMAKE_SOURCE_DIR}/samples/${sample}/romdisk") message("Generating romdisk for sample: ${sample}") diff --git a/GL/immediate.c b/GL/immediate.c index 69dd7a4..afe80c5 100644 --- a/GL/immediate.c +++ b/GL/immediate.c @@ -17,10 +17,10 @@ extern inline GLuint _glRecalcFastPath(); GLboolean IMMEDIATE_MODE_ACTIVE = GL_FALSE; static GLenum ACTIVE_POLYGON_MODE = GL_TRIANGLES; -static GLfloat NORMAL[3] = {0.0f, 0.0f, 1.0f}; -static GLubyte COLOR[4] = {255, 255, 255, 255}; /* ARGB order for speed */ -static GLfloat UV_COORD[2] = {0.0f, 0.0f}; -static GLfloat ST_COORD[2] = {0.0f, 0.0f}; +static GLfloat __attribute__((aligned(32))) NORMAL[3] = {0.0f, 0.0f, 1.0f}; +static GLubyte __attribute__((aligned(32))) COLOR[4] = {255, 255, 255, 255}; /* ARGB order for speed */ +static GLfloat __attribute__((aligned(32))) UV_COORD[2] = {0.0f, 0.0f}; +static GLfloat __attribute__((aligned(32))) ST_COORD[2] = {0.0f, 0.0f}; static AlignedVector VERTICES; static AttribPointerList IM_ATTRIBS; @@ -30,7 +30,7 @@ static AttribPointerList IM_ATTRIBS; can be applied faster */ static GLuint IM_ENABLED_VERTEX_ATTRIBUTES = 0; -typedef struct { +typedef struct __attribute__((aligned(32))) { GLfloat x; GLfloat y; GLfloat z; @@ -161,30 +161,27 @@ void APIENTRY glColor3fv(const GLfloat* v) { void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) { IM_ENABLED_VERTEX_ATTRIBUTES |= VERTEX_ENABLED_FLAG; - uint32_t cap = aligned_vector_capacity(&VERTICES); IMVertex* vert = aligned_vector_extend(&VERTICES, 1); - if(cap != aligned_vector_capacity(&VERTICES)) { - /* Resizing could've invalidated the pointers */ - IM_ATTRIBS.vertex.ptr = aligned_vector_front(&VERTICES); - IM_ATTRIBS.uv.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 3); - IM_ATTRIBS.st.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 5); - IM_ATTRIBS.colour.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 7); - IM_ATTRIBS.normal.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 7) + sizeof(uint32_t); - } - vert->x = x; - vert->y = y; - vert->z = z; - vert->u = UV_COORD[0]; - vert->v = UV_COORD[1]; - vert->s = ST_COORD[0]; - vert->t = ST_COORD[1]; + /* Resizing could've invalidated the pointers */ + IM_ATTRIBS.vertex.ptr = VERTICES.data; + IM_ATTRIBS.uv.ptr = IM_ATTRIBS.vertex.ptr + 12; + IM_ATTRIBS.st.ptr = IM_ATTRIBS.uv.ptr + 8; + IM_ATTRIBS.colour.ptr = IM_ATTRIBS.st.ptr + 8; + IM_ATTRIBS.normal.ptr = IM_ATTRIBS.colour.ptr + 4; - *((uint32_t*) vert->bgra) = *((uint32_t*) COLOR); - - vert->nx = NORMAL[0]; - vert->ny = NORMAL[1]; - vert->nz = NORMAL[2]; + uint32_t* dest = (uint32_t*) &vert->x; + *(dest++) = *((uint32_t*) &x); + *(dest++) = *((uint32_t*) &y); + *(dest++) = *((uint32_t*) &z); + *(dest++) = *((uint32_t*) &UV_COORD[0]); + *(dest++) = *((uint32_t*) &UV_COORD[1]); + *(dest++) = *((uint32_t*) &ST_COORD[0]); + *(dest++) = *((uint32_t*) &ST_COORD[1]); + *(dest++) = *((uint32_t*) COLOR); + *(dest++) = *((uint32_t*) &NORMAL[0]); + *(dest++) = *((uint32_t*) &NORMAL[1]); + *(dest++) = *((uint32_t*) &NORMAL[2]); } void APIENTRY glVertex3fv(const GLfloat* v) { diff --git a/GL/matrix.c b/GL/matrix.c index 73ca6fd..32afff4 100644 --- a/GL/matrix.c +++ b/GL/matrix.c @@ -13,8 +13,8 @@ GLfloat DEPTH_RANGE_MULTIPLIER_L = (1 - 0) / 2; GLfloat DEPTH_RANGE_MULTIPLIER_H = (0 + 1) / 2; -static Stack MATRIX_STACKS[3]; // modelview, projection, texture -static Matrix4x4 NORMAL_MATRIX __attribute__((aligned(32))); +static Stack __attribute__((aligned(32))) MATRIX_STACKS[4]; // modelview, projection, texture +static Matrix4x4 __attribute__((aligned(32))) NORMAL_MATRIX; Viewport VIEWPORT = { 0, 0, 640, 480, 320.0f, 240.0f, 320.0f, 240.0f @@ -23,7 +23,7 @@ Viewport VIEWPORT = { static GLenum MATRIX_MODE = GL_MODELVIEW; static GLubyte MATRIX_IDX = 0; -static const Matrix4x4 IDENTITY = { +static const Matrix4x4 __attribute__((aligned(32))) IDENTITY = { 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, @@ -106,7 +106,11 @@ void APIENTRY glMatrixMode(GLenum mode) { } void APIENTRY glPushMatrix() { - stack_push(MATRIX_STACKS + MATRIX_IDX, stack_top(MATRIX_STACKS + MATRIX_IDX)); + void* top = stack_top(MATRIX_STACKS + MATRIX_IDX); + assert(top); + void* ret = stack_push(MATRIX_STACKS + MATRIX_IDX, top); + (void) ret; + assert(ret); } void APIENTRY glPopMatrix() { @@ -127,10 +131,16 @@ void APIENTRY glTranslatef(GLfloat x, GLfloat y, GLfloat z) { 0.0f, 0.0f, 1.0f, 0.0f, x, y, z, 1.0f }; + void* top = stack_top(MATRIX_STACKS + MATRIX_IDX); + assert(top); - UploadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX)); + UploadMatrix4x4(top); MultiplyMatrix4x4(&trn); - DownloadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX)); + + top = stack_top(MATRIX_STACKS + MATRIX_IDX); + assert(top); + + DownloadMatrix4x4(top); if(MATRIX_MODE == GL_MODELVIEW) { recalculateNormalMatrix(); @@ -270,18 +280,10 @@ void APIENTRY glFrustum(GLfloat left, GLfloat right, /* Multiply the current matrix by an arbitrary matrix */ void glMultMatrixf(const GLfloat *m) { Matrix4x4 TEMP __attribute__((aligned(32))); - const Matrix4x4 *pMatrix; - - if (((GLint)m)&0xf){ /* Unaligned matrix */ - pMatrix = &TEMP; - MEMCPY4(TEMP, m, sizeof(Matrix4x4)); - } - else{ - pMatrix = (const Matrix4x4*) m; - } + MEMCPY4(TEMP, m, sizeof(Matrix4x4)); UploadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX)); - MultiplyMatrix4x4(pMatrix); + MultiplyMatrix4x4(&TEMP); DownloadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX)); if(MATRIX_MODE == GL_MODELVIEW) { diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index addc6fe..a69f286 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -146,23 +146,22 @@ void SceneListSubmit(Vertex* v2, int n) { for(int i = 0; i < n; ++i, ++v2) { PREFETCH(v2 + 1); switch(v2->flags) { - case GPU_CMD_VERTEX_EOL: - if(counter < 2) { - continue; - } - - counter = 0; - break; - case GPU_CMD_VERTEX: - ++counter; - if(counter < 3) { - continue; - } - break; - default: - _glPushHeaderOrVertex(v2); - counter = 0; + case GPU_CMD_VERTEX_EOL: + if(counter < 2) { continue; + } + counter = 0; + break; + case GPU_CMD_VERTEX: + ++counter; + if(counter < 3) { + continue; + } + break; + default: + _glPushHeaderOrVertex(v2); + counter = 0; + continue; }; Vertex* const v0 = v2 - 2; @@ -176,252 +175,254 @@ void SceneListSubmit(Vertex* v2, int n) { ); switch(visible_mask) { - case 15: /* All visible, but final vertex in strip */ + case 15: /* All visible, but final vertex in strip */ + { + _glPerspectiveDivideVertex(v0, h); + _glPushHeaderOrVertex(v0); + + _glPerspectiveDivideVertex(v1, h); + _glPushHeaderOrVertex(v1); + + _glPerspectiveDivideVertex(v2, h); + _glPushHeaderOrVertex(v2); + } + break; + case 7: + /* All visible, push the first vertex and move on */ + _glPerspectiveDivideVertex(v0, h); + _glPushHeaderOrVertex(v0); + break; + case 9: + /* First vertex was visible, last in strip */ { + Vertex __attribute__((aligned(32))) scratch[2]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; + + _glClipEdge(v2, v0, b); + b->flags = GPU_CMD_VERTEX_EOL; + _glPerspectiveDivideVertex(v0, h); _glPushHeaderOrVertex(v0); - _glPerspectiveDivideVertex(v1, h); - _glPushHeaderOrVertex(v1); + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); - _glPerspectiveDivideVertex(v2, h); - _glPushHeaderOrVertex(v2); + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); } break; - case 7: - /* All visible, push the first vertex and move on */ + case 1: + /* First vertex was visible, but not last in strip */ + { + Vertex __attribute__((aligned(32))) scratch[2]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; + + _glClipEdge(v2, v0, b); + b->flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(v0, h); _glPushHeaderOrVertex(v0); + + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); + _glPushHeaderOrVertex(b); + } break; - case 9: - /* First vertex was visible, last in strip */ - { - Vertex __attribute__((aligned(32))) scratch[2]; - Vertex* a = &scratch[0]; - Vertex* b = &scratch[1]; - - _glClipEdge(v0, v1, a); - a->flags = GPU_CMD_VERTEX; - - _glClipEdge(v2, v0, b); - b->flags = GPU_CMD_VERTEX_EOL; - - _glPerspectiveDivideVertex(v0, h); - _glPushHeaderOrVertex(v0); - - _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(a); - - _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); - } - break; - case 1: - /* First vertex was visible, but not last in strip */ - { - Vertex __attribute__((aligned(32))) scratch[2]; - Vertex* a = &scratch[0]; - Vertex* b = &scratch[1]; - - _glClipEdge(v0, v1, a); - a->flags = GPU_CMD_VERTEX; - - _glClipEdge(v2, v0, b); - b->flags = GPU_CMD_VERTEX; - - _glPerspectiveDivideVertex(v0, h); - _glPushHeaderOrVertex(v0); - - _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(a); - - _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); - _glPushHeaderOrVertex(b); - } - break; - case 10: - case 2: - /* Second vertex was visible. In self case we need to create a triangle and produce + case 10: + case 2: + /* Second vertex was visible. In self case we need to create a triangle and produce two new vertices: 1-2, and 2-3. */ - { - Vertex __attribute__((aligned(32))) scratch[2]; - Vertex* a = &scratch[0]; - Vertex* c = &scratch[1]; + { + Vertex __attribute__((aligned(32))) scratch[3]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; - memcpy_vertex(c, v1); + memcpy_vertex(c, v1); - _glClipEdge(v0, c, a); - a->flags = GPU_CMD_VERTEX; + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(a); + _glClipEdge(v1, v2, b); + b->flags = v2->flags; - _glClipEdge(c, v2, a); - a->flags = v2->flags; + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); - _glPerspectiveDivideVertex(c, h); - _glPushHeaderOrVertex(c); + _glPerspectiveDivideVertex(c, h); + _glPushHeaderOrVertex(c); - _glPerspectiveDivideVertex(a, h); + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); + } + break; + case 11: + case 3: /* First and second vertex were visible */ + { + Vertex __attribute__((aligned(32))) scratch[3]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; + + memcpy_vertex(c, v1); + + _glClipEdge(v2, v0, b); + b->flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(v0, h); + _glPushHeaderOrVertex(v0); + + _glClipEdge(v1, v2, a); + a->flags = v2->flags; + + _glPerspectiveDivideVertex(c, h); + _glPushHeaderOrVertex(c); + + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); + + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(c); + _glPushHeaderOrVertex(a); + } + break; + case 12: + case 4: + /* Third vertex was visible. */ + { + Vertex __attribute__((aligned(32))) scratch[3]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; + + memcpy_vertex(c, v2); + + _glClipEdge(v2, v0, a); + a->flags = GPU_CMD_VERTEX; + + _glClipEdge(v1, v2, b); + b->flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); + + if(counter % 2 == 1) { _glPushHeaderOrVertex(a); } + + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); + + _glPerspectiveDivideVertex(c, h); + _glPushHeaderOrVertex(c); + } break; - case 11: - case 3: /* First and second vertex were visible */ - { - Vertex __attribute__((aligned(32))) scratch[3]; - Vertex* a = &scratch[0]; - Vertex* b = &scratch[1]; - Vertex* c = &scratch[2]; + case 13: + { + Vertex __attribute__((aligned(32))) scratch[3]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; - memcpy_vertex(c, v1); + memcpy_vertex(c, v2); + c->flags = GPU_CMD_VERTEX; - _glClipEdge(v2, v0, b); - b->flags = GPU_CMD_VERTEX; + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(v0, h); - _glPushHeaderOrVertex(v0); + _glClipEdge(v1, v2, b); + b->flags = GPU_CMD_VERTEX; - _glClipEdge(v1, v2, a); - a->flags = v2->flags; + _glPerspectiveDivideVertex(v0, h); + _glPushHeaderOrVertex(v0); - _glPerspectiveDivideVertex(c, h); - _glPushHeaderOrVertex(c); + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); - _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); + _glPerspectiveDivideVertex(c, h); + _glPushHeaderOrVertex(c); + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); - _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(c); - _glPushHeaderOrVertex(a); - } + c->flags = GPU_CMD_VERTEX_EOL; + _glPushHeaderOrVertex(c); + } + break; + case 5: /* First and third vertex were visible */ + { + Vertex __attribute__((aligned(32))) scratch[3]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; + + memcpy_vertex(c, v2); + c->flags = GPU_CMD_VERTEX; + + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; + + _glClipEdge(v1, v2, b); + b->flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(v0, h); + _glPushHeaderOrVertex(v0); + + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(c, h); + _glPushHeaderOrVertex(c); + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); + _glPushHeaderOrVertex(c); + } + break; + case 14: + case 6: /* Second and third vertex were visible */ + { + Vertex __attribute__((aligned(32))) scratch[4]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; + Vertex* d = &scratch[3]; + + memcpy_vertex(c, v1); + memcpy_vertex(d, v2); + + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; + + _glClipEdge(v2, v0, b); + b->flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(c, h); + _glPushHeaderOrVertex(c); + + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); + _glPushHeaderOrVertex(c); + + _glPerspectiveDivideVertex(d, h); + _glPushHeaderOrVertex(d); + } + break; + case 8: + default: break; - case 12: - case 4: - /* Third vertex was visible. */ - { - Vertex __attribute__((aligned(32))) scratch[3]; - Vertex* a = &scratch[0]; - Vertex* b = &scratch[1]; - Vertex* c = &scratch[2]; - - memcpy_vertex(c, v2); - - _glClipEdge(v2, v0, a); - a->flags = GPU_CMD_VERTEX; - - _glClipEdge(v1, v2, b); - b->flags = GPU_CMD_VERTEX; - - _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(a); - - if(counter % 2 == 1) { - _glPushHeaderOrVertex(a); - } - - _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); - - _glPerspectiveDivideVertex(c, h); - _glPushHeaderOrVertex(c); - } - break; - case 13: - { - Vertex __attribute__((aligned(32))) scratch[3]; - Vertex* a = &scratch[0]; - Vertex* b = &scratch[1]; - Vertex* c = &scratch[2]; - - memcpy_vertex(c, v2); - c->flags = GPU_CMD_VERTEX; - - _glClipEdge(v0, v1, a); - a->flags = GPU_CMD_VERTEX; - - _glClipEdge(v1, v2, b); - b->flags = GPU_CMD_VERTEX; - - _glPerspectiveDivideVertex(v0, h); - _glPushHeaderOrVertex(v0); - - _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(a); - - _glPerspectiveDivideVertex(c, h); - _glPushHeaderOrVertex(c); - _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); - - c->flags = GPU_CMD_VERTEX_EOL; - _glPushHeaderOrVertex(c); - } - break; - case 5: /* First and third vertex were visible */ - { - Vertex __attribute__((aligned(32))) scratch[3]; - Vertex* a = &scratch[0]; - Vertex* b = &scratch[1]; - Vertex* c = &scratch[2]; - - memcpy_vertex(c, v2); - c->flags = GPU_CMD_VERTEX; - - _glClipEdge(v0, v1, a); - a->flags = GPU_CMD_VERTEX; - - _glClipEdge(v1, v2, b); - b->flags = GPU_CMD_VERTEX; - - _glPerspectiveDivideVertex(v0, h); - _glPushHeaderOrVertex(v0); - - _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(a); - - _glPerspectiveDivideVertex(c, h); - _glPushHeaderOrVertex(c); - _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); - _glPushHeaderOrVertex(c); - } - break; - case 14: - case 6: /* Second and third vertex were visible */ - { - Vertex __attribute__((aligned(32))) scratch[4]; - Vertex* a = &scratch[0]; - Vertex* b = &scratch[1]; - Vertex* c = &scratch[2]; - Vertex* d = &scratch[3]; - - memcpy_vertex(c, v1); - memcpy_vertex(d, v2); - - _glClipEdge(v0, v1, a); - a->flags = GPU_CMD_VERTEX; - - _glClipEdge(v2, v0, b); - b->flags = GPU_CMD_VERTEX; - - _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(a); - - _glPerspectiveDivideVertex(c, h); - _glPushHeaderOrVertex(c); - - _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); - _glPushHeaderOrVertex(c); - - _glPerspectiveDivideVertex(d, h); - _glPushHeaderOrVertex(d); - } - break; - default: - break; } } diff --git a/GL/platforms/software.c b/GL/platforms/software.c index 6b5a9ee..9a27c46 100644 --- a/GL/platforms/software.c +++ b/GL/platforms/software.c @@ -162,7 +162,7 @@ GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) { } } -GL_FORCE_INLINE void _glSubmitHeaderOrVertex(const Vertex* v) { +GL_FORCE_INLINE void _glPushHeaderOrVertex(const Vertex* v) { #ifndef NDEBUG if(glIsVertex(v->flags)) { gl_assert(!isnan(v->xyz[2])); @@ -177,335 +177,329 @@ GL_FORCE_INLINE void _glSubmitHeaderOrVertex(const Vertex* v) { BUFFER[vertex_counter++] = *v; } -static struct { - Vertex* v; - int visible; -} triangle[3]; +static inline void _glFlushBuffer() {} -static int tri_count = 0; -static int strip_count = 0; - -GL_FORCE_INLINE void interpolateColour(const uint8_t* v1, const uint8_t* v2, const float t, uint8_t* out) { - const int MASK1 = 0x00FF00FF; - const int MASK2 = 0xFF00FF00; - - const int f2 = 256 * t; - const int f1 = 256 - f2; - - const uint32_t a = *(uint32_t*) v1; - const uint32_t b = *(uint32_t*) v2; - - *((uint32_t*) out) = (((((a & MASK1) * f1) + ((b & MASK1) * f2)) >> 8) & MASK1) | - (((((a & MASK2) * f1) + ((b & MASK2) * f2)) >> 8) & MASK2); -} GL_FORCE_INLINE void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) { - /* Clipping time! */ + const static float o = 0.003921569f; // 1 / 255 const float d0 = v1->w + v1->xyz[2]; const float d1 = v2->w + v2->xyz[2]; + const float t = (fabs(d0) * (1.0f / sqrtf((d1 - d0) * (d1 - d0)))) + 0.000001f; + const float invt = 1.0f - t; - const float epsilon = (d0 < d1) ? -0.00001f : 0.00001f; + vout->xyz[0] = invt * v1->xyz[0] + t * v2->xyz[0]; + vout->xyz[1] = invt * v1->xyz[1] + t * v2->xyz[1]; + vout->xyz[2] = invt * v1->xyz[2] + t * v2->xyz[2]; - float t = (d0 / (d0 - d1)) + epsilon; + vout->uv[0] = invt * v1->uv[0] + t * v2->uv[0]; + vout->uv[1] = invt * v1->uv[1] + t * v2->uv[1]; - t = (t > 1.0f) ? 1.0f : t; - t = (t < 0.0f) ? 0.0f : t; + vout->w = invt * v1->w + t * v2->w; - vout->xyz[0] = __builtin_fmaf(v2->xyz[0] - v1->xyz[0], t, v1->xyz[0]); - vout->xyz[1] = __builtin_fmaf(v2->xyz[1] - v1->xyz[1], t, v1->xyz[1]); - vout->xyz[2] = __builtin_fmaf(v2->xyz[2] - v1->xyz[2], t, v1->xyz[2]); - vout->w = __builtin_fmaf(v2->w - v1->w, t, v1->w); + const float m = 255 * t; + const float n = 255 - m; - vout->uv[0] = __builtin_fmaf(v2->uv[0] - v1->uv[0], t, v1->uv[0]); - vout->uv[1] = __builtin_fmaf(v2->uv[1] - v1->uv[1], t, v1->uv[1]); - - interpolateColour(v1->bgra, v2->bgra, t, vout->bgra); + vout->bgra[0] = (v1->bgra[0] * n + v2->bgra[0] * m) * o; + vout->bgra[1] = (v1->bgra[1] * n + v2->bgra[1] * m) * o; + vout->bgra[2] = (v1->bgra[2] * n + v2->bgra[2] * m) * o; + vout->bgra[3] = (v1->bgra[3] * n + v2->bgra[3] * m) * o; } -GL_FORCE_INLINE void ClearTriangle() { - tri_count = 0; -} - -GL_FORCE_INLINE void ShiftTriangle() { - if(!tri_count) { +void SceneListSubmit(Vertex* v2, int n) { + /* You need at least a header, and 3 vertices to render anything */ + if(n < 4) { return; } - tri_count--; - triangle[0] = triangle[1]; - triangle[1] = triangle[2]; - -#ifndef NDEBUG - triangle[2].v = NULL; - triangle[2].visible = false; -#endif -} - -GL_FORCE_INLINE void ShiftRotateTriangle() { - if(!tri_count) { - return; - } - - if(triangle[0].v < triangle[1].v) { - triangle[0] = triangle[2]; - } else { - triangle[1] = triangle[2]; - } - - tri_count--; -} - -void SceneListSubmit(Vertex* src, int n) { - /* Perform perspective divide on each vertex */ - Vertex* vertex = (Vertex*) src; - const float h = GetVideoMode()->height; - /* If Z-clipping is disabled, just fire everything over to the buffer */ - if(!ZNEAR_CLIPPING_ENABLED) { - for(int i = 0; i < n; ++i, ++vertex) { - PREFETCH(vertex + 1); - if(glIsVertex(vertex->flags)) { - _glPerspectiveDivideVertex(vertex, h); - } - _glSubmitHeaderOrVertex(vertex); - } + uint8_t visible_mask = 0; + uint8_t counter = 0; - return; - } - - tri_count = 0; - strip_count = 0; - -#if CLIP_DEBUG - printf("----\n"); -#endif - - for(int i = 0; i < n; ++i, ++vertex) { - PREFETCH(vertex + 1); - - bool is_last_in_strip = glIsLastVertex(vertex->flags); - - /* Wait until we fill the triangle */ - if(tri_count < 3) { - if(glIsVertex(vertex->flags)) { - triangle[tri_count].v = vertex; - triangle[tri_count].visible = vertex->xyz[2] >= -vertex->w; - tri_count++; - strip_count++; - } else { - /* We hit a header */ - tri_count = 0; - strip_count = 0; - _glSubmitHeaderOrVertex(vertex); - } - - if(tri_count < 3) { + for(int i = 0; i < n; ++i, ++v2) { + PREFETCH(v2 + 1); + switch(v2->flags) { + case GPU_CMD_VERTEX_EOL: + if(counter < 2) { + continue; + } + counter = 0; + break; + case GPU_CMD_VERTEX: + ++counter; + if(counter < 3) { + continue; + } + break; + default: + _glPushHeaderOrVertex(v2); + counter = 0; continue; - } + }; + + Vertex* const v0 = v2 - 2; + Vertex* const v1 = v2 - 1; + + visible_mask = ( + (v0->xyz[2] > -v0->w) << 0 | + (v1->xyz[2] > -v1->w) << 1 | + (v2->xyz[2] > -v2->w) << 2 | + (counter == 0) << 3 + ); + + switch(visible_mask) { + case 15: /* All visible, but final vertex in strip */ + { + _glPerspectiveDivideVertex(v0, h); + _glPushHeaderOrVertex(v0); + + _glPerspectiveDivideVertex(v1, h); + _glPushHeaderOrVertex(v1); + + _glPerspectiveDivideVertex(v2, h); + _glPushHeaderOrVertex(v2); } + break; + case 7: + /* All visible, push the first vertex and move on */ + _glPerspectiveDivideVertex(v0, h); + _glPushHeaderOrVertex(v0); + break; + case 9: + /* First vertex was visible, last in strip */ + { + Vertex __attribute__((aligned(32))) scratch[2]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; -#if CLIP_DEBUG - printf("SC: %d\n", strip_count); -#endif + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; - /* If we got here, then triangle contains 3 vertices */ - int visible_mask = triangle[0].visible | (triangle[1].visible << 1) | (triangle[2].visible << 2); - if(visible_mask == 7) { -#if CLIP_DEBUG - printf("Visible\n"); -#endif - /* All the vertices are visible! We divide and submit v0, then shift */ - _glPerspectiveDivideVertex(vertex - 2, h); - _glSubmitHeaderOrVertex(vertex - 2); + _glClipEdge(v2, v0, b); + b->flags = GPU_CMD_VERTEX_EOL; - if(is_last_in_strip) { - _glPerspectiveDivideVertex(vertex - 1, h); - _glSubmitHeaderOrVertex(vertex - 1); - _glPerspectiveDivideVertex(vertex, h); - _glSubmitHeaderOrVertex(vertex); - tri_count = 0; - strip_count = 0; + _glPerspectiveDivideVertex(v0, h); + _glPushHeaderOrVertex(v0); + + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); + } + break; + case 1: + /* First vertex was visible, but not last in strip */ + { + Vertex __attribute__((aligned(32))) scratch[2]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; + + _glClipEdge(v2, v0, b); + b->flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(v0, h); + _glPushHeaderOrVertex(v0); + + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); + _glPushHeaderOrVertex(b); + } + break; + case 10: + case 2: + /* Second vertex was visible. In self case we need to create a triangle and produce + two new vertices: 1-2, and 2-3. */ + { + Vertex __attribute__((aligned(32))) scratch[3]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; + + memcpy_vertex(c, v1); + + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; + + _glClipEdge(v1, v2, b); + b->flags = v2->flags; + + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(c, h); + _glPushHeaderOrVertex(c); + + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); + } + break; + case 11: + case 3: /* First and second vertex were visible */ + { + Vertex __attribute__((aligned(32))) scratch[3]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; + + memcpy_vertex(c, v1); + + _glClipEdge(v2, v0, b); + b->flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(v0, h); + _glPushHeaderOrVertex(v0); + + _glClipEdge(v1, v2, a); + a->flags = v2->flags; + + _glPerspectiveDivideVertex(c, h); + _glPushHeaderOrVertex(c); + + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); + + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(c); + _glPushHeaderOrVertex(a); + } + break; + case 12: + case 4: + /* Third vertex was visible. */ + { + Vertex __attribute__((aligned(32))) scratch[3]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; + + memcpy_vertex(c, v2); + + _glClipEdge(v2, v0, a); + a->flags = GPU_CMD_VERTEX; + + _glClipEdge(v1, v2, b); + b->flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); + + if(counter % 2 == 1) { + _glPushHeaderOrVertex(a); } - ShiftRotateTriangle(); + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); - } else if(visible_mask) { - /* Clipping time! + _glPerspectiveDivideVertex(c, h); + _glPushHeaderOrVertex(c); + } + break; + case 13: + { + Vertex __attribute__((aligned(32))) scratch[3]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; - There are 6 distinct possibilities when clipping a triangle. 3 of them result - in another triangle, 3 of them result in a quadrilateral. + memcpy_vertex(c, v2); + c->flags = GPU_CMD_VERTEX; - Assuming you iterate the edges of the triangle in order, and create a new *visible* - vertex when you cross the plane, and discard vertices behind the plane, then the only - difference between the two cases is that the final two vertices that need submitting have - to be reversed. + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; - Unfortunately we have to copy vertices here, because if we persp-divide a vertex it may - be used in a subsequent triangle in the strip and would end up being double divided. - */ -#if CLIP_DEBUG - printf("Clip: %d, SC: %d\n", visible_mask, strip_count); - printf("%d, %d, %d\n", triangle[0].v - (Vertex*) src - 1, triangle[1].v - (Vertex*) src - 1, triangle[2].v - (Vertex*) src - 1); -#endif - Vertex tmp; - if(strip_count > 3) { -#if CLIP_DEBUG - printf("Flush\n"); -#endif - tmp = *(vertex - 2); - /* If we had triangles ahead of this one, submit and finalize */ - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glClipEdge(v1, v2, b); + b->flags = GPU_CMD_VERTEX; - tmp = *(vertex - 1); - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); - } + _glPerspectiveDivideVertex(v0, h); + _glPushHeaderOrVertex(v0); - switch(visible_mask) { - case 1: { - /* 0, 0a, 2a */ - tmp = *triangle[0].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); - _glClipEdge(triangle[0].v, triangle[1].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glPerspectiveDivideVertex(c, h); + _glPushHeaderOrVertex(c); + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); - _glClipEdge(triangle[2].v, triangle[0].v, &tmp); - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); - } break; - case 2: { - /* 0a, 1, 1a */ - _glClipEdge(triangle[0].v, triangle[1].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + c->flags = GPU_CMD_VERTEX_EOL; + _glPushHeaderOrVertex(c); + } + break; + case 5: /* First and third vertex were visible */ + { + Vertex __attribute__((aligned(32))) scratch[3]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; - tmp = *triangle[1].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + memcpy_vertex(c, v2); + c->flags = GPU_CMD_VERTEX; - _glClipEdge(triangle[1].v, triangle[2].v, &tmp); - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); - } break; - case 3: { - /* 0, 1, 2a, 1a */ - tmp = *triangle[0].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; - tmp = *triangle[1].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glClipEdge(v1, v2, b); + b->flags = GPU_CMD_VERTEX; - _glClipEdge(triangle[2].v, triangle[0].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glPerspectiveDivideVertex(v0, h); + _glPushHeaderOrVertex(v0); - _glClipEdge(triangle[1].v, triangle[2].v, &tmp); - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); - } break; - case 4: { - /* 1a, 2, 2a */ - _glClipEdge(triangle[1].v, triangle[2].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); - tmp = *triangle[2].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glPerspectiveDivideVertex(c, h); + _glPushHeaderOrVertex(c); + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); + _glPushHeaderOrVertex(c); + } + break; + case 14: + case 6: /* Second and third vertex were visible */ + { + Vertex __attribute__((aligned(32))) scratch[4]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; + Vertex* d = &scratch[3]; - _glClipEdge(triangle[2].v, triangle[0].v, &tmp); - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); - } break; - case 5: { - /* 0, 0a, 2, 1a */ - tmp = *triangle[0].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + memcpy_vertex(c, v1); + memcpy_vertex(d, v2); - _glClipEdge(triangle[0].v, triangle[1].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; - tmp = *triangle[2].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glClipEdge(v2, v0, b); + b->flags = GPU_CMD_VERTEX; - _glClipEdge(triangle[1].v, triangle[2].v, &tmp); - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); - } break; - case 6: { - /* 0a, 1, 2a, 2 */ - _glClipEdge(triangle[0].v, triangle[1].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glPerspectiveDivideVertex(a, h); + _glPushHeaderOrVertex(a); - tmp = *triangle[1].v; - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glPerspectiveDivideVertex(c, h); + _glPushHeaderOrVertex(c); - _glClipEdge(triangle[2].v, triangle[0].v, &tmp); - tmp.flags = GPU_CMD_VERTEX; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); + _glPerspectiveDivideVertex(b, h); + _glPushHeaderOrVertex(b); + _glPushHeaderOrVertex(c); - tmp = *triangle[2].v; - tmp.flags = GPU_CMD_VERTEX_EOL; - _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(&tmp); - } break; - default: - break; - } - - /* If this was the last in the strip, we don't need to - submit anything else, we just wipe the tri_count */ - if(is_last_in_strip) { - tri_count = 0; - strip_count = 0; - } else { - ShiftRotateTriangle(); - strip_count = 2; - } - } else { - /* Invisible? Move to the next in the strip */ - - if(is_last_in_strip) { - tri_count = 0; - strip_count = 0; - } - strip_count = 2; - ShiftRotateTriangle(); + _glPerspectiveDivideVertex(d, h); + _glPushHeaderOrVertex(d); + } + break; + case 8: + default: + break; } } + + _glFlushBuffer(); } void SceneListFinish() { @@ -548,7 +542,6 @@ void SceneListFinish() { void SceneFinish() { SDL_RenderPresent(RENDERER); - return; /* Only sensible place to hook the quit signal */ SDL_Event e; while (SDL_PollEvent(&e)) { diff --git a/GL/state.c b/GL/state.c index e1bfe14..2a16367 100644 --- a/GL/state.c +++ b/GL/state.c @@ -255,7 +255,8 @@ void _glUpdatePVRTextureContext(PolyContext *context, GLshort textureUnit) { context->txr2.enable = GPU_TEXTURE_DISABLE; context->txr2.alpha = GPU_TXRALPHA_DISABLE; - if(!TEXTURES_ENABLED[textureUnit] || !tx1) { + if(!TEXTURES_ENABLED[textureUnit] || !tx1 || !tx1->data) { + context->txr.base = NULL; return; } diff --git a/GL/texture.c b/GL/texture.c index 016be45..3d2dd77 100644 --- a/GL/texture.c +++ b/GL/texture.c @@ -39,6 +39,7 @@ static void* yalloc_alloc_and_defrag(size_t size) { if(!ret) { /* Tried to allocate, but out of room, let's try defragging * and repeating the alloc */ + fprintf(stderr, "Ran out of memory, defragmenting\n"); glDefragmentTextureMemory_KOS(); ret = yalloc_alloc(YALLOC_BASE, size); } @@ -537,6 +538,7 @@ void APIENTRY glGenTextures(GLsizei n, GLuint *textures) { GLuint id = 0; TextureObject* txr = (TextureObject*) named_array_alloc(&TEXTURE_OBJECTS, &id); + gl_assert(txr); gl_assert(id); // Generated IDs must never be zero _glInitializeTextureObject(txr, id); @@ -553,31 +555,32 @@ void APIENTRY glDeleteTextures(GLsizei n, GLuint *textures) { while(n--) { TextureObject* txr = (TextureObject*) named_array_get(&TEXTURE_OBJECTS, *textures); - /* Make sure we update framebuffer objects that have this texture attached */ - _glWipeTextureOnFramebuffers(*textures); + if(txr) { + /* Make sure we update framebuffer objects that have this texture attached */ + _glWipeTextureOnFramebuffers(*textures); - if(txr == TEXTURE_UNITS[ACTIVE_TEXTURE]) { - TEXTURE_UNITS[ACTIVE_TEXTURE] = NULL; - } - - if(txr->data) { - yalloc_free(YALLOC_BASE, txr->data); - txr->data = NULL; - } - - if(txr->palette && txr->palette->data) { - - if (txr->palette->bank > -1) { - _glReleasePaletteSlot(txr->palette->bank, txr->palette->size); - txr->palette->bank = -1; + if(txr == TEXTURE_UNITS[ACTIVE_TEXTURE]) { + TEXTURE_UNITS[ACTIVE_TEXTURE] = NULL; } - free(txr->palette->data); - txr->palette->data = NULL; - } - if(txr->palette) { - free(txr->palette); - txr->palette = NULL; + if(txr->data) { + yalloc_free(YALLOC_BASE, txr->data); + txr->data = NULL; + } + + if(txr->palette && txr->palette->data) { + if (txr->palette->bank > -1) { + _glReleasePaletteSlot(txr->palette->bank, txr->palette->size); + txr->palette->bank = -1; + } + free(txr->palette->data); + txr->palette->data = NULL; + } + + if(txr->palette) { + free(txr->palette); + txr->palette = NULL; + } } named_array_release(&TEXTURE_OBJECTS, *textures); @@ -820,6 +823,8 @@ void APIENTRY glCompressedTexImage2DARB(GLenum target, if(data) { FASTCPY(active->data, data, imageSize); } + + _glGPUStateMarkDirty(); } static GLint _cleanInternalFormat(GLint internalFormat) { @@ -1555,6 +1560,8 @@ void APIENTRY glTexImage2D(GLenum target, GLint level, GLint internalFormat, free(conversionBuffer); conversionBuffer = NULL; } + + _glGPUStateMarkDirty(); } void APIENTRY glTexParameteri(GLenum target, GLenum pname, GLint param) { diff --git a/samples/cubes/main.cpp b/samples/cubes/main.cpp index 59a9f1b..1599466 100644 --- a/samples/cubes/main.cpp +++ b/samples/cubes/main.cpp @@ -328,7 +328,7 @@ void updateLogic() glTranslatef(0.0f, 0.0f, -cameraDistance + zoomVal); // Apply cube transformation (identity matrix) - glMultMatrixf(cubeTransformationMatrix); + glLoadIdentity(); updateCubes(dt); diff --git a/samples/nehe02/main.c b/samples/nehe02/main.c index 1b2e3e0..7775721 100644 --- a/samples/nehe02/main.c +++ b/samples/nehe02/main.c @@ -9,7 +9,7 @@ /* A general OpenGL initialization function. Sets all of the initial parameters. */ void InitGL(int Width, int Height) // We call this right after our OpenGL window is created. { - glClearColor(0.0f, 0.0f, 0.0f, 0.0f); // This Will Clear The Background Color To Black + glClearColor(0.0f, 0.0f, 1.0f, 0.0f); // This Will Clear The Background Color To Black glClearDepth(1.0); // Enables Clearing Of The Depth Buffer glDepthFunc(GL_LEQUAL); // The Type Of Depth Test To Do glEnable(GL_DEPTH_TEST); // Enables Depth Testing @@ -20,7 +20,7 @@ void InitGL(int Width, int Height) // We call this right after our OpenG gluPerspective(45.0f,(GLfloat)Width/(GLfloat)Height,0.1f,100.0f); // Calculate The Aspect Ratio Of The Window - glMatrixMode(GL_MODELVIEW); + glMatrixMode(GL_MODELVIEW); } /* The function called when our window is resized (which shouldn't happen, because we're fullscreen) */ diff --git a/samples/nehe10/main.c b/samples/nehe10/main.c index faa3bc0..aa6c87f 100644 --- a/samples/nehe10/main.c +++ b/samples/nehe10/main.c @@ -10,6 +10,8 @@ #ifdef __DREAMCAST__ #include +#else +#include #endif #include @@ -17,7 +19,9 @@ #include #include +#include #include +#include #include "../loadbmp.h" @@ -84,7 +88,16 @@ void SetupWorld() int numtriangles; FILE *filein; char oneline[255]; +#ifdef __DREAMCAST__ filein = fopen("/rd/world.txt", "rt"); // File To Load World Data From +#else + filein = fopen("../samples/nehe10/romdisk/world.txt", "rt"); +#endif + + if(!filein) { + fprintf(stderr, "Failed to load world file\n"); + exit(1); + } readstr(filein,oneline); sscanf(oneline, "NUMPOLLIES %d\n", &numtriangles); @@ -228,6 +241,13 @@ void DrawGLScene(void) { } int ReadController(void) { + bool start = false; + bool up = false; + bool down = false; + bool left = false; + bool right = false; + + #ifdef __DREAMCAST__ maple_device_t *cont; cont_state_t *state; @@ -241,10 +261,27 @@ int ReadController(void) { return 0; } - if(state->buttons & CONT_START) - return 0; + start = (state->buttons & CONT_START); + up = (state->buttons & CONT_DPAD_UP); + down = (state->buttons & CONT_DPAD_DOWN); + left = (state->buttons & CONT_DPAD_LEFT); + right = (state->buttons & CONT_DPAD_RIGHT); - if(state->buttons & CONT_DPAD_UP) { +#else + int num_keys = 0; + uint8_t* state = SDL_GetKeyboardState(&num_keys); + start = state[SDL_SCANCODE_RETURN]; + up = state[SDL_SCANCODE_UP]; + down = state[SDL_SCANCODE_DOWN]; + left = state[SDL_SCANCODE_LEFT]; + right = state[SDL_SCANCODE_RIGHT]; +#endif + + if(start) { + return 0; + } + + if(up) { xpos -= (float)sin(heading*piover180) * 0.05f; zpos -= (float)cos(heading*piover180) * 0.05f; if (walkbiasangle >= 359.0f) @@ -258,8 +295,7 @@ int ReadController(void) { walkbias = (float)sin(walkbiasangle * piover180)/20.0f; } - - if(state->buttons & CONT_DPAD_DOWN) { + if(down) { xpos += (float)sin(heading*piover180) * 0.05f; zpos += (float)cos(heading*piover180) * 0.05f; if (walkbiasangle <= 1.0f) @@ -273,18 +309,17 @@ int ReadController(void) { walkbias = (float)sin(walkbiasangle * piover180)/20.0f; } - - if(state->buttons & CONT_DPAD_LEFT) { + if(left) { heading += 1.0f; yrot = heading; } - if(state->buttons & CONT_DPAD_RIGHT) { + if(right) { heading -= 1.0f; yrot = heading; } -#endif + /* Switch to the blended polygon list if needed */ if(blend) { diff --git a/tests/zclip/main.cpp b/tests/zclip/main.cpp index 7b7e316..adada72 100644 --- a/tests/zclip/main.cpp +++ b/tests/zclip/main.cpp @@ -177,7 +177,7 @@ void SceneListSubmit(void* src, int n) { _glSubmitHeaderOrVertex(d, &queue[queue_tail]); break; case GPU_CMD_VERTEX_EOL: - last_vertex = true; + last_vertex = true; // fallthru case GPU_CMD_VERTEX: visible_mask = (visible_mask >> 1) | (queue[queue_tail].xyz[2] >= -queue[queue_tail].w) << 2; assert(visible_mask < 15);