Many bug fixes and optimisations

This commit is contained in:
Luke Benstead 2023-05-16 13:31:44 +01:00
parent 9cffe14ad6
commit a5891056db
11 changed files with 650 additions and 598 deletions

View File

@ -1,6 +1,8 @@
cmake_minimum_required(VERSION 3.0) cmake_minimum_required(VERSION 3.9)
project(GLdc) project(GLdc)
set(CMAKE_VERBOSE_MAKEFILE ON)
# set the default backend # set the default backend
if(PLATFORM_DREAMCAST) if(PLATFORM_DREAMCAST)
set(BACKEND "kospvr" CACHE STRING "Backend to use") set(BACKEND "kospvr" CACHE STRING "Backend to use")
@ -8,6 +10,9 @@ else()
set(BACKEND "software" CACHE STRING "Backend to use") set(BACKEND "software" CACHE STRING "Backend to use")
endif() endif()
include(CheckIPOSupported)
check_ipo_supported(RESULT FLTO_SUPPORTED OUTPUT FLTO_ERROR)
# List of possible backends # List of possible backends
set_property(CACHE BACKEND PROPERTY STRINGS kospvr software) set_property(CACHE BACKEND PROPERTY STRINGS kospvr software)
@ -49,10 +54,10 @@ else()
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -ffast-math") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -ffast-math")
endif() endif()
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3 -fexpensive-optimizations -fomit-frame-pointer -finline-functions") set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3 -mpretend-cmove -fexpensive-optimizations -fomit-frame-pointer -finline-functions")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -std=c++14 -O3 -g0 -s -fomit-frame-pointer -fstrict-aliasing") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -std=c++14 -O3 -g0 -s -fomit-frame-pointer -fstrict-aliasing")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 -fexpensive-optimizations -fomit-frame-pointer -finline-functions") set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 -mpretend-cmove -fexpensive-optimizations -fomit-frame-pointer -finline-functions")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -std=c++14 -O3 -fomit-frame-pointer -fstrict-aliasing") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -std=c++14 -O3 -fomit-frame-pointer -fstrict-aliasing")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g -Wall -Wextra") set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g -Wall -Wextra")
@ -106,6 +111,10 @@ endif()
add_library(GLdc STATIC ${SOURCES}) add_library(GLdc STATIC ${SOURCES})
if(FLTO_SUPPORTED)
set_property(TARGET GLdc PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE)
endif()
if(NOT PLATFORM_DREAMCAST) if(NOT PLATFORM_DREAMCAST)
set_target_properties(GLdc PROPERTIES set_target_properties(GLdc PROPERTIES
COMPILE_OPTIONS "-m32" COMPILE_OPTIONS "-m32"
@ -129,6 +138,13 @@ function(gen_sample sample)
add_executable(${sample} ${SAMPLE_SRCS}) add_executable(${sample} ${SAMPLE_SRCS})
if(FLTO_SUPPORTED)
# FIXME: Cubes + LTO causes an ICE
if(NOT ${sample} MATCHES "cubes")
set_property(TARGET ${sample} PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE)
endif()
endif()
if(PLATFORM_DREAMCAST) if(PLATFORM_DREAMCAST)
if(EXISTS "${CMAKE_SOURCE_DIR}/samples/${sample}/romdisk") if(EXISTS "${CMAKE_SOURCE_DIR}/samples/${sample}/romdisk")
message("Generating romdisk for sample: ${sample}") message("Generating romdisk for sample: ${sample}")

View File

@ -17,10 +17,10 @@ extern inline GLuint _glRecalcFastPath();
GLboolean IMMEDIATE_MODE_ACTIVE = GL_FALSE; GLboolean IMMEDIATE_MODE_ACTIVE = GL_FALSE;
static GLenum ACTIVE_POLYGON_MODE = GL_TRIANGLES; static GLenum ACTIVE_POLYGON_MODE = GL_TRIANGLES;
static GLfloat NORMAL[3] = {0.0f, 0.0f, 1.0f}; static GLfloat __attribute__((aligned(32))) NORMAL[3] = {0.0f, 0.0f, 1.0f};
static GLubyte COLOR[4] = {255, 255, 255, 255}; /* ARGB order for speed */ static GLubyte __attribute__((aligned(32))) COLOR[4] = {255, 255, 255, 255}; /* ARGB order for speed */
static GLfloat UV_COORD[2] = {0.0f, 0.0f}; static GLfloat __attribute__((aligned(32))) UV_COORD[2] = {0.0f, 0.0f};
static GLfloat ST_COORD[2] = {0.0f, 0.0f}; static GLfloat __attribute__((aligned(32))) ST_COORD[2] = {0.0f, 0.0f};
static AlignedVector VERTICES; static AlignedVector VERTICES;
static AttribPointerList IM_ATTRIBS; static AttribPointerList IM_ATTRIBS;
@ -30,7 +30,7 @@ static AttribPointerList IM_ATTRIBS;
can be applied faster */ can be applied faster */
static GLuint IM_ENABLED_VERTEX_ATTRIBUTES = 0; static GLuint IM_ENABLED_VERTEX_ATTRIBUTES = 0;
typedef struct { typedef struct __attribute__((aligned(32))) {
GLfloat x; GLfloat x;
GLfloat y; GLfloat y;
GLfloat z; GLfloat z;
@ -161,30 +161,27 @@ void APIENTRY glColor3fv(const GLfloat* v) {
void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) { void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) {
IM_ENABLED_VERTEX_ATTRIBUTES |= VERTEX_ENABLED_FLAG; IM_ENABLED_VERTEX_ATTRIBUTES |= VERTEX_ENABLED_FLAG;
uint32_t cap = aligned_vector_capacity(&VERTICES);
IMVertex* vert = aligned_vector_extend(&VERTICES, 1); IMVertex* vert = aligned_vector_extend(&VERTICES, 1);
if(cap != aligned_vector_capacity(&VERTICES)) {
/* Resizing could've invalidated the pointers */
IM_ATTRIBS.vertex.ptr = aligned_vector_front(&VERTICES);
IM_ATTRIBS.uv.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 3);
IM_ATTRIBS.st.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 5);
IM_ATTRIBS.colour.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 7);
IM_ATTRIBS.normal.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 7) + sizeof(uint32_t);
}
vert->x = x; /* Resizing could've invalidated the pointers */
vert->y = y; IM_ATTRIBS.vertex.ptr = VERTICES.data;
vert->z = z; IM_ATTRIBS.uv.ptr = IM_ATTRIBS.vertex.ptr + 12;
vert->u = UV_COORD[0]; IM_ATTRIBS.st.ptr = IM_ATTRIBS.uv.ptr + 8;
vert->v = UV_COORD[1]; IM_ATTRIBS.colour.ptr = IM_ATTRIBS.st.ptr + 8;
vert->s = ST_COORD[0]; IM_ATTRIBS.normal.ptr = IM_ATTRIBS.colour.ptr + 4;
vert->t = ST_COORD[1];
*((uint32_t*) vert->bgra) = *((uint32_t*) COLOR); uint32_t* dest = (uint32_t*) &vert->x;
*(dest++) = *((uint32_t*) &x);
vert->nx = NORMAL[0]; *(dest++) = *((uint32_t*) &y);
vert->ny = NORMAL[1]; *(dest++) = *((uint32_t*) &z);
vert->nz = NORMAL[2]; *(dest++) = *((uint32_t*) &UV_COORD[0]);
*(dest++) = *((uint32_t*) &UV_COORD[1]);
*(dest++) = *((uint32_t*) &ST_COORD[0]);
*(dest++) = *((uint32_t*) &ST_COORD[1]);
*(dest++) = *((uint32_t*) COLOR);
*(dest++) = *((uint32_t*) &NORMAL[0]);
*(dest++) = *((uint32_t*) &NORMAL[1]);
*(dest++) = *((uint32_t*) &NORMAL[2]);
} }
void APIENTRY glVertex3fv(const GLfloat* v) { void APIENTRY glVertex3fv(const GLfloat* v) {

View File

@ -13,8 +13,8 @@
GLfloat DEPTH_RANGE_MULTIPLIER_L = (1 - 0) / 2; GLfloat DEPTH_RANGE_MULTIPLIER_L = (1 - 0) / 2;
GLfloat DEPTH_RANGE_MULTIPLIER_H = (0 + 1) / 2; GLfloat DEPTH_RANGE_MULTIPLIER_H = (0 + 1) / 2;
static Stack MATRIX_STACKS[3]; // modelview, projection, texture static Stack __attribute__((aligned(32))) MATRIX_STACKS[4]; // modelview, projection, texture
static Matrix4x4 NORMAL_MATRIX __attribute__((aligned(32))); static Matrix4x4 __attribute__((aligned(32))) NORMAL_MATRIX;
Viewport VIEWPORT = { Viewport VIEWPORT = {
0, 0, 640, 480, 320.0f, 240.0f, 320.0f, 240.0f 0, 0, 640, 480, 320.0f, 240.0f, 320.0f, 240.0f
@ -23,7 +23,7 @@ Viewport VIEWPORT = {
static GLenum MATRIX_MODE = GL_MODELVIEW; static GLenum MATRIX_MODE = GL_MODELVIEW;
static GLubyte MATRIX_IDX = 0; static GLubyte MATRIX_IDX = 0;
static const Matrix4x4 IDENTITY = { static const Matrix4x4 __attribute__((aligned(32))) IDENTITY = {
1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f,
0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f,
@ -106,7 +106,11 @@ void APIENTRY glMatrixMode(GLenum mode) {
} }
void APIENTRY glPushMatrix() { void APIENTRY glPushMatrix() {
stack_push(MATRIX_STACKS + MATRIX_IDX, stack_top(MATRIX_STACKS + MATRIX_IDX)); void* top = stack_top(MATRIX_STACKS + MATRIX_IDX);
assert(top);
void* ret = stack_push(MATRIX_STACKS + MATRIX_IDX, top);
(void) ret;
assert(ret);
} }
void APIENTRY glPopMatrix() { void APIENTRY glPopMatrix() {
@ -127,10 +131,16 @@ void APIENTRY glTranslatef(GLfloat x, GLfloat y, GLfloat z) {
0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f,
x, y, z, 1.0f x, y, z, 1.0f
}; };
void* top = stack_top(MATRIX_STACKS + MATRIX_IDX);
assert(top);
UploadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX)); UploadMatrix4x4(top);
MultiplyMatrix4x4(&trn); MultiplyMatrix4x4(&trn);
DownloadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX));
top = stack_top(MATRIX_STACKS + MATRIX_IDX);
assert(top);
DownloadMatrix4x4(top);
if(MATRIX_MODE == GL_MODELVIEW) { if(MATRIX_MODE == GL_MODELVIEW) {
recalculateNormalMatrix(); recalculateNormalMatrix();
@ -270,18 +280,10 @@ void APIENTRY glFrustum(GLfloat left, GLfloat right,
/* Multiply the current matrix by an arbitrary matrix */ /* Multiply the current matrix by an arbitrary matrix */
void glMultMatrixf(const GLfloat *m) { void glMultMatrixf(const GLfloat *m) {
Matrix4x4 TEMP __attribute__((aligned(32))); Matrix4x4 TEMP __attribute__((aligned(32)));
const Matrix4x4 *pMatrix; MEMCPY4(TEMP, m, sizeof(Matrix4x4));
if (((GLint)m)&0xf){ /* Unaligned matrix */
pMatrix = &TEMP;
MEMCPY4(TEMP, m, sizeof(Matrix4x4));
}
else{
pMatrix = (const Matrix4x4*) m;
}
UploadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX)); UploadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX));
MultiplyMatrix4x4(pMatrix); MultiplyMatrix4x4(&TEMP);
DownloadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX)); DownloadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX));
if(MATRIX_MODE == GL_MODELVIEW) { if(MATRIX_MODE == GL_MODELVIEW) {

View File

@ -146,23 +146,22 @@ void SceneListSubmit(Vertex* v2, int n) {
for(int i = 0; i < n; ++i, ++v2) { for(int i = 0; i < n; ++i, ++v2) {
PREFETCH(v2 + 1); PREFETCH(v2 + 1);
switch(v2->flags) { switch(v2->flags) {
case GPU_CMD_VERTEX_EOL: case GPU_CMD_VERTEX_EOL:
if(counter < 2) { if(counter < 2) {
continue;
}
counter = 0;
break;
case GPU_CMD_VERTEX:
++counter;
if(counter < 3) {
continue;
}
break;
default:
_glPushHeaderOrVertex(v2);
counter = 0;
continue; continue;
}
counter = 0;
break;
case GPU_CMD_VERTEX:
++counter;
if(counter < 3) {
continue;
}
break;
default:
_glPushHeaderOrVertex(v2);
counter = 0;
continue;
}; };
Vertex* const v0 = v2 - 2; Vertex* const v0 = v2 - 2;
@ -176,252 +175,254 @@ void SceneListSubmit(Vertex* v2, int n) {
); );
switch(visible_mask) { switch(visible_mask) {
case 15: /* All visible, but final vertex in strip */ case 15: /* All visible, but final vertex in strip */
{
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(v1, h);
_glPushHeaderOrVertex(v1);
_glPerspectiveDivideVertex(v2, h);
_glPushHeaderOrVertex(v2);
}
break;
case 7:
/* All visible, push the first vertex and move on */
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
break;
case 9:
/* First vertex was visible, last in strip */
{ {
Vertex __attribute__((aligned(32))) scratch[2];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(v0, h); _glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0); _glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(v1, h); _glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(v1); _glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(v2, h); _glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(v2); _glPushHeaderOrVertex(b);
} }
break; break;
case 7: case 1:
/* All visible, push the first vertex and move on */ /* First vertex was visible, but not last in strip */
{
Vertex __attribute__((aligned(32))) scratch[2];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h); _glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0); _glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPushHeaderOrVertex(b);
}
break; break;
case 9: case 10:
/* First vertex was visible, last in strip */ case 2:
{ /* Second vertex was visible. In self case we need to create a triangle and produce
Vertex __attribute__((aligned(32))) scratch[2];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
}
break;
case 1:
/* First vertex was visible, but not last in strip */
{
Vertex __attribute__((aligned(32))) scratch[2];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPushHeaderOrVertex(b);
}
break;
case 10:
case 2:
/* Second vertex was visible. In self case we need to create a triangle and produce
two new vertices: 1-2, and 2-3. */ two new vertices: 1-2, and 2-3. */
{ {
Vertex __attribute__((aligned(32))) scratch[2]; Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0]; Vertex* a = &scratch[0];
Vertex* c = &scratch[1]; Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v1); memcpy_vertex(c, v1);
_glClipEdge(v0, c, a); _glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX; a->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(a, h); _glClipEdge(v1, v2, b);
_glPushHeaderOrVertex(a); b->flags = v2->flags;
_glClipEdge(c, v2, a); _glPerspectiveDivideVertex(a, h);
a->flags = v2->flags; _glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(c, h); _glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c); _glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(a, h); _glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
}
break;
case 11:
case 3: /* First and second vertex were visible */
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v1);
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glClipEdge(v1, v2, a);
a->flags = v2->flags;
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(c);
_glPushHeaderOrVertex(a);
}
break;
case 12:
case 4:
/* Third vertex was visible. */
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v2);
_glClipEdge(v2, v0, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
if(counter % 2 == 1) {
_glPushHeaderOrVertex(a); _glPushHeaderOrVertex(a);
} }
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
}
break; break;
case 11: case 13:
case 3: /* First and second vertex were visible */ {
{ Vertex __attribute__((aligned(32))) scratch[3];
Vertex __attribute__((aligned(32))) scratch[3]; Vertex* a = &scratch[0];
Vertex* a = &scratch[0]; Vertex* b = &scratch[1];
Vertex* b = &scratch[1]; Vertex* c = &scratch[2];
Vertex* c = &scratch[2];
memcpy_vertex(c, v1); memcpy_vertex(c, v2);
c->flags = GPU_CMD_VERTEX;
_glClipEdge(v2, v0, b); _glClipEdge(v0, v1, a);
b->flags = GPU_CMD_VERTEX; a->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h); _glClipEdge(v1, v2, b);
_glPushHeaderOrVertex(v0); b->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, a); _glPerspectiveDivideVertex(v0, h);
a->flags = v2->flags; _glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(c, h); _glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(c); _glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(b, h); _glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(b); _glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPerspectiveDivideVertex(a, h); c->flags = GPU_CMD_VERTEX_EOL;
_glPushHeaderOrVertex(c); _glPushHeaderOrVertex(c);
_glPushHeaderOrVertex(a); }
} break;
case 5: /* First and third vertex were visible */
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v2);
c->flags = GPU_CMD_VERTEX;
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPushHeaderOrVertex(c);
}
break;
case 14:
case 6: /* Second and third vertex were visible */
{
Vertex __attribute__((aligned(32))) scratch[4];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
Vertex* d = &scratch[3];
memcpy_vertex(c, v1);
memcpy_vertex(d, v2);
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(d, h);
_glPushHeaderOrVertex(d);
}
break;
case 8:
default:
break; break;
case 12:
case 4:
/* Third vertex was visible. */
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v2);
_glClipEdge(v2, v0, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
if(counter % 2 == 1) {
_glPushHeaderOrVertex(a);
}
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
}
break;
case 13:
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v2);
c->flags = GPU_CMD_VERTEX;
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
c->flags = GPU_CMD_VERTEX_EOL;
_glPushHeaderOrVertex(c);
}
break;
case 5: /* First and third vertex were visible */
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v2);
c->flags = GPU_CMD_VERTEX;
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPushHeaderOrVertex(c);
}
break;
case 14:
case 6: /* Second and third vertex were visible */
{
Vertex __attribute__((aligned(32))) scratch[4];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
Vertex* d = &scratch[3];
memcpy_vertex(c, v1);
memcpy_vertex(d, v2);
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(d, h);
_glPushHeaderOrVertex(d);
}
break;
default:
break;
} }
} }

View File

@ -162,7 +162,7 @@ GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) {
} }
} }
GL_FORCE_INLINE void _glSubmitHeaderOrVertex(const Vertex* v) { GL_FORCE_INLINE void _glPushHeaderOrVertex(const Vertex* v) {
#ifndef NDEBUG #ifndef NDEBUG
if(glIsVertex(v->flags)) { if(glIsVertex(v->flags)) {
gl_assert(!isnan(v->xyz[2])); gl_assert(!isnan(v->xyz[2]));
@ -177,335 +177,329 @@ GL_FORCE_INLINE void _glSubmitHeaderOrVertex(const Vertex* v) {
BUFFER[vertex_counter++] = *v; BUFFER[vertex_counter++] = *v;
} }
static struct { static inline void _glFlushBuffer() {}
Vertex* v;
int visible;
} triangle[3];
static int tri_count = 0;
static int strip_count = 0;
GL_FORCE_INLINE void interpolateColour(const uint8_t* v1, const uint8_t* v2, const float t, uint8_t* out) {
const int MASK1 = 0x00FF00FF;
const int MASK2 = 0xFF00FF00;
const int f2 = 256 * t;
const int f1 = 256 - f2;
const uint32_t a = *(uint32_t*) v1;
const uint32_t b = *(uint32_t*) v2;
*((uint32_t*) out) = (((((a & MASK1) * f1) + ((b & MASK1) * f2)) >> 8) & MASK1) |
(((((a & MASK2) * f1) + ((b & MASK2) * f2)) >> 8) & MASK2);
}
GL_FORCE_INLINE void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) { GL_FORCE_INLINE void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) {
/* Clipping time! */ const static float o = 0.003921569f; // 1 / 255
const float d0 = v1->w + v1->xyz[2]; const float d0 = v1->w + v1->xyz[2];
const float d1 = v2->w + v2->xyz[2]; const float d1 = v2->w + v2->xyz[2];
const float t = (fabs(d0) * (1.0f / sqrtf((d1 - d0) * (d1 - d0)))) + 0.000001f;
const float invt = 1.0f - t;
const float epsilon = (d0 < d1) ? -0.00001f : 0.00001f; vout->xyz[0] = invt * v1->xyz[0] + t * v2->xyz[0];
vout->xyz[1] = invt * v1->xyz[1] + t * v2->xyz[1];
vout->xyz[2] = invt * v1->xyz[2] + t * v2->xyz[2];
float t = (d0 / (d0 - d1)) + epsilon; vout->uv[0] = invt * v1->uv[0] + t * v2->uv[0];
vout->uv[1] = invt * v1->uv[1] + t * v2->uv[1];
t = (t > 1.0f) ? 1.0f : t; vout->w = invt * v1->w + t * v2->w;
t = (t < 0.0f) ? 0.0f : t;
vout->xyz[0] = __builtin_fmaf(v2->xyz[0] - v1->xyz[0], t, v1->xyz[0]); const float m = 255 * t;
vout->xyz[1] = __builtin_fmaf(v2->xyz[1] - v1->xyz[1], t, v1->xyz[1]); const float n = 255 - m;
vout->xyz[2] = __builtin_fmaf(v2->xyz[2] - v1->xyz[2], t, v1->xyz[2]);
vout->w = __builtin_fmaf(v2->w - v1->w, t, v1->w);
vout->uv[0] = __builtin_fmaf(v2->uv[0] - v1->uv[0], t, v1->uv[0]); vout->bgra[0] = (v1->bgra[0] * n + v2->bgra[0] * m) * o;
vout->uv[1] = __builtin_fmaf(v2->uv[1] - v1->uv[1], t, v1->uv[1]); vout->bgra[1] = (v1->bgra[1] * n + v2->bgra[1] * m) * o;
vout->bgra[2] = (v1->bgra[2] * n + v2->bgra[2] * m) * o;
interpolateColour(v1->bgra, v2->bgra, t, vout->bgra); vout->bgra[3] = (v1->bgra[3] * n + v2->bgra[3] * m) * o;
} }
GL_FORCE_INLINE void ClearTriangle() { void SceneListSubmit(Vertex* v2, int n) {
tri_count = 0; /* You need at least a header, and 3 vertices to render anything */
} if(n < 4) {
GL_FORCE_INLINE void ShiftTriangle() {
if(!tri_count) {
return; return;
} }
tri_count--;
triangle[0] = triangle[1];
triangle[1] = triangle[2];
#ifndef NDEBUG
triangle[2].v = NULL;
triangle[2].visible = false;
#endif
}
GL_FORCE_INLINE void ShiftRotateTriangle() {
if(!tri_count) {
return;
}
if(triangle[0].v < triangle[1].v) {
triangle[0] = triangle[2];
} else {
triangle[1] = triangle[2];
}
tri_count--;
}
void SceneListSubmit(Vertex* src, int n) {
/* Perform perspective divide on each vertex */
Vertex* vertex = (Vertex*) src;
const float h = GetVideoMode()->height; const float h = GetVideoMode()->height;
/* If Z-clipping is disabled, just fire everything over to the buffer */ uint8_t visible_mask = 0;
if(!ZNEAR_CLIPPING_ENABLED) { uint8_t counter = 0;
for(int i = 0; i < n; ++i, ++vertex) {
PREFETCH(vertex + 1);
if(glIsVertex(vertex->flags)) {
_glPerspectiveDivideVertex(vertex, h);
}
_glSubmitHeaderOrVertex(vertex);
}
return; for(int i = 0; i < n; ++i, ++v2) {
} PREFETCH(v2 + 1);
switch(v2->flags) {
tri_count = 0; case GPU_CMD_VERTEX_EOL:
strip_count = 0; if(counter < 2) {
continue;
#if CLIP_DEBUG }
printf("----\n"); counter = 0;
#endif break;
case GPU_CMD_VERTEX:
for(int i = 0; i < n; ++i, ++vertex) { ++counter;
PREFETCH(vertex + 1); if(counter < 3) {
continue;
bool is_last_in_strip = glIsLastVertex(vertex->flags); }
break;
/* Wait until we fill the triangle */ default:
if(tri_count < 3) { _glPushHeaderOrVertex(v2);
if(glIsVertex(vertex->flags)) { counter = 0;
triangle[tri_count].v = vertex;
triangle[tri_count].visible = vertex->xyz[2] >= -vertex->w;
tri_count++;
strip_count++;
} else {
/* We hit a header */
tri_count = 0;
strip_count = 0;
_glSubmitHeaderOrVertex(vertex);
}
if(tri_count < 3) {
continue; continue;
} };
Vertex* const v0 = v2 - 2;
Vertex* const v1 = v2 - 1;
visible_mask = (
(v0->xyz[2] > -v0->w) << 0 |
(v1->xyz[2] > -v1->w) << 1 |
(v2->xyz[2] > -v2->w) << 2 |
(counter == 0) << 3
);
switch(visible_mask) {
case 15: /* All visible, but final vertex in strip */
{
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(v1, h);
_glPushHeaderOrVertex(v1);
_glPerspectiveDivideVertex(v2, h);
_glPushHeaderOrVertex(v2);
} }
break;
case 7:
/* All visible, push the first vertex and move on */
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
break;
case 9:
/* First vertex was visible, last in strip */
{
Vertex __attribute__((aligned(32))) scratch[2];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
#if CLIP_DEBUG _glClipEdge(v0, v1, a);
printf("SC: %d\n", strip_count); a->flags = GPU_CMD_VERTEX;
#endif
/* If we got here, then triangle contains 3 vertices */ _glClipEdge(v2, v0, b);
int visible_mask = triangle[0].visible | (triangle[1].visible << 1) | (triangle[2].visible << 2); b->flags = GPU_CMD_VERTEX_EOL;
if(visible_mask == 7) {
#if CLIP_DEBUG
printf("Visible\n");
#endif
/* All the vertices are visible! We divide and submit v0, then shift */
_glPerspectiveDivideVertex(vertex - 2, h);
_glSubmitHeaderOrVertex(vertex - 2);
if(is_last_in_strip) { _glPerspectiveDivideVertex(v0, h);
_glPerspectiveDivideVertex(vertex - 1, h); _glPushHeaderOrVertex(v0);
_glSubmitHeaderOrVertex(vertex - 1);
_glPerspectiveDivideVertex(vertex, h); _glPerspectiveDivideVertex(a, h);
_glSubmitHeaderOrVertex(vertex); _glPushHeaderOrVertex(a);
tri_count = 0;
strip_count = 0; _glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
}
break;
case 1:
/* First vertex was visible, but not last in strip */
{
Vertex __attribute__((aligned(32))) scratch[2];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPushHeaderOrVertex(b);
}
break;
case 10:
case 2:
/* Second vertex was visible. In self case we need to create a triangle and produce
two new vertices: 1-2, and 2-3. */
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v1);
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = v2->flags;
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
}
break;
case 11:
case 3: /* First and second vertex were visible */
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v1);
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glClipEdge(v1, v2, a);
a->flags = v2->flags;
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(c);
_glPushHeaderOrVertex(a);
}
break;
case 12:
case 4:
/* Third vertex was visible. */
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v2);
_glClipEdge(v2, v0, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
if(counter % 2 == 1) {
_glPushHeaderOrVertex(a);
} }
ShiftRotateTriangle(); _glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
} else if(visible_mask) { _glPerspectiveDivideVertex(c, h);
/* Clipping time! _glPushHeaderOrVertex(c);
}
break;
case 13:
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
There are 6 distinct possibilities when clipping a triangle. 3 of them result memcpy_vertex(c, v2);
in another triangle, 3 of them result in a quadrilateral. c->flags = GPU_CMD_VERTEX;
Assuming you iterate the edges of the triangle in order, and create a new *visible* _glClipEdge(v0, v1, a);
vertex when you cross the plane, and discard vertices behind the plane, then the only a->flags = GPU_CMD_VERTEX;
difference between the two cases is that the final two vertices that need submitting have
to be reversed.
Unfortunately we have to copy vertices here, because if we persp-divide a vertex it may _glClipEdge(v1, v2, b);
be used in a subsequent triangle in the strip and would end up being double divided. b->flags = GPU_CMD_VERTEX;
*/
#if CLIP_DEBUG
printf("Clip: %d, SC: %d\n", visible_mask, strip_count);
printf("%d, %d, %d\n", triangle[0].v - (Vertex*) src - 1, triangle[1].v - (Vertex*) src - 1, triangle[2].v - (Vertex*) src - 1);
#endif
Vertex tmp;
if(strip_count > 3) {
#if CLIP_DEBUG
printf("Flush\n");
#endif
tmp = *(vertex - 2);
/* If we had triangles ahead of this one, submit and finalize */
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *(vertex - 1); _glPerspectiveDivideVertex(v0, h);
tmp.flags = GPU_CMD_VERTEX_EOL; _glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
}
switch(visible_mask) { _glPerspectiveDivideVertex(a, h);
case 1: { _glPushHeaderOrVertex(a);
/* 0, 0a, 2a */
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[0].v, triangle[1].v, &tmp); _glPerspectiveDivideVertex(c, h);
tmp.flags = GPU_CMD_VERTEX; _glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(&tmp, h); _glPerspectiveDivideVertex(b, h);
_glSubmitHeaderOrVertex(&tmp); _glPushHeaderOrVertex(b);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp); c->flags = GPU_CMD_VERTEX_EOL;
tmp.flags = GPU_CMD_VERTEX_EOL; _glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(&tmp, h); }
_glSubmitHeaderOrVertex(&tmp); break;
} break; case 5: /* First and third vertex were visible */
case 2: { {
/* 0a, 1, 1a */ Vertex __attribute__((aligned(32))) scratch[3];
_glClipEdge(triangle[0].v, triangle[1].v, &tmp); Vertex* a = &scratch[0];
tmp.flags = GPU_CMD_VERTEX; Vertex* b = &scratch[1];
_glPerspectiveDivideVertex(&tmp, h); Vertex* c = &scratch[2];
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[1].v; memcpy_vertex(c, v2);
tmp.flags = GPU_CMD_VERTEX; c->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[1].v, triangle[2].v, &tmp); _glClipEdge(v0, v1, a);
tmp.flags = GPU_CMD_VERTEX_EOL; a->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 3: {
/* 0, 1, 2a, 1a */
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[1].v; _glClipEdge(v1, v2, b);
tmp.flags = GPU_CMD_VERTEX; b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp); _glPerspectiveDivideVertex(v0, h);
tmp.flags = GPU_CMD_VERTEX; _glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[1].v, triangle[2].v, &tmp); _glPerspectiveDivideVertex(a, h);
tmp.flags = GPU_CMD_VERTEX_EOL; _glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 4: {
/* 1a, 2, 2a */
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[2].v; _glPerspectiveDivideVertex(c, h);
tmp.flags = GPU_CMD_VERTEX; _glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(&tmp, h); _glPerspectiveDivideVertex(b, h);
_glSubmitHeaderOrVertex(&tmp); _glPushHeaderOrVertex(b);
_glPushHeaderOrVertex(c);
}
break;
case 14:
case 6: /* Second and third vertex were visible */
{
Vertex __attribute__((aligned(32))) scratch[4];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
Vertex* d = &scratch[3];
_glClipEdge(triangle[2].v, triangle[0].v, &tmp); memcpy_vertex(c, v1);
tmp.flags = GPU_CMD_VERTEX_EOL; memcpy_vertex(d, v2);
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 5: {
/* 0, 0a, 2, 1a */
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[0].v, triangle[1].v, &tmp); _glClipEdge(v0, v1, a);
tmp.flags = GPU_CMD_VERTEX; a->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[2].v; _glClipEdge(v2, v0, b);
tmp.flags = GPU_CMD_VERTEX; b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[1].v, triangle[2].v, &tmp); _glPerspectiveDivideVertex(a, h);
tmp.flags = GPU_CMD_VERTEX_EOL; _glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 6: {
/* 0a, 1, 2a, 2 */
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[1].v; _glPerspectiveDivideVertex(c, h);
tmp.flags = GPU_CMD_VERTEX; _glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp); _glPerspectiveDivideVertex(b, h);
tmp.flags = GPU_CMD_VERTEX; _glPushHeaderOrVertex(b);
_glPerspectiveDivideVertex(&tmp, h); _glPushHeaderOrVertex(c);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[2].v; _glPerspectiveDivideVertex(d, h);
tmp.flags = GPU_CMD_VERTEX_EOL; _glPushHeaderOrVertex(d);
_glPerspectiveDivideVertex(&tmp, h); }
_glSubmitHeaderOrVertex(&tmp); break;
} break; case 8:
default: default:
break; break;
}
/* If this was the last in the strip, we don't need to
submit anything else, we just wipe the tri_count */
if(is_last_in_strip) {
tri_count = 0;
strip_count = 0;
} else {
ShiftRotateTriangle();
strip_count = 2;
}
} else {
/* Invisible? Move to the next in the strip */
if(is_last_in_strip) {
tri_count = 0;
strip_count = 0;
}
strip_count = 2;
ShiftRotateTriangle();
} }
} }
_glFlushBuffer();
} }
void SceneListFinish() { void SceneListFinish() {
@ -548,7 +542,6 @@ void SceneListFinish() {
void SceneFinish() { void SceneFinish() {
SDL_RenderPresent(RENDERER); SDL_RenderPresent(RENDERER);
return;
/* Only sensible place to hook the quit signal */ /* Only sensible place to hook the quit signal */
SDL_Event e; SDL_Event e;
while (SDL_PollEvent(&e)) { while (SDL_PollEvent(&e)) {

View File

@ -255,7 +255,8 @@ void _glUpdatePVRTextureContext(PolyContext *context, GLshort textureUnit) {
context->txr2.enable = GPU_TEXTURE_DISABLE; context->txr2.enable = GPU_TEXTURE_DISABLE;
context->txr2.alpha = GPU_TXRALPHA_DISABLE; context->txr2.alpha = GPU_TXRALPHA_DISABLE;
if(!TEXTURES_ENABLED[textureUnit] || !tx1) { if(!TEXTURES_ENABLED[textureUnit] || !tx1 || !tx1->data) {
context->txr.base = NULL;
return; return;
} }

View File

@ -39,6 +39,7 @@ static void* yalloc_alloc_and_defrag(size_t size) {
if(!ret) { if(!ret) {
/* Tried to allocate, but out of room, let's try defragging /* Tried to allocate, but out of room, let's try defragging
* and repeating the alloc */ * and repeating the alloc */
fprintf(stderr, "Ran out of memory, defragmenting\n");
glDefragmentTextureMemory_KOS(); glDefragmentTextureMemory_KOS();
ret = yalloc_alloc(YALLOC_BASE, size); ret = yalloc_alloc(YALLOC_BASE, size);
} }
@ -537,6 +538,7 @@ void APIENTRY glGenTextures(GLsizei n, GLuint *textures) {
GLuint id = 0; GLuint id = 0;
TextureObject* txr = (TextureObject*) named_array_alloc(&TEXTURE_OBJECTS, &id); TextureObject* txr = (TextureObject*) named_array_alloc(&TEXTURE_OBJECTS, &id);
gl_assert(txr);
gl_assert(id); // Generated IDs must never be zero gl_assert(id); // Generated IDs must never be zero
_glInitializeTextureObject(txr, id); _glInitializeTextureObject(txr, id);
@ -553,31 +555,32 @@ void APIENTRY glDeleteTextures(GLsizei n, GLuint *textures) {
while(n--) { while(n--) {
TextureObject* txr = (TextureObject*) named_array_get(&TEXTURE_OBJECTS, *textures); TextureObject* txr = (TextureObject*) named_array_get(&TEXTURE_OBJECTS, *textures);
/* Make sure we update framebuffer objects that have this texture attached */ if(txr) {
_glWipeTextureOnFramebuffers(*textures); /* Make sure we update framebuffer objects that have this texture attached */
_glWipeTextureOnFramebuffers(*textures);
if(txr == TEXTURE_UNITS[ACTIVE_TEXTURE]) { if(txr == TEXTURE_UNITS[ACTIVE_TEXTURE]) {
TEXTURE_UNITS[ACTIVE_TEXTURE] = NULL; TEXTURE_UNITS[ACTIVE_TEXTURE] = NULL;
}
if(txr->data) {
yalloc_free(YALLOC_BASE, txr->data);
txr->data = NULL;
}
if(txr->palette && txr->palette->data) {
if (txr->palette->bank > -1) {
_glReleasePaletteSlot(txr->palette->bank, txr->palette->size);
txr->palette->bank = -1;
} }
free(txr->palette->data);
txr->palette->data = NULL;
}
if(txr->palette) { if(txr->data) {
free(txr->palette); yalloc_free(YALLOC_BASE, txr->data);
txr->palette = NULL; txr->data = NULL;
}
if(txr->palette && txr->palette->data) {
if (txr->palette->bank > -1) {
_glReleasePaletteSlot(txr->palette->bank, txr->palette->size);
txr->palette->bank = -1;
}
free(txr->palette->data);
txr->palette->data = NULL;
}
if(txr->palette) {
free(txr->palette);
txr->palette = NULL;
}
} }
named_array_release(&TEXTURE_OBJECTS, *textures); named_array_release(&TEXTURE_OBJECTS, *textures);
@ -820,6 +823,8 @@ void APIENTRY glCompressedTexImage2DARB(GLenum target,
if(data) { if(data) {
FASTCPY(active->data, data, imageSize); FASTCPY(active->data, data, imageSize);
} }
_glGPUStateMarkDirty();
} }
static GLint _cleanInternalFormat(GLint internalFormat) { static GLint _cleanInternalFormat(GLint internalFormat) {
@ -1555,6 +1560,8 @@ void APIENTRY glTexImage2D(GLenum target, GLint level, GLint internalFormat,
free(conversionBuffer); free(conversionBuffer);
conversionBuffer = NULL; conversionBuffer = NULL;
} }
_glGPUStateMarkDirty();
} }
void APIENTRY glTexParameteri(GLenum target, GLenum pname, GLint param) { void APIENTRY glTexParameteri(GLenum target, GLenum pname, GLint param) {

View File

@ -328,7 +328,7 @@ void updateLogic()
glTranslatef(0.0f, 0.0f, -cameraDistance + zoomVal); glTranslatef(0.0f, 0.0f, -cameraDistance + zoomVal);
// Apply cube transformation (identity matrix) // Apply cube transformation (identity matrix)
glMultMatrixf(cubeTransformationMatrix); glLoadIdentity();
updateCubes(dt); updateCubes(dt);

View File

@ -9,7 +9,7 @@
/* A general OpenGL initialization function. Sets all of the initial parameters. */ /* A general OpenGL initialization function. Sets all of the initial parameters. */
void InitGL(int Width, int Height) // We call this right after our OpenGL window is created. void InitGL(int Width, int Height) // We call this right after our OpenGL window is created.
{ {
glClearColor(0.0f, 0.0f, 0.0f, 0.0f); // This Will Clear The Background Color To Black glClearColor(0.0f, 0.0f, 1.0f, 0.0f); // This Will Clear The Background Color To Black
glClearDepth(1.0); // Enables Clearing Of The Depth Buffer glClearDepth(1.0); // Enables Clearing Of The Depth Buffer
glDepthFunc(GL_LEQUAL); // The Type Of Depth Test To Do glDepthFunc(GL_LEQUAL); // The Type Of Depth Test To Do
glEnable(GL_DEPTH_TEST); // Enables Depth Testing glEnable(GL_DEPTH_TEST); // Enables Depth Testing
@ -20,7 +20,7 @@ void InitGL(int Width, int Height) // We call this right after our OpenG
gluPerspective(45.0f,(GLfloat)Width/(GLfloat)Height,0.1f,100.0f); // Calculate The Aspect Ratio Of The Window gluPerspective(45.0f,(GLfloat)Width/(GLfloat)Height,0.1f,100.0f); // Calculate The Aspect Ratio Of The Window
glMatrixMode(GL_MODELVIEW); glMatrixMode(GL_MODELVIEW);
} }
/* The function called when our window is resized (which shouldn't happen, because we're fullscreen) */ /* The function called when our window is resized (which shouldn't happen, because we're fullscreen) */

View File

@ -10,6 +10,8 @@
#ifdef __DREAMCAST__ #ifdef __DREAMCAST__
#include <kos.h> #include <kos.h>
#else
#include <SDL.h>
#endif #endif
#include <stdio.h> #include <stdio.h>
@ -17,7 +19,9 @@
#include <GL/glu.h> #include <GL/glu.h>
#include <GL/glkos.h> #include <GL/glkos.h>
#include <stdlib.h>
#include <stdbool.h> #include <stdbool.h>
#include <stdint.h>
#include "../loadbmp.h" #include "../loadbmp.h"
@ -84,7 +88,16 @@ void SetupWorld()
int numtriangles; int numtriangles;
FILE *filein; FILE *filein;
char oneline[255]; char oneline[255];
#ifdef __DREAMCAST__
filein = fopen("/rd/world.txt", "rt"); // File To Load World Data From filein = fopen("/rd/world.txt", "rt"); // File To Load World Data From
#else
filein = fopen("../samples/nehe10/romdisk/world.txt", "rt");
#endif
if(!filein) {
fprintf(stderr, "Failed to load world file\n");
exit(1);
}
readstr(filein,oneline); readstr(filein,oneline);
sscanf(oneline, "NUMPOLLIES %d\n", &numtriangles); sscanf(oneline, "NUMPOLLIES %d\n", &numtriangles);
@ -228,6 +241,13 @@ void DrawGLScene(void) {
} }
int ReadController(void) { int ReadController(void) {
bool start = false;
bool up = false;
bool down = false;
bool left = false;
bool right = false;
#ifdef __DREAMCAST__ #ifdef __DREAMCAST__
maple_device_t *cont; maple_device_t *cont;
cont_state_t *state; cont_state_t *state;
@ -241,10 +261,27 @@ int ReadController(void) {
return 0; return 0;
} }
if(state->buttons & CONT_START) start = (state->buttons & CONT_START);
return 0; up = (state->buttons & CONT_DPAD_UP);
down = (state->buttons & CONT_DPAD_DOWN);
left = (state->buttons & CONT_DPAD_LEFT);
right = (state->buttons & CONT_DPAD_RIGHT);
if(state->buttons & CONT_DPAD_UP) { #else
int num_keys = 0;
uint8_t* state = SDL_GetKeyboardState(&num_keys);
start = state[SDL_SCANCODE_RETURN];
up = state[SDL_SCANCODE_UP];
down = state[SDL_SCANCODE_DOWN];
left = state[SDL_SCANCODE_LEFT];
right = state[SDL_SCANCODE_RIGHT];
#endif
if(start) {
return 0;
}
if(up) {
xpos -= (float)sin(heading*piover180) * 0.05f; xpos -= (float)sin(heading*piover180) * 0.05f;
zpos -= (float)cos(heading*piover180) * 0.05f; zpos -= (float)cos(heading*piover180) * 0.05f;
if (walkbiasangle >= 359.0f) if (walkbiasangle >= 359.0f)
@ -258,8 +295,7 @@ int ReadController(void) {
walkbias = (float)sin(walkbiasangle * piover180)/20.0f; walkbias = (float)sin(walkbiasangle * piover180)/20.0f;
} }
if(down) {
if(state->buttons & CONT_DPAD_DOWN) {
xpos += (float)sin(heading*piover180) * 0.05f; xpos += (float)sin(heading*piover180) * 0.05f;
zpos += (float)cos(heading*piover180) * 0.05f; zpos += (float)cos(heading*piover180) * 0.05f;
if (walkbiasangle <= 1.0f) if (walkbiasangle <= 1.0f)
@ -273,18 +309,17 @@ int ReadController(void) {
walkbias = (float)sin(walkbiasangle * piover180)/20.0f; walkbias = (float)sin(walkbiasangle * piover180)/20.0f;
} }
if(left) {
if(state->buttons & CONT_DPAD_LEFT) {
heading += 1.0f; heading += 1.0f;
yrot = heading; yrot = heading;
} }
if(state->buttons & CONT_DPAD_RIGHT) { if(right) {
heading -= 1.0f; heading -= 1.0f;
yrot = heading; yrot = heading;
} }
#endif
/* Switch to the blended polygon list if needed */ /* Switch to the blended polygon list if needed */
if(blend) { if(blend) {

View File

@ -177,7 +177,7 @@ void SceneListSubmit(void* src, int n) {
_glSubmitHeaderOrVertex(d, &queue[queue_tail]); _glSubmitHeaderOrVertex(d, &queue[queue_tail]);
break; break;
case GPU_CMD_VERTEX_EOL: case GPU_CMD_VERTEX_EOL:
last_vertex = true; last_vertex = true; // fallthru
case GPU_CMD_VERTEX: case GPU_CMD_VERTEX:
visible_mask = (visible_mask >> 1) | (queue[queue_tail].xyz[2] >= -queue[queue_tail].w) << 2; visible_mask = (visible_mask >> 1) | (queue[queue_tail].xyz[2] >= -queue[queue_tail].w) << 2;
assert(visible_mask < 15); assert(visible_mask < 15);