From 99ae70a72bd656256b1093c9ba378d6df72992ac Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Mon, 13 Jun 2022 20:06:04 +0100 Subject: [PATCH] Make the software implementation use the same clipping code --- GL/platforms/software.c | 435 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 393 insertions(+), 42 deletions(-) diff --git a/GL/platforms/software.c b/GL/platforms/software.c index d37cccc..a6f1974 100644 --- a/GL/platforms/software.c +++ b/GL/platforms/software.c @@ -3,11 +3,14 @@ #include #include +#include "../private.h" #include "../platform.h" #include "software.h" #include "software/edge_equation.h" #include "software/parameter_equation.h" +#define CLIP_DEBUG 0 + static size_t AVAILABLE_VRAM = 16 * 1024 * 1024; static Matrix4x4 MATRIX; @@ -23,28 +26,16 @@ static VideoMode vid_mode = { 640, 480 }; - -typedef struct GPUVertex { - uint32_t flags; - float x; - float y; - float z; - float u; - float v; - uint8_t bgra[4]; - uint8_t obgra[4]; -} GPUVertex; - #define MIN(x, y) ((x) < (y) ? (x) : (y)) #define MAX(x, y) ((x) > (y) ? (x) : (y)) -static void DrawTriangle(GPUVertex* v0, GPUVertex* v1, GPUVertex* v2) { +static void DrawTriangle(Vertex* v0, Vertex* v1, Vertex* v2) { // Compute triangle bounding box. - int minX = MIN(MIN(v0->x, v1->x), v2->x); - int maxX = MAX(MAX(v0->x, v1->x), v2->x); - int minY = MIN(MIN(v0->y, v1->y), v2->y); - int maxY = MAX(MAX(v0->y, v1->y), v2->y); + int minX = MIN(MIN(v0->xyz[0], v1->xyz[0]), v2->xyz[0]); + int maxX = MAX(MAX(v0->xyz[0], v1->xyz[0]), v2->xyz[0]); + int minY = MIN(MIN(v0->xyz[1], v1->xyz[1]), v2->xyz[1]); + int maxY = MAX(MAX(v0->xyz[1], v1->xyz[1]), v2->xyz[1]); // Clip to scissor rect. @@ -56,9 +47,9 @@ static void DrawTriangle(GPUVertex* v0, GPUVertex* v1, GPUVertex* v2) { // Compute edge equations. EdgeEquation e0, e1, e2; - EdgeEquationInit(&e0, &v0->x, &v1->x); - EdgeEquationInit(&e1, &v1->x, &v2->x); - EdgeEquationInit(&e2, &v2->x, &v0->x); + EdgeEquationInit(&e0, &v0->xyz[0], &v1->xyz[0]); + EdgeEquationInit(&e1, &v1->xyz[0], &v2->xyz[0]); + EdgeEquationInit(&e2, &v2->xyz[0], &v0->xyz[0]); float area = 0.5 * (e0.c + e1.c + e2.c); @@ -66,12 +57,12 @@ static void DrawTriangle(GPUVertex* v0, GPUVertex* v1, GPUVertex* v2) { * so I just swap the vertex order if something is back-facing * and we want to render it. Patches welcome! */ #define REVERSE_WINDING() \ - GPUVertex* tv = v0; \ + Vertex* tv = v0; \ v0 = v1; \ v1 = tv; \ - EdgeEquationInit(&e0, &v0->x, &v1->x); \ - EdgeEquationInit(&e1, &v1->x, &v2->x); \ - EdgeEquationInit(&e2, &v2->x, &v0->x); \ + EdgeEquationInit(&e0, &v0->xyz[0], &v1->xyz[0]); \ + EdgeEquationInit(&e1, &v1->xyz[0], &v2->xyz[0]); \ + EdgeEquationInit(&e2, &v2->xyz[0], &v0->xyz[0]); \ area = 0.5f * (e0.c + e1.c + e2.c) \ // Check if triangle is backfacing. @@ -135,18 +126,382 @@ void SceneBegin() { SDL_RenderClear(RENDERER); } -void SceneListBegin(GPUList list) { +static Vertex BUFFER[1024 * 32]; +static uint32_t vertex_counter = 0; +GL_FORCE_INLINE bool glIsVertex(const float flags) { + return flags == GPU_CMD_VERTEX_EOL || flags == GPU_CMD_VERTEX; +} + +GL_FORCE_INLINE bool glIsLastVertex(const float flags) { + return flags == GPU_CMD_VERTEX_EOL; +} + + +void SceneListBegin(GPUList list) { + vertex_counter = 0; +} + +GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) { + const float f = 1.0f / (vertex->w); + + /* Convert to NDC and apply viewport */ + vertex->xyz[0] = __builtin_fmaf( + VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth + ); + + vertex->xyz[1] = h - __builtin_fmaf( + VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight + ); + + if(vertex->w == 1.0f) { + vertex->xyz[2] = 1.0f / (1.0001f + vertex->xyz[2]); + } else { + vertex->xyz[2] = f; + } +} + +GL_FORCE_INLINE void _glSubmitHeaderOrVertex(const Vertex* v) { +#ifndef NDEBUG + if(glIsVertex(v->flags)) { + assert(!isnan(v->xyz[2])); + assert(!isnan(v->w)); + } +#endif + +#if CLIP_DEBUG + printf("Submitting: %x (%x)\n", v, v->flags); +#endif + + BUFFER[vertex_counter++] = *v; +} + +static struct { + Vertex* v; + int visible; +} triangle[3]; + +static int tri_count = 0; +static int strip_count = 0; + +GL_FORCE_INLINE void interpolateColour(const uint8_t* v1, const uint8_t* v2, const float t, uint8_t* out) { + const int MASK1 = 0x00FF00FF; + const int MASK2 = 0xFF00FF00; + + const int f2 = 256 * t; + const int f1 = 256 - f2; + + const uint32_t a = *(uint32_t*) v1; + const uint32_t b = *(uint32_t*) v2; + + *((uint32_t*) out) = (((((a & MASK1) * f1) + ((b & MASK1) * f2)) >> 8) & MASK1) | + (((((a & MASK2) * f1) + ((b & MASK2) * f2)) >> 8) & MASK2); +} + +GL_FORCE_INLINE void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) { + /* Clipping time! */ + const float d0 = v1->w + v1->xyz[2]; + const float d1 = v2->w + v2->xyz[2]; + + const float epsilon = (d0 < d1) ? -0.00001f : 0.00001f; + + float t = (d0 / (d0 - d1)) + epsilon; + + t = (t > 1.0f) ? 1.0f : t; + t = (t < 0.0f) ? 0.0f : t; + + vout->xyz[0] = __builtin_fmaf(v2->xyz[0] - v1->xyz[0], t, v1->xyz[0]); + vout->xyz[1] = __builtin_fmaf(v2->xyz[1] - v1->xyz[1], t, v1->xyz[1]); + vout->xyz[2] = __builtin_fmaf(v2->xyz[2] - v1->xyz[2], t, v1->xyz[2]); + vout->w = __builtin_fmaf(v2->w - v1->w, t, v1->w); + + vout->uv[0] = __builtin_fmaf(v2->uv[0] - v1->uv[0], t, v1->uv[0]); + vout->uv[1] = __builtin_fmaf(v2->uv[1] - v1->uv[1], t, v1->uv[1]); + + interpolateColour(v1->bgra, v2->bgra, t, vout->bgra); +} + +GL_FORCE_INLINE void ClearTriangle() { + tri_count = 0; +} + +GL_FORCE_INLINE void ShiftTriangle() { + if(!tri_count) { + return; + } + + tri_count--; + triangle[0] = triangle[1]; + triangle[1] = triangle[2]; + +#ifndef NDEBUG + triangle[2].v = NULL; + triangle[2].visible = false; +#endif +} + +GL_FORCE_INLINE void ShiftRotateTriangle() { + if(!tri_count) { + return; + } + + if(triangle[0].v < triangle[1].v) { + triangle[0] = triangle[2]; + } else { + triangle[1] = triangle[2]; + } + + tri_count--; } void SceneListSubmit(void* src, int n) { - uint32_t vertex_counter = 0; - const uint32_t* flags = (const uint32_t*) src; - uint32_t step = sizeof(GPUVertex) / sizeof(uint32_t); + /* Perform perspective divide on each vertex */ + Vertex* vertex = (Vertex*) src; - for(int i = 0; i < n; ++i, flags += step) { + const float h = GetVideoMode()->height; + + tri_count = 0; + strip_count = 0; + +#if CLIP_DEBUG + printf("----\n"); +#endif + + for(int i = 0; i < n; ++i, ++vertex) { + PREFETCH(vertex + 1); + + bool is_last_in_strip = glIsLastVertex(vertex->flags); + + /* Wait until we fill the triangle */ + if(tri_count < 3) { + if(glIsVertex(vertex->flags)) { + triangle[tri_count].v = vertex; + triangle[tri_count].visible = vertex->xyz[2] >= -vertex->w; + tri_count++; + strip_count++; + } else { + /* We hit a header */ + tri_count = 0; + strip_count = 0; + _glSubmitHeaderOrVertex(vertex); + } + + if(tri_count < 3) { + continue; + } + } + +#if CLIP_DEBUG + printf("SC: %d\n", strip_count); +#endif + + /* If we got here, then triangle contains 3 vertices */ + int visible_mask = triangle[0].visible | (triangle[1].visible << 1) | (triangle[2].visible << 2); + if(visible_mask == 7) { +#if CLIP_DEBUG + printf("Visible\n"); +#endif + /* All the vertices are visible! We divide and submit v0, then shift */ + _glPerspectiveDivideVertex(vertex - 2, h); + _glSubmitHeaderOrVertex(vertex - 2); + + if(is_last_in_strip) { + _glPerspectiveDivideVertex(vertex - 1, h); + _glSubmitHeaderOrVertex(vertex - 1); + _glPerspectiveDivideVertex(vertex, h); + _glSubmitHeaderOrVertex(vertex); + tri_count = 0; + strip_count = 0; + } + + ShiftRotateTriangle(); + + } else if(visible_mask) { + /* Clipping time! + + There are 6 distinct possibilities when clipping a triangle. 3 of them result + in another triangle, 3 of them result in a quadrilateral. + + Assuming you iterate the edges of the triangle in order, and create a new *visible* + vertex when you cross the plane, and discard vertices behind the plane, then the only + difference between the two cases is that the final two vertices that need submitting have + to be reversed. + + Unfortunately we have to copy vertices here, because if we persp-divide a vertex it may + be used in a subsequent triangle in the strip and would end up being double divided. + */ +#if CLIP_DEBUG + printf("Clip: %d, SC: %d\n", visible_mask, strip_count); + printf("%d, %d, %d\n", triangle[0].v - (Vertex*) src - 1, triangle[1].v - (Vertex*) src - 1, triangle[2].v - (Vertex*) src - 1); +#endif + Vertex tmp; + if(strip_count > 3) { +#if CLIP_DEBUG + printf("Flush\n"); +#endif + tmp = *(vertex - 2); + /* If we had triangles ahead of this one, submit and finalize */ + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(&tmp); + + tmp = *(vertex - 1); + tmp.flags = GPU_CMD_VERTEX_EOL; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(&tmp); + } + + switch(visible_mask) { + case 1: { + /* 0, 0a, 2a */ + tmp = *triangle[0].v; + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(&tmp); + + _glClipEdge(triangle[0].v, triangle[1].v, &tmp); + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(&tmp); + + _glClipEdge(triangle[2].v, triangle[0].v, &tmp); + tmp.flags = GPU_CMD_VERTEX_EOL; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(&tmp); + } break; + case 2: { + /* 0a, 1, 1a */ + _glClipEdge(triangle[0].v, triangle[1].v, &tmp); + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(&tmp); + + tmp = *triangle[1].v; + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(&tmp); + + _glClipEdge(triangle[1].v, triangle[2].v, &tmp); + tmp.flags = GPU_CMD_VERTEX_EOL; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(&tmp); + } break; + case 3: { + /* 0, 1, 2a, 1a */ + tmp = *triangle[0].v; + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(&tmp); + + tmp = *triangle[1].v; + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(&tmp); + + _glClipEdge(triangle[2].v, triangle[0].v, &tmp); + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(&tmp); + + _glClipEdge(triangle[1].v, triangle[2].v, &tmp); + tmp.flags = GPU_CMD_VERTEX_EOL; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(&tmp); + } break; + case 4: { + /* 1a, 2, 2a */ + _glClipEdge(triangle[1].v, triangle[2].v, &tmp); + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(&tmp); + + tmp = *triangle[2].v; + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(&tmp); + + _glClipEdge(triangle[2].v, triangle[0].v, &tmp); + tmp.flags = GPU_CMD_VERTEX_EOL; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(&tmp); + } break; + case 5: { + /* 0, 0a, 2, 1a */ + tmp = *triangle[0].v; + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(&tmp); + + _glClipEdge(triangle[0].v, triangle[1].v, &tmp); + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(&tmp); + + tmp = *triangle[2].v; + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(&tmp); + + _glClipEdge(triangle[1].v, triangle[2].v, &tmp); + tmp.flags = GPU_CMD_VERTEX_EOL; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(&tmp); + } break; + case 6: { + /* 0a, 1, 2a, 2 */ + _glClipEdge(triangle[0].v, triangle[1].v, &tmp); + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(&tmp); + + tmp = *triangle[1].v; + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(&tmp); + + _glClipEdge(triangle[2].v, triangle[0].v, &tmp); + tmp.flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(&tmp); + + tmp = *triangle[2].v; + tmp.flags = GPU_CMD_VERTEX_EOL; + _glPerspectiveDivideVertex(&tmp, h); + _glSubmitHeaderOrVertex(&tmp); + } break; + default: + break; + } + + /* If this was the last in the strip, we don't need to + submit anything else, we just wipe the tri_count */ + if(is_last_in_strip) { + tri_count = 0; + strip_count = 0; + } else { + ShiftRotateTriangle(); + strip_count = 2; + } + } else { + /* Invisible? Move to the next in the strip */ + + if(is_last_in_strip) { + tri_count = 0; + strip_count = 0; + } + strip_count = 2; + ShiftRotateTriangle(); + } + } +} + +void SceneListFinish() { + uint32_t vidx = 0; + const uint32_t* flags = (const uint32_t*) BUFFER; + uint32_t step = sizeof(Vertex) / sizeof(uint32_t); + + for(int i = 0; i < vertex_counter; ++i, flags += step) { if((*flags & GPU_CMD_POLYHDR) == GPU_CMD_POLYHDR) { - vertex_counter = 0; + vidx = 0; uint32_t mode1 = *(flags + 1); // Extract culling mode @@ -157,33 +512,29 @@ void SceneListSubmit(void* src, int n) { switch(*flags) { case GPU_CMD_VERTEX_EOL: case GPU_CMD_VERTEX: // Fallthrough - vertex_counter++; + vidx++; break; default: break; } } - if(vertex_counter > 2) { - GPUVertex* v0 = (GPUVertex*) (flags - step - step); - GPUVertex* v1 = (GPUVertex*) (flags - step); - GPUVertex* v2 = (GPUVertex*) (flags); - (vertex_counter % 2 == 0) ? DrawTriangle(v0, v1, v2) : DrawTriangle(v1, v0, v2); + if(vidx > 2) { + Vertex* v0 = (Vertex*) (flags - step - step); + Vertex* v1 = (Vertex*) (flags - step); + Vertex* v2 = (Vertex*) (flags); + (vidx % 2 == 0) ? DrawTriangle(v0, v1, v2) : DrawTriangle(v1, v0, v2); } if((*flags) == GPU_CMD_VERTEX_EOL) { - vertex_counter = 0; + vidx = 0; } } } -void SceneListFinish() { - -} - void SceneFinish() { SDL_RenderPresent(RENDERER); - + return; /* Only sensible place to hook the quit signal */ SDL_Event e; while (SDL_PollEvent(&e)) {