Merge branch 'super-clip' into 'master'

Super clip

See merge request simulant/GLdc!99
This commit is contained in:
Luke Benstead 2022-06-13 19:26:55 +00:00
commit 9b9db31f30
13 changed files with 1297 additions and 602 deletions

View File

@ -33,7 +33,6 @@ set(
containers/aligned_vector.c
containers/named_array.c
containers/stack.c
GL/clip.c
GL/draw.c
GL/error.c
GL/flush.c
@ -148,6 +147,7 @@ gen_sample(nehe06 samples/nehe06/main.c samples/loadbmp.c)
gen_sample(nehe06_vq samples/nehe06_vq/main.c)
gen_sample(nehe06_4444twid samples/nehe06_4444twid/main.c)
gen_sample(nehe08 samples/nehe08/main.c samples/nehe08/pvr-texture.c)
gen_sample(nehe10 samples/nehe10/main.c samples/loadbmp.c)
gen_sample(nehe20 samples/nehe20/main.c samples/loadbmp.c)
gen_sample(ortho2d samples/ortho2d/main.c)
gen_sample(paletted samples/paletted/main.c)

405
GL/clip.c
View File

@ -1,405 +0,0 @@
#include <float.h>
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef _arch_dreamcast
#include <dc/pvr.h>
#else
#define PVR_PACK_COLOR(a, r, g, b) {}
#endif
#include "private.h"
#include "../containers/aligned_vector.h"
/* Global near-plane clipping switch; starts out enabled (1). */
static unsigned char ZCLIP_ENABLED = 1;

/* Returns the clipping switch exactly as it was last stored. */
unsigned char _glIsClippingEnabled() {
    const unsigned char current = ZCLIP_ENABLED;
    return current;
}

/* Stores `v` verbatim as the new clipping switch value. */
void _glEnableClipping(unsigned char v) {
    ZCLIP_ENABLED = v;
}
/*
 * Computes the point where the edge v1 -> v2 crosses the plane w + z == 0
 * and writes its position into vout->xyz (no other field of vout is touched).
 *
 * Returns the interpolation factor `t` that was used, so the caller can
 * interpolate the remaining attributes with the same weight.
 */
inline float _glClipLineToNearZ(const Vertex* v1, const Vertex* v2, Vertex* vout) {
    const float d0 = v1->w + v1->xyz[2];
    const float d1 = v2->w + v2->xyz[2];

    /* Nudge 't' slightly in the direction we cross the plane, so that a
     * rounding error cannot leave the new vertex behind the near plane */
    float epsilon = 0.000001;
    if(d0 < d1) {
        epsilon = -0.000001;
    }

    const float t = MATH_Fast_Divide(d0, (d0 - d1)) + epsilon;

    for(int axis = 0; axis < 3; ++axis) {
        vout->xyz[axis] = MATH_fmac(v2->xyz[axis] - v1->xyz[axis], t, v1->xyz[axis]);
    }

    return t;
}
/* Writes v1 + (v2 - v1) * t into *out, evaluated through MATH_fmac. */
GL_FORCE_INLINE void interpolateFloat(const float v1, const float v2, const float t, float* out) {
    const float blended = MATH_fmac(v2 - v1, t, v1);
    *out = blended;
}
/* Interpolates the first two floats of v1/v2 by `t` into out[0..1]. */
GL_FORCE_INLINE void interpolateVec2(const float* v1, const float* v2, const float t, float* out) {
    for(int i = 0; i < 2; ++i) {
        interpolateFloat(v1[i], v2[i], t, &out[i]);
    }
}
/* Interpolates the first three floats of v1/v2 by `t` into out[0..2]. */
GL_FORCE_INLINE void interpolateVec3(const float* v1, const float* v2, const float t, float* out) {
    for(int i = 0; i < 3; ++i) {
        interpolateFloat(v1[i], v2[i], t, &out[i]);
    }
}
/* Interpolates the first four floats of v1/v2 by `t` into out[0..3]. */
GL_FORCE_INLINE void interpolateVec4(const float* v1, const float* v2, const float t, float* out) {
    for(int i = 0; i < 4; ++i) {
        interpolateFloat(v1[i], v2[i], t, &out[i]);
    }
}
/*
 * Linearly interpolates four 8-bit colour channels: out[i] = v1[i] + (v2[i] - v1[i]) * t,
 * with the scaled difference truncated toward zero.
 *
 * The scaled difference is negative whenever a channel decreases from v1 to
 * v2. Converting a negative float to an unsigned integer type is undefined
 * behaviour, so the conversion goes through a signed type instead.
 */
GL_FORCE_INLINE void interpolateColour(const uint8_t* v1, const uint8_t* v2, const float t, uint8_t* out) {
    for(int i = 0; i < 4; ++i) {
        const int32_t delta = (int32_t) (((float) (v2[i] - v1[i])) * t);
        out[i] = (uint8_t) (v1[i] + delta);
    }
}
/* Flags value written to the final vertex of a strip (and to "dead" vertices) */
const uint32_t VERTEX_CMD_EOL = 0xf0000000;
/* Flags value written to every vertex of a strip except the last */
const uint32_t VERTEX_CMD = 0xe0000000;
/*
 * A copy of one triangle that has been queued for near-plane clipping.
 * `visible` is a 3-bit mask: bit 2 is vertex[0], bit 1 is vertex[1] and
 * bit 0 is vertex[2]; a set bit means that vertex passed the visibility test.
 */
typedef struct {
Vertex vertex[3];
VertexExtra extra[3];
uint8_t visible;
} Triangle;
/*
 * Clips one partially visible triangle and appends the result to `target`.
 *
 * The triangle's edges are walked in order (0->1, 1->2, 2->0). Visible
 * vertices are pushed as-is, and a new interpolated vertex is pushed wherever
 * an edge crosses between visible and not visible. Every pushed vertex gets a
 * matching entry in target->extras. The output is either 3 vertices, or 4
 * vertices whose last two are swapped after pushing. The final vertex is
 * flagged VERTEX_CMD_EOL.
 *
 * triangle:  the three vertices and their extra data
 * visible:   3-bit mask, bit 2 = vertex 0 ... bit 0 = vertex 2
 * target:    submission target receiving the new vertices and extras
 * flatShade: when non-zero, every generated vertex takes the colour of
 *            vertex 2 instead of an interpolated colour
 */
void _glClipTriangle(const Triangle* triangle, const uint8_t visible, SubmissionTarget* target, const uint8_t flatShade) {
    Vertex* last = NULL;
    VertexExtra* veLast = NULL;

    const Vertex* vertices = triangle->vertex;
    const VertexExtra* extras = triangle->extra;

    /* Used when flat shading is enabled. Copied byte-wise so the colour
     * bytes are never read through an incompatible pointer type. */
    uint32_t finalColour;
    memcpy(&finalColour, vertices[2].bgra, sizeof(finalColour));

    Vertex tmp;
    VertexExtra veTmp;

    uint8_t pushedCount = 0;

#define IS_VISIBLE(x) ((visible & (1 << (2 - (x)))) > 0)

/* Appends a vertex + extra pair and remembers where the copies landed */
#define PUSH_VERT(vert, ve) \
    do { \
        last = aligned_vector_push_back(&target->output->vector, vert, 1); \
        last->flags = VERTEX_CMD; \
        veLast = aligned_vector_push_back(target->extras, ve, 1); \
        ++pushedCount; \
    } while(0)

/* Builds the vertex where the edge vert1 -> vert2 crosses the plane, in tmp/veTmp */
#define CLIP_TO_PLANE(vert1, ve1, vert2, ve2) \
    do { \
        float t = _glClipLineToNearZ((vert1), (vert2), &tmp); \
        interpolateFloat((vert1)->w, (vert2)->w, t, &tmp.w); \
        interpolateVec2((vert1)->uv, (vert2)->uv, t, tmp.uv); \
        interpolateVec3((ve1)->nxyz, (ve2)->nxyz, t, veTmp.nxyz); \
        interpolateVec2((ve1)->st, (ve2)->st, t, veTmp.st); \
        if(flatShade) { \
            interpolateColour((const uint8_t*) &finalColour, (const uint8_t*) &finalColour, t, tmp.bgra); \
        } else { \
            interpolateColour((vert1)->bgra, (vert2)->bgra, t, tmp.bgra); \
        } \
    } while(0)

    const uint8_t v0 = IS_VISIBLE(0);
    const uint8_t v1 = IS_VISIBLE(1);
    const uint8_t v2 = IS_VISIBLE(2);

    if(v0) {
        PUSH_VERT(&vertices[0], &extras[0]);
    }

    if(v0 != v1) {
        CLIP_TO_PLANE(&vertices[0], &extras[0], &vertices[1], &extras[1]);
        PUSH_VERT(&tmp, &veTmp);
    }

    if(v1) {
        PUSH_VERT(&vertices[1], &extras[1]);
    }

    if(v1 != v2) {
        CLIP_TO_PLANE(&vertices[1], &extras[1], &vertices[2], &extras[2]);
        PUSH_VERT(&tmp, &veTmp);
    }

    if(v2) {
        PUSH_VERT(&vertices[2], &extras[2]);
    }

    if(v2 != v0) {
        CLIP_TO_PLANE(&vertices[2], &extras[2], &vertices[0], &extras[0]);
        PUSH_VERT(&tmp, &veTmp);
    }

    if(!last) {
        /* Nothing was visible, so nothing was pushed and there is no
         * strip to terminate */
        return;
    }

    if(pushedCount == 4) {
        /* Four vertices: exchange the last two (and their extras) */
        Vertex* prev = last - 1;
        VertexExtra* prevVe = veLast - 1;

        tmp = *prev;
        veTmp = *prevVe;

        *prev = *last;
        *prevVe = *veLast;

        *last = tmp;
        *veLast = veTmp;

        prev->flags = VERTEX_CMD;
        last->flags = VERTEX_CMD_EOL;
    } else {
        /* Set the last flag to the end of the new strip */
        last->flags = VERTEX_CMD_EOL;
    }

#undef CLIP_TO_PLANE
#undef PUSH_VERT
#undef IS_VISIBLE
}
/*
 * Marks a vertex as removed from its strip by flagging it VERTEX_CMD_EOL.
 * In debug builds (NDEBUG not defined) the xyz components are also filled
 * with the bit pattern 0xDEADBEEF so that stale use is easy to spot.
 */
static inline void markDead(Vertex* vert) {
    vert->flags = VERTEX_CMD_EOL;

    // If we're debugging, wipe out the xyz
#ifndef NDEBUG
    /* memcpy writes the raw bit pattern without storing through an
     * incompatible (int*) pointer, which would break strict aliasing */
    const uint32_t poison = 0xDEADBEEF;

    for(int axis = 0; axis < 3; ++axis) {
        memcpy(&vert->xyz[axis], &poison, sizeof(poison));
    }
#endif
}
#define B000 0
#define B111 7
#define B100 4
#define B010 2
#define B001 1
#define B101 5
#define B011 3
#define B110 6
#define MAX_CLIP_TRIANGLES 255
/*
 * Scans the triangle strips held in `target` and near-clips them.
 *
 * Each triangle is classified by which of its vertices are visible:
 *  - fully visible triangles are left untouched;
 *  - fully hidden triangles are removed by marking vertices dead and
 *    restarting the strip;
 *  - partially visible triangles are copied into a static queue, their
 *    place in the strip is cut out, and once the scan finishes each queued
 *    triangle is clipped by _glClipTriangle(), which appends new vertices
 *    to the target.
 *
 * target:     submission target whose vertices are scanned and appended to
 * fladeShade: forwarded to _glClipTriangle() as its flat-shading flag
 *
 * Not reentrant: the clip queue is function-static.
 */
void _glClipTriangleStrip(SubmissionTarget* target, uint8_t fladeShade) {
    static Triangle TO_CLIP[MAX_CLIP_TRIANGLES];
    static uint8_t CLIP_COUNT = 0;

/*
 * A vertex is visible if it's in front of the camera (W > 0)
 * and it's in front of the near plane (Z > -W)
 */
#define _VERT_VISIBLE(v) ((v)->w >= 0 && (v)->xyz[2] >= -(v)->w)

    CLIP_COUNT = 0;

    Vertex* vertex = _glSubmissionTargetStart(target);
    const Vertex* end = _glSubmissionTargetEnd(target);
    const Vertex* start = vertex;

    int32_t triangle = -1;

    /* Go to the (potential) end of the first triangle */
    vertex++;

    uint32_t vi1, vi2, vi3;

    while(vertex < end) {
        vertex++;
        triangle++;

        /* Odd triangles in a strip have their first two vertices reversed */
        uint8_t even = (triangle % 2) == 0;
        Vertex* v1 = (even) ? vertex - 2 : vertex - 1;
        Vertex* v2 = (even) ? vertex - 1 : vertex - 2;
        Vertex* v3 = vertex;

        /* Skip ahead if we don't have a complete triangle yet */
        if(v1->flags != VERTEX_CMD || v2->flags != VERTEX_CMD) {
            triangle = -1;
            continue;
        }

        /* Indexes into extras array */
        vi1 = v1 - start;
        vi2 = v2 - start;
        vi3 = v3 - start;

        uint8_t visible = (
            (_VERT_VISIBLE(v1) ? 4 : 0) |
            (_VERT_VISIBLE(v2) ? 2 : 0) |
            (_VERT_VISIBLE(v3) ? 1 : 0)
        );

        switch(visible) {
            case B111:
                /* All visible? Do nothing */
                continue;
            case B000:
                /*
                    It is not possible that this is any triangle except the first
                    in a strip. That's because:
                    - It's either the first triangle submitted
                    - A previous triangle must have been clipped and the strip
                      restarted behind the plane

                    So, we effectively reboot the strip. We mark the first vertex
                    as the end (so it's ignored) then mark the next two as the
                    start of a new strip. Then if the next triangle crosses
                    back into view, we clip correctly. This will potentially
                    result in a bunch of pointlessly submitted vertices.

                    FIXME: Skip submitting those verts
                */

                /* Even though this is always the first in the strip, it can also
                 * be the last */
                if(v3->flags == VERTEX_CMD_EOL) {
                    /* Wipe out the triangle */
                    markDead(v1);
                    markDead(v2);
                    markDead(v3);
                } else {
                    markDead(v1);
                    /* NOTE(review): the vertices are swapped here but their
                     * extras are not, unlike the restart further below.
                     * Confirm whether that is intentional. */
                    swapVertex(v2, v3);
                    triangle = -1;
                    v2->flags = VERTEX_CMD;
                    v3->flags = VERTEX_CMD;
                }
                break;
            case B100:
            case B010:
            case B001:
            case B101:
            case B011:
            case B110: {
                assert(CLIP_COUNT < MAX_CLIP_TRIANGLES);

                /* Store the triangle for clipping */
                TO_CLIP[CLIP_COUNT].vertex[0] = *v1;
                TO_CLIP[CLIP_COUNT].vertex[1] = *v2;
                TO_CLIP[CLIP_COUNT].vertex[2] = *v3;

                VertexExtra* ve1 = (VertexExtra*) aligned_vector_at(target->extras, vi1);
                VertexExtra* ve2 = (VertexExtra*) aligned_vector_at(target->extras, vi2);
                VertexExtra* ve3 = (VertexExtra*) aligned_vector_at(target->extras, vi3);

                TO_CLIP[CLIP_COUNT].extra[0] = *ve1;
                TO_CLIP[CLIP_COUNT].extra[1] = *ve2;
                TO_CLIP[CLIP_COUNT].extra[2] = *ve3;

                TO_CLIP[CLIP_COUNT].visible = visible;
                ++CLIP_COUNT;

                /*
                    OK so here's the clever bit. If any triangle except
                    the first or last needs clipping, then the next one does as well
                    (you can't draw a plane through a single triangle in the middle of a
                    strip, only 2+). This means we can clip in pairs which frees up two
                    vertices in the middle of the strip, which is exactly the space
                    we need to restart the triangle strip after the next triangle
                */
                if(v3->flags == VERTEX_CMD_EOL) {
                    /* Last triangle in strip so end a vertex early */
                    if(triangle == 0) {
                        // Wipe out the triangle completely
                        markDead(v1);
                        markDead(v2);
                    } else {
                        // End the strip
                        (vertex - 1)->flags = VERTEX_CMD_EOL;
                    }

                    markDead(vertex);
                    triangle = -1;
                } else if(triangle == 0) {
                    /* First triangle in strip, remove first vertex */
                    markDead(v1);

                    v2->flags = VERTEX_CMD;
                    v3->flags = VERTEX_CMD;

                    triangle = -1;
                } else {
                    Vertex* v4 = v3 + 1;
                    uint32_t vi4 = v4 - start;

                    /* A second triangle is queued here, so the queue bound
                     * must be re-checked before writing */
                    assert(CLIP_COUNT < MAX_CLIP_TRIANGLES);

                    TO_CLIP[CLIP_COUNT].vertex[0] = *v3;
                    TO_CLIP[CLIP_COUNT].vertex[1] = *v2;
                    TO_CLIP[CLIP_COUNT].vertex[2] = *v4;

                    VertexExtra* ve4 = (VertexExtra*) aligned_vector_at(target->extras, vi4);

                    TO_CLIP[CLIP_COUNT].extra[0] = *(VertexExtra*) aligned_vector_at(target->extras, vi3);
                    TO_CLIP[CLIP_COUNT].extra[1] = *(VertexExtra*) aligned_vector_at(target->extras, vi2);
                    TO_CLIP[CLIP_COUNT].extra[2] = *ve4;

                    visible = (_VERT_VISIBLE(v3) ? 4 : 0) |
                              (_VERT_VISIBLE(v2) ? 2 : 0) |
                              (_VERT_VISIBLE(v4) ? 1 : 0);

                    TO_CLIP[CLIP_COUNT].visible = visible;
                    ++CLIP_COUNT;

                    // Restart strip
                    triangle = -1;

                    // Mark the second vertex as the end of the strip
                    (vertex - 1)->flags = VERTEX_CMD_EOL;

                    if(v4->flags == VERTEX_CMD_EOL) {
                        markDead(v3);
                        markDead(v4);
                    } else {
                        // Swap the next vertices to start a new strip
                        swapVertex(v3, v4);
                        v3->flags = VERTEX_CMD;
                        v4->flags = VERTEX_CMD;

                        /* Swap the extra data too. The temporary must hold
                         * the value that is about to be overwritten (*ve3),
                         * otherwise both slots end up with *ve4. */
                        VertexExtra t = *ve3;
                        *ve3 = *ve4;
                        *ve4 = t;
                    }
                }
            } break;
            default:
                break;
        }
    }

    /* Now, clip all the triangles and append them to the output */
    GLushort i;
    for(i = 0; i < CLIP_COUNT; ++i) {
        _glClipTriangle(&TO_CLIP[i], TO_CLIP[i].visible, target, fladeShade);
    }

#undef _VERT_VISIBLE
}

View File

@ -933,16 +933,6 @@ static void transform(SubmissionTarget* target) {
TransformVertices(vertex, target->count);
}
/*
 * Runs near-plane clipping over `target` and then refreshes target->count,
 * because clipping may have appended vertices to the output vector.
 */
static void clip(SubmissionTarget* target) {
    TRACE();

    /* Flat shading is decided once and handed to the strip clipper */
    const int flat_shaded = (_glGetShadeModel() == GL_FLAT);
    _glClipTriangleStrip(target, flat_shaded);

    /* Everything from start_offset to the end of the vector belongs to us */
    target->count = target->output->vector.size - target->start_offset;
}
static void mat_transform3(const float* xyz, const float* xyzOut, const uint32_t count, const uint32_t inStride, const uint32_t outStride) {
const uint8_t* dataIn = (const uint8_t*) xyz;
uint8_t* dataOut = (uint8_t*) xyzOut;
@ -1172,39 +1162,6 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
transform(target);
}
if(_glIsClippingEnabled()) {
#if DEBUG_CLIPPING
uint32_t i = 0;
fprintf(stderr, "=========\n");
for(i = 0; i < target->count; ++i) {
Vertex* v = aligned_vector_at(&target->output->vector, target->start_offset + i);
if(v->flags == 0xe0000000 || v->flags == 0xf0000000) {
fprintf(stderr, "(%f, %f, %f, %f) -> %x\n", v->xyz[0], v->xyz[1], v->xyz[2], v->w, v->flags);
} else {
fprintf(stderr, "%x\n", *((uint32_t*)v));
}
}
#endif
clip(target);
assert(extras.size == target->count);
#if DEBUG_CLIPPING
fprintf(stderr, "--------\n");
for(i = 0; i < target->count; ++i) {
Vertex* v = aligned_vector_at(&target->output->vector, target->start_offset + i);
if(v->flags == 0xe0000000 || v->flags == 0xf0000000) {
fprintf(stderr, "(%f, %f, %f, %f) -> %x\n", v->xyz[0], v->xyz[1], v->xyz[2], v->w, v->flags);
} else {
fprintf(stderr, "%x\n", *((uint32_t*)v));
}
}
#endif
}
push(_glSubmissionTargetHeader(target), GL_FALSE, target->output, 0);
/*

View File

@ -88,108 +88,19 @@ void APIENTRY glKosInit() {
glKosInitEx(&config);
}
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
/*
 * Returns true when `flags` is exactly GPU_CMD_VERTEX or GPU_CMD_VERTEX_EOL.
 *
 * The parameter is an integer on purpose: taking it as a float rounds the
 * 32-bit word to a 24-bit significand, so nearby non-vertex words would
 * compare equal to the vertex commands.
 */
GL_FORCE_INLINE bool glIsVertex(const uint32_t flags) {
    return flags == GPU_CMD_VERTEX_EOL || flags == GPU_CMD_VERTEX;
}
/*
 * Performs the perspective divide and viewport transform in place on `n`
 * list entries starting at `src`. Entries whose flags are not a vertex
 * command are left untouched.
 */
GL_FORCE_INLINE void glPerspectiveDivideStandard(void* src, uint32_t n) {
    TRACE();

    Vertex* vertex = (Vertex*) src;
    PREFETCH(vertex + 1);

    const float h = GetVideoMode()->height;

    for(; n; --n, ++vertex) {
        PREFETCH(vertex + 2);

        if(unlikely(!glIsVertex(vertex->flags))) {
            continue;
        }

        const float f = MATH_Fast_Invert(vertex->w);

        /* Convert to NDC and apply viewport */
        vertex->xyz[0] = __builtin_fmaf(
            VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth
        );
        vertex->xyz[1] = h - __builtin_fmaf(
            VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight
        );

        /* Orthographic projections (w == 1) would lose all depth if we
           stored 1/w, so they store 1/(z + 1.0001) instead: clip-space z
           spans -w..+w, and the extra 0.0001 avoids a divide by zero. */
        vertex->xyz[2] = unlikely(vertex->w == 1.0f)
            ? MATH_Fast_Invert(1.0001f + vertex->xyz[2])
            : f;
    }
}
/*
 * Fast-mode perspective divide: same viewport mapping as the standard
 * variant but built on MATH_fmac, and z always becomes 1/w (there is no
 * special case for w == 1).
 */
GL_FORCE_INLINE void glPerspectiveDivideFastMode(void* src, uint32_t n) {
    TRACE();

    Vertex* vertex = (Vertex*) src;
    const float h = GetVideoMode()->height;

    for(; n; --n, ++vertex) {
        PREFETCH(vertex + 1);

        if(unlikely(!glIsVertex(vertex->flags))) {
            /* Not a vertex command: leave the entry as it is */
            continue;
        }

        const float f = MATH_Fast_Invert(vertex->w);

        /* Convert to NDC and apply viewport */
        vertex->xyz[0] = MATH_fmac(
            VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth
        );
        vertex->xyz[1] = h - MATH_fmac(
            VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight
        );
        vertex->xyz[2] = f;
    }
}
/* Dispatches to the standard or fast-mode divide, chosen at compile time by FAST_MODE. */
GL_FORCE_INLINE void glPerspectiveDivide(void* src, uint32_t n) {
#if !FAST_MODE
    glPerspectiveDivideStandard(src, n);
#else
    glPerspectiveDivideFastMode(src, n);
#endif
}
void APIENTRY glKosSwapBuffers() {
TRACE();
SceneBegin();
SceneListBegin(GPU_LIST_OP_POLY);
glPerspectiveDivide(OP_LIST.vector.data, OP_LIST.vector.size);
SceneListSubmit(OP_LIST.vector.data, OP_LIST.vector.size);
SceneListFinish();
SceneListBegin(GPU_LIST_PT_POLY);
glPerspectiveDivide(PT_LIST.vector.data, PT_LIST.vector.size);
SceneListSubmit(PT_LIST.vector.data, PT_LIST.vector.size);
SceneListFinish();
SceneListBegin(GPU_LIST_TR_POLY);
glPerspectiveDivide(TR_LIST.vector.data, TR_LIST.vector.size);
SceneListSubmit(TR_LIST.vector.data, TR_LIST.vector.size);
SceneListFinish();
SceneFinish();
@ -199,4 +110,4 @@ void APIENTRY glKosSwapBuffers() {
aligned_vector_clear(&TR_LIST.vector);
_glApplyScissor(true);
}
}

View File

@ -1,6 +1,9 @@
#include "../platform.h"
#include "sh4.h"
#define CLIP_DEBUG 0
#define TA_SQ_ADDR (unsigned int *)(void *) \
(0xe0000000 | (((unsigned long)0x10000000) & 0x03ffffe0))
@ -8,6 +11,18 @@
/* Parenthesised so the product stays intact inside larger expressions
 * (e.g. `x / PVR_VERTEX_BUF_SIZE` or `x % PVR_VERTEX_BUF_SIZE`). */
#define PVR_VERTEX_BUF_SIZE (2560 * 256)
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
/*
 * Returns true when `flags` is exactly GPU_CMD_VERTEX or GPU_CMD_VERTEX_EOL.
 *
 * The parameter is an integer on purpose: taking it as a float rounds the
 * 32-bit word to a 24-bit significand, so nearby non-vertex words would
 * compare equal to the vertex commands.
 */
GL_FORCE_INLINE bool glIsVertex(const uint32_t flags) {
    return flags == GPU_CMD_VERTEX_EOL || flags == GPU_CMD_VERTEX;
}
/*
 * Returns true when `flags` is exactly GPU_CMD_VERTEX_EOL, i.e. the vertex
 * closes its strip. Takes the word as an integer so the comparison is exact
 * rather than subject to float rounding.
 */
GL_FORCE_INLINE bool glIsLastVertex(const uint32_t flags) {
    return flags == GPU_CMD_VERTEX_EOL;
}
void InitGPU(_Bool autosort, _Bool fsaa) {
pvr_init_params_t params = {
/* Enable opaque and translucent polygons with size 32 and 32 */
@ -32,23 +47,394 @@ void SceneListBegin(GPUList list) {
pvr_list_begin(list);
}
void SceneListSubmit(void* src, int n) {
uint32_t *d = (uint32_t*) TA_SQ_ADDR;
uint32_t *s = src;
GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) {
const float f = MATH_Fast_Invert(vertex->w);
/* fill/write queues as many times necessary */
while(n--) {
__asm__("pref @%0" : : "r"(s + 8)); /* prefetch 32 bytes for next loop */
d[0] = *(s++);
d[1] = *(s++);
d[2] = *(s++);
d[3] = *(s++);
d[4] = *(s++);
d[5] = *(s++);
d[6] = *(s++);
d[7] = *(s++);
__asm__("pref @%0" : : "r"(d));
d += 8;
/* Convert to NDC and apply viewport */
vertex->xyz[0] = __builtin_fmaf(
VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth
);
vertex->xyz[1] = h - __builtin_fmaf(
VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight
);
/* Orthographic projections need to use invZ otherwise we lose
the depth information. As w == 1, and clip-space range is -w to +w
we add 1.0 to the Z to bring it into range. We add a little extra to
avoid a divide by zero.
*/
if(unlikely(vertex->w == 1.0f)) {
vertex->xyz[2] = MATH_Fast_Invert(1.0001f + vertex->xyz[2]);
} else {
vertex->xyz[2] = f;
}
}
static uint32_t *d; // SQ target
GL_FORCE_INLINE void _glSubmitHeaderOrVertex(const Vertex* v) {
#ifndef NDEBUG
assert(!isnan(v->xyz[2]));
assert(!isnan(v->w));
#endif
#if CLIP_DEBUG
printf("Submitting: %x (%x)\n", v, v->flags);
#endif
uint32_t *s = (uint32_t*) v;
__asm__("pref @%0" : : "r"(s + 8)); /* prefetch 32 bytes for next loop */
d[0] = *(s++);
d[1] = *(s++);
d[2] = *(s++);
d[3] = *(s++);
d[4] = *(s++);
d[5] = *(s++);
d[6] = *(s++);
d[7] = *(s++);
__asm__("pref @%0" : : "r"(d));
d += 8;
}
static struct {
Vertex* v;
int visible;
} triangle[3];
static int tri_count = 0;
static int strip_count = 0;
/*
 * Blends two 4-byte colours with 8.8 fixed-point weights:
 *   out[i] = (v1[i] * (256 - f) + v2[i] * f) >> 8,   f = (int) (256 * t)
 *
 * Two channels are processed per multiply by packing them into the low
 * bytes of each 16-bit half of a 32-bit word. Bytes 1 and 3 are shifted
 * DOWN by 8 before multiplying; multiplying them in place overflows 32 bits
 * and always yields 0 for byte 3.
 *
 * The colours are assembled from and stored back as individual bytes, so
 * the inputs need no particular alignment.
 */
GL_FORCE_INLINE void interpolateColour(const uint8_t* v1, const uint8_t* v2, const float t, uint8_t* out) {
    const uint32_t LANES = 0x00FF00FF;

    /* Weights always sum to 256 */
    const int32_t w2 = (int32_t) (256 * t);
    const uint32_t f2 = (uint32_t) w2;
    const uint32_t f1 = 256 - f2;

    const uint32_t a = (uint32_t) v1[0] | ((uint32_t) v1[1] << 8) |
                       ((uint32_t) v1[2] << 16) | ((uint32_t) v1[3] << 24);
    const uint32_t b = (uint32_t) v2[0] | ((uint32_t) v2[1] << 8) |
                       ((uint32_t) v2[2] << 16) | ((uint32_t) v2[3] << 24);

    /* Bytes 0 and 2: multiply in place, then drop the 8 fraction bits */
    const uint32_t even = ((((a & LANES) * f1) + ((b & LANES) * f2)) >> 8) & LANES;

    /* Bytes 1 and 3: pre-shifted down, so each product's high byte already
     * sits in the right place and only needs masking */
    const uint32_t odd = ((((a >> 8) & LANES) * f1) + (((b >> 8) & LANES) * f2)) & ~LANES;

    const uint32_t mixed = even | odd;

    out[0] = (uint8_t) (mixed);
    out[1] = (uint8_t) (mixed >> 8);
    out[2] = (uint8_t) (mixed >> 16);
    out[3] = (uint8_t) (mixed >> 24);
}
/*
 * Builds in `vout` the vertex where the edge v1 -> v2 meets the plane
 * w + z == 0. Position, w, uv and colour are all interpolated with the same
 * factor; vout->flags is not written.
 */
GL_FORCE_INLINE void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) {
    const float d0 = v1->w + v1->xyz[2];
    const float d1 = v2->w + v2->xyz[2];

    /* Interpolation factor, nudged by a small signed epsilon and then
     * clamped to [0, 1] */
    float t = MATH_Fast_Divide(d0, (d0 - d1));
    t += (d0 < d1) ? -0.00001f : 0.00001f;

    if(t > 1.0f) {
        t = 1.0f;
    }

    if(t < 0.0f) {
        t = 0.0f;
    }

    for(int axis = 0; axis < 3; ++axis) {
        vout->xyz[axis] = __builtin_fmaf(v2->xyz[axis] - v1->xyz[axis], t, v1->xyz[axis]);
    }

    vout->w = __builtin_fmaf(v2->w - v1->w, t, v1->w);

    for(int c = 0; c < 2; ++c) {
        vout->uv[c] = __builtin_fmaf(v2->uv[c] - v1->uv[c], t, v1->uv[c]);
    }

    interpolateColour(v1->bgra, v2->bgra, t, vout->bgra);
}
/* Empties the triangle window by resetting its fill count to zero. */
GL_FORCE_INLINE void ClearTriangle() {
    tri_count = 0;
}
/*
 * Drops slot 0 of the triangle window and moves slots 1 and 2 down by one.
 * Does nothing when the window is empty. In debug builds the vacated slot 2
 * is cleared.
 */
GL_FORCE_INLINE void ShiftTriangle() {
    if(tri_count) {
        --tri_count;

        triangle[0] = triangle[1];
        triangle[1] = triangle[2];

#ifndef NDEBUG
        triangle[2].v = NULL;
        triangle[2].visible = false;
#endif
    }
}
/*
 * Moves slot 2 of the triangle window over whichever of slots 0 and 1 holds
 * the lower vertex address, then shrinks the window by one. Does nothing
 * when the window is empty.
 */
GL_FORCE_INLINE void ShiftRotateTriangle() {
    if(tri_count == 0) {
        return;
    }

    const int replaced = (triangle[0].v < triangle[1].v) ? 0 : 1;
    triangle[replaced] = triangle[2];

    --tri_count;
}
void SceneListSubmit(void* src, int n) {
/* Do everything, everywhere, all at once */
/* Prep store queues */
d = (uint32_t*) TA_SQ_ADDR;
/* Perform perspective divide on each vertex */
Vertex* vertex = (Vertex*) src;
const float h = GetVideoMode()->height;
if(!ZNEAR_CLIPPING_ENABLED) {
for(int i = 0; i < n; ++i, ++vertex) {
PREFETCH(vertex + 1);
if(glIsVertex(vertex->flags)) {
_glPerspectiveDivideVertex(vertex, h);
}
_glSubmitHeaderOrVertex(vertex);
}
/* Wait for both store queues to complete */
d = (uint32_t *)0xe0000000;
d[0] = d[8] = 0;
return;
}
tri_count = 0;
strip_count = 0;
#if CLIP_DEBUG
printf("----\n");
#endif
for(int i = 0; i < n; ++i, ++vertex) {
PREFETCH(vertex + 1);
bool is_last_in_strip = glIsLastVertex(vertex->flags);
/* Wait until we fill the triangle */
if(tri_count < 3) {
if(likely(glIsVertex(vertex->flags))) {
triangle[tri_count].v = vertex;
triangle[tri_count].visible = vertex->xyz[2] >= -vertex->w;
tri_count++;
strip_count++;
} else {
/* We hit a header */
tri_count = 0;
strip_count = 0;
_glSubmitHeaderOrVertex(vertex);
}
if(tri_count < 3) {
continue;
}
}
#if CLIP_DEBUG
printf("SC: %d\n", strip_count);
#endif
/* If we got here, then triangle contains 3 vertices */
int visible_mask = triangle[0].visible | (triangle[1].visible << 1) | (triangle[2].visible << 2);
if(visible_mask == 7) {
#if CLIP_DEBUG
printf("Visible\n");
#endif
/* All the vertices are visible! We divide and submit v0, then shift */
_glPerspectiveDivideVertex(vertex - 2, h);
_glSubmitHeaderOrVertex(vertex - 2);
if(is_last_in_strip) {
_glPerspectiveDivideVertex(vertex - 1, h);
_glSubmitHeaderOrVertex(vertex - 1);
_glPerspectiveDivideVertex(vertex, h);
_glSubmitHeaderOrVertex(vertex);
tri_count = 0;
strip_count = 0;
}
ShiftRotateTriangle();
} else if(visible_mask) {
/* Clipping time!
There are 6 distinct possibilities when clipping a triangle. 3 of them result
in another triangle, 3 of them result in a quadrilateral.
Assuming you iterate the edges of the triangle in order, and create a new *visible*
vertex when you cross the plane, and discard vertices behind the plane, then the only
difference between the two cases is that the final two vertices that need submitting have
to be reversed.
Unfortunately we have to copy vertices here, because if we persp-divide a vertex it may
be used in a subsequent triangle in the strip and would end up being double divided.
*/
#if CLIP_DEBUG
printf("Clip: %d, SC: %d\n", visible_mask, strip_count);
printf("%d, %d, %d\n", triangle[0].v - (Vertex*) src - 1, triangle[1].v - (Vertex*) src - 1, triangle[2].v - (Vertex*) src - 1);
#endif
Vertex tmp;
if(strip_count > 3) {
#if CLIP_DEBUG
printf("Flush\n");
#endif
tmp = *(vertex - 2);
/* If we had triangles ahead of this one, submit and finalize */
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *(vertex - 1);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
}
switch(visible_mask) {
case 1: {
/* 0, 0a, 2a */
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 2: {
/* 0a, 1, 1a */
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[1].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 3: {
/* 0, 1, 2a, 1a */
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[1].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 4: {
/* 1a, 2, 2a */
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[2].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 5: {
/* 0, 0a, 2, 1a */
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[2].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 6: {
/* 0a, 1, 2a, 2 */
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[1].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[2].v;
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
default:
break;
}
/* If this was the last in the strip, we don't need to
submit anything else, we just wipe the tri_count */
if(is_last_in_strip) {
tri_count = 0;
strip_count = 0;
} else {
ShiftRotateTriangle();
strip_count = 2;
}
} else {
/* Invisible? Move to the next in the strip */
if(is_last_in_strip) {
tri_count = 0;
strip_count = 0;
}
strip_count = 2;
ShiftRotateTriangle();
}
}
/* Wait for both store queues to complete */

View File

@ -8,6 +8,8 @@
#include <dc/matrix3d.h>
#include "../types.h"
#include "../private.h"
#include "sh4_math.h"
#ifndef NDEBUG

View File

@ -3,11 +3,14 @@
#include <stdlib.h>
#include <string.h>
#include "../private.h"
#include "../platform.h"
#include "software.h"
#include "software/edge_equation.h"
#include "software/parameter_equation.h"
#define CLIP_DEBUG 0
static size_t AVAILABLE_VRAM = 16 * 1024 * 1024;
static Matrix4x4 MATRIX;
@ -23,28 +26,16 @@ static VideoMode vid_mode = {
640, 480
};
/*
 * One vertex record as read by the software rasteriser: a 32-bit flags word,
 * three position floats, two further floats (u, v) and two 4-byte colour
 * fields.
 * NOTE(review): u/v are presumably texture coordinates and obgra presumably
 * an offset colour; confirm against the hardware vertex layout.
 */
typedef struct GPUVertex {
uint32_t flags;
float x;
float y;
float z;
float u;
float v;
uint8_t bgra[4];
uint8_t obgra[4];
} GPUVertex;
#define MIN(x, y) ((x) < (y) ? (x) : (y))
#define MAX(x, y) ((x) > (y) ? (x) : (y))
static void DrawTriangle(GPUVertex* v0, GPUVertex* v1, GPUVertex* v2) {
static void DrawTriangle(Vertex* v0, Vertex* v1, Vertex* v2) {
// Compute triangle bounding box.
int minX = MIN(MIN(v0->x, v1->x), v2->x);
int maxX = MAX(MAX(v0->x, v1->x), v2->x);
int minY = MIN(MIN(v0->y, v1->y), v2->y);
int maxY = MAX(MAX(v0->y, v1->y), v2->y);
int minX = MIN(MIN(v0->xyz[0], v1->xyz[0]), v2->xyz[0]);
int maxX = MAX(MAX(v0->xyz[0], v1->xyz[0]), v2->xyz[0]);
int minY = MIN(MIN(v0->xyz[1], v1->xyz[1]), v2->xyz[1]);
int maxY = MAX(MAX(v0->xyz[1], v1->xyz[1]), v2->xyz[1]);
// Clip to scissor rect.
@ -56,9 +47,9 @@ static void DrawTriangle(GPUVertex* v0, GPUVertex* v1, GPUVertex* v2) {
// Compute edge equations.
EdgeEquation e0, e1, e2;
EdgeEquationInit(&e0, &v0->x, &v1->x);
EdgeEquationInit(&e1, &v1->x, &v2->x);
EdgeEquationInit(&e2, &v2->x, &v0->x);
EdgeEquationInit(&e0, &v0->xyz[0], &v1->xyz[0]);
EdgeEquationInit(&e1, &v1->xyz[0], &v2->xyz[0]);
EdgeEquationInit(&e2, &v2->xyz[0], &v0->xyz[0]);
float area = 0.5 * (e0.c + e1.c + e2.c);
@ -66,12 +57,12 @@ static void DrawTriangle(GPUVertex* v0, GPUVertex* v1, GPUVertex* v2) {
* so I just swap the vertex order if something is back-facing
* and we want to render it. Patches welcome! */
#define REVERSE_WINDING() \
GPUVertex* tv = v0; \
Vertex* tv = v0; \
v0 = v1; \
v1 = tv; \
EdgeEquationInit(&e0, &v0->x, &v1->x); \
EdgeEquationInit(&e1, &v1->x, &v2->x); \
EdgeEquationInit(&e2, &v2->x, &v0->x); \
EdgeEquationInit(&e0, &v0->xyz[0], &v1->xyz[0]); \
EdgeEquationInit(&e1, &v1->xyz[0], &v2->xyz[0]); \
EdgeEquationInit(&e2, &v2->xyz[0], &v0->xyz[0]); \
area = 0.5f * (e0.c + e1.c + e2.c) \
// Check if triangle is backfacing.
@ -135,18 +126,395 @@ void SceneBegin() {
SDL_RenderClear(RENDERER);
}
void SceneListBegin(GPUList list) {
static Vertex BUFFER[1024 * 32];
static uint32_t vertex_counter = 0;
/*
 * Returns true when `flags` is exactly GPU_CMD_VERTEX or GPU_CMD_VERTEX_EOL.
 *
 * The parameter is an integer on purpose: taking it as a float rounds the
 * 32-bit word to a 24-bit significand, so nearby non-vertex words would
 * compare equal to the vertex commands.
 */
GL_FORCE_INLINE bool glIsVertex(const uint32_t flags) {
    return flags == GPU_CMD_VERTEX_EOL || flags == GPU_CMD_VERTEX;
}
/*
 * Returns true when `flags` is exactly GPU_CMD_VERTEX_EOL, i.e. the vertex
 * closes its strip. Takes the word as an integer so the comparison is exact
 * rather than subject to float rounding.
 */
GL_FORCE_INLINE bool glIsLastVertex(const uint32_t flags) {
    return flags == GPU_CMD_VERTEX_EOL;
}
/* Starts a new list by rewinding the vertex buffer's write position to zero. */
void SceneListBegin(GPUList list) {
    (void) list; /* not consulted by this backend */

    vertex_counter = 0;
}
/*
 * Applies the perspective divide and viewport mapping to one vertex in place.
 *
 * x and y are scaled by 1/w and mapped through VIEWPORT, with y flipped
 * against `h`. z becomes 1/w, except when w is exactly 1, where it becomes
 * 1 / (z + 1.0001).
 */
GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) {
    float* const pos = vertex->xyz;
    const float f = 1.0f / (vertex->w);

    /* Convert to NDC and apply viewport */
    pos[0] = __builtin_fmaf(
        VIEWPORT.hwidth, pos[0] * f, VIEWPORT.x_plus_hwidth
    );
    pos[1] = h - __builtin_fmaf(
        VIEWPORT.hheight, pos[1] * f, VIEWPORT.y_plus_hheight
    );

    pos[2] = (vertex->w == 1.0f) ? 1.0f / (1.0001f + pos[2]) : f;
}
/*
 * Appends a copy of one list entry (a header or a vertex) to BUFFER and
 * advances vertex_counter.
 *
 * BUFFER has a fixed size, so an entry that would not fit is dropped rather
 * than written past the end of the array; debug builds assert when that
 * happens.
 */
GL_FORCE_INLINE void _glSubmitHeaderOrVertex(const Vertex* v) {
#ifndef NDEBUG
    if(glIsVertex(v->flags)) {
        assert(!isnan(v->xyz[2]));
        assert(!isnan(v->w));
    }
#endif

#if CLIP_DEBUG
    printf("Submitting: %p (%x)\n", (const void*) v, (unsigned int) v->flags);
#endif

    if(vertex_counter >= sizeof(BUFFER) / sizeof(BUFFER[0])) {
        assert(0 && "software vertex BUFFER is full");
        return;
    }

    BUFFER[vertex_counter++] = *v;
}
static struct {
Vertex* v;
int visible;
} triangle[3];
static int tri_count = 0;
static int strip_count = 0;
/*
 * Blends two 4-byte colours with 8.8 fixed-point weights:
 *   out[i] = (v1[i] * (256 - f) + v2[i] * f) >> 8,   f = (int) (256 * t)
 *
 * Two channels are processed per multiply by packing them into the low
 * bytes of each 16-bit half of a 32-bit word. Bytes 1 and 3 are shifted
 * DOWN by 8 before multiplying; multiplying them in place overflows 32 bits
 * and always yields 0 for byte 3.
 *
 * The colours are assembled from and stored back as individual bytes, so
 * the inputs need no particular alignment.
 */
GL_FORCE_INLINE void interpolateColour(const uint8_t* v1, const uint8_t* v2, const float t, uint8_t* out) {
    const uint32_t LANES = 0x00FF00FF;

    /* Weights always sum to 256 */
    const int32_t w2 = (int32_t) (256 * t);
    const uint32_t f2 = (uint32_t) w2;
    const uint32_t f1 = 256 - f2;

    const uint32_t a = (uint32_t) v1[0] | ((uint32_t) v1[1] << 8) |
                       ((uint32_t) v1[2] << 16) | ((uint32_t) v1[3] << 24);
    const uint32_t b = (uint32_t) v2[0] | ((uint32_t) v2[1] << 8) |
                       ((uint32_t) v2[2] << 16) | ((uint32_t) v2[3] << 24);

    /* Bytes 0 and 2: multiply in place, then drop the 8 fraction bits */
    const uint32_t even = ((((a & LANES) * f1) + ((b & LANES) * f2)) >> 8) & LANES;

    /* Bytes 1 and 3: pre-shifted down, so each product's high byte already
     * sits in the right place and only needs masking */
    const uint32_t odd = ((((a >> 8) & LANES) * f1) + (((b >> 8) & LANES) * f2)) & ~LANES;

    const uint32_t mixed = even | odd;

    out[0] = (uint8_t) (mixed);
    out[1] = (uint8_t) (mixed >> 8);
    out[2] = (uint8_t) (mixed >> 16);
    out[3] = (uint8_t) (mixed >> 24);
}
/*
 * Builds in `vout` the vertex where the edge v1 -> v2 meets the plane
 * w + z == 0. Position, w, uv and colour are all interpolated with the same
 * factor; vout->flags is not written.
 */
GL_FORCE_INLINE void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) {
    const float d0 = v1->w + v1->xyz[2];
    const float d1 = v2->w + v2->xyz[2];

    /* Interpolation factor, nudged by a small signed epsilon and then
     * clamped to [0, 1] */
    float t = (d0 / (d0 - d1));
    t += (d0 < d1) ? -0.00001f : 0.00001f;

    if(t > 1.0f) {
        t = 1.0f;
    }

    if(t < 0.0f) {
        t = 0.0f;
    }

    for(int axis = 0; axis < 3; ++axis) {
        vout->xyz[axis] = __builtin_fmaf(v2->xyz[axis] - v1->xyz[axis], t, v1->xyz[axis]);
    }

    vout->w = __builtin_fmaf(v2->w - v1->w, t, v1->w);

    for(int c = 0; c < 2; ++c) {
        vout->uv[c] = __builtin_fmaf(v2->uv[c] - v1->uv[c], t, v1->uv[c]);
    }

    interpolateColour(v1->bgra, v2->bgra, t, vout->bgra);
}
/* Empties the triangle window by resetting its fill count to zero. */
GL_FORCE_INLINE void ClearTriangle() {
    tri_count = 0;
}
/*
 * Drops slot 0 of the triangle window and moves slots 1 and 2 down by one.
 * Does nothing when the window is empty. In debug builds the vacated slot 2
 * is cleared.
 */
GL_FORCE_INLINE void ShiftTriangle() {
    if(tri_count) {
        --tri_count;

        triangle[0] = triangle[1];
        triangle[1] = triangle[2];

#ifndef NDEBUG
        triangle[2].v = NULL;
        triangle[2].visible = false;
#endif
    }
}
/*
 * Moves slot 2 of the triangle window over whichever of slots 0 and 1 holds
 * the lower vertex address, then shrinks the window by one. Does nothing
 * when the window is empty.
 */
GL_FORCE_INLINE void ShiftRotateTriangle() {
    if(tri_count == 0) {
        return;
    }

    const int replaced = (triangle[0].v < triangle[1].v) ? 0 : 1;
    triangle[replaced] = triangle[2];

    --tri_count;
}
/* Submits `n` consecutive entries starting at `src`, treated as an array of
 * Vertex whose `flags` field tells headers and vertices apart.
 *
 * - When ZNEAR_CLIPPING_ENABLED is false, each vertex is perspective-divided
 *   and every entry is handed to _glSubmitHeaderOrVertex.
 * - Otherwise the entries are walked as triangle strips using the file-level
 *   `triangle` / `tri_count` / `strip_count` state: fully visible triangles
 *   are divided and submitted directly, partially visible ones are clipped
 *   into a temporary vertex and emitted as their own short strip, and fully
 *   invisible ones are dropped.
 *
 * NOTE(review): this listing appears to come from a rendered diff, so removed
 * and added lines may be interleaved (e.g. the vertex_counter/flags/step
 * declarations and the outer flags-stepping loop, whose opening brace has no
 * matching close before the next definition). Verify against the real file. */
void SceneListSubmit(void* src, int n) {
uint32_t vertex_counter = 0;
const uint32_t* flags = (const uint32_t*) src;
uint32_t step = sizeof(GPUVertex) / sizeof(uint32_t);
/* Perform perspective divide on each vertex */
Vertex* vertex = (Vertex*) src;
for(int i = 0; i < n; ++i, flags += step) {
const float h = GetVideoMode()->height;
/* If Z-clipping is disabled, just fire everything over to the buffer */
if(!ZNEAR_CLIPPING_ENABLED) {
/* NOTE(review): this `i` shadows the loop variable of the enclosing `for` */
for(int i = 0; i < n; ++i, ++vertex) {
PREFETCH(vertex + 1);
if(glIsVertex(vertex->flags)) {
_glPerspectiveDivideVertex(vertex, h);
}
_glSubmitHeaderOrVertex(vertex);
}
return;
}
/* Start with an empty triangle window and no strip in progress */
tri_count = 0;
strip_count = 0;
#if CLIP_DEBUG
printf("----\n");
#endif
for(int i = 0; i < n; ++i, ++vertex) {
PREFETCH(vertex + 1);
bool is_last_in_strip = glIsLastVertex(vertex->flags);
/* Wait until we fill the triangle */
if(tri_count < 3) {
if(glIsVertex(vertex->flags)) {
/* A vertex counts as visible when z >= -w */
triangle[tri_count].v = vertex;
triangle[tri_count].visible = vertex->xyz[2] >= -vertex->w;
tri_count++;
strip_count++;
} else {
/* We hit a header */
tri_count = 0;
strip_count = 0;
_glSubmitHeaderOrVertex(vertex);
}
if(tri_count < 3) {
continue;
}
}
#if CLIP_DEBUG
printf("SC: %d\n", strip_count);
#endif
/* If we got here, then triangle contains 3 vertices */
/* Bit k is set when triangle[k] is visible: 7 = all visible, 0 = none */
int visible_mask = triangle[0].visible | (triangle[1].visible << 1) | (triangle[2].visible << 2);
if(visible_mask == 7) {
#if CLIP_DEBUG
printf("Visible\n");
#endif
/* All the vertices are visible! We divide and submit v0, then shift */
_glPerspectiveDivideVertex(vertex - 2, h);
_glSubmitHeaderOrVertex(vertex - 2);
if(is_last_in_strip) {
/* End of the strip: the two remaining vertices go out as well */
_glPerspectiveDivideVertex(vertex - 1, h);
_glSubmitHeaderOrVertex(vertex - 1);
_glPerspectiveDivideVertex(vertex, h);
_glSubmitHeaderOrVertex(vertex);
tri_count = 0;
strip_count = 0;
}
/* No-op when tri_count was just reset to 0 above */
ShiftRotateTriangle();
} else if(visible_mask) {
/* Clipping time!
There are 6 distinct possibilities when clipping a triangle. 3 of them result
in another triangle, 3 of them result in a quadrilateral.
Assuming you iterate the edges of the triangle in order, and create a new *visible*
vertex when you cross the plane, and discard vertices behind the plane, then the only
difference between the two cases is that the final two vertices that need submitting have
to be reversed.
Unfortunately we have to copy vertices here, because if we persp-divide a vertex it may
be used in a subsequent triangle in the strip and would end up being double divided.
*/
#if CLIP_DEBUG
printf("Clip: %d, SC: %d\n", visible_mask, strip_count);
/* NOTE(review): the arguments below are pointer differences printed with %d */
printf("%d, %d, %d\n", triangle[0].v - (Vertex*) src - 1, triangle[1].v - (Vertex*) src - 1, triangle[2].v - (Vertex*) src - 1);
#endif
/* Scratch copy so the source vertices are never divided in place here */
Vertex tmp;
if(strip_count > 3) {
#if CLIP_DEBUG
printf("Flush\n");
#endif
tmp = *(vertex - 2);
/* If we had triangles ahead of this one, submit and finalize */
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *(vertex - 1);
/* The second copy is flagged GPU_CMD_VERTEX_EOL before submission */
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
}
/* In the case comments, "Na" denotes a vertex produced by _glClipEdge;
each case ends its output with a GPU_CMD_VERTEX_EOL vertex */
switch(visible_mask) {
case 1: {
/* 0, 0a, 2a */
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 2: {
/* 0a, 1, 1a */
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[1].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 3: {
/* 0, 1, 2a, 1a */
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[1].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 4: {
/* 1a, 2, 2a */
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[2].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 5: {
/* 0, 0a, 2, 1a */
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[2].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 6: {
/* 0a, 1, 2a, 2 */
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[1].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[2].v;
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
default:
break;
}
/* If this was the last in the strip, we don't need to
submit anything else, we just wipe the tri_count */
if(is_last_in_strip) {
tri_count = 0;
strip_count = 0;
} else {
/* Two entries remain in the window after the shift, so the strip count restarts at 2 */
ShiftRotateTriangle();
strip_count = 2;
}
} else {
/* Invisible? Move to the next in the strip */
if(is_last_in_strip) {
tri_count = 0;
strip_count = 0;
}
/* NOTE(review): this overwrites the strip_count = 0 reset just above when
is_last_in_strip is set, unlike the other two branches, which leave it
at 0 in that case - confirm whether that is intended */
strip_count = 2;
ShiftRotateTriangle();
}
}
}
void SceneListFinish() {
uint32_t vidx = 0;
const uint32_t* flags = (const uint32_t*) BUFFER;
uint32_t step = sizeof(Vertex) / sizeof(uint32_t);
for(int i = 0; i < vertex_counter; ++i, flags += step) {
if((*flags & GPU_CMD_POLYHDR) == GPU_CMD_POLYHDR) {
vertex_counter = 0;
vidx = 0;
uint32_t mode1 = *(flags + 1);
// Extract culling mode
@ -157,33 +525,29 @@ void SceneListSubmit(void* src, int n) {
switch(*flags) {
case GPU_CMD_VERTEX_EOL:
case GPU_CMD_VERTEX: // Fallthrough
vertex_counter++;
vidx++;
break;
default:
break;
}
}
if(vertex_counter > 2) {
GPUVertex* v0 = (GPUVertex*) (flags - step - step);
GPUVertex* v1 = (GPUVertex*) (flags - step);
GPUVertex* v2 = (GPUVertex*) (flags);
(vertex_counter % 2 == 0) ? DrawTriangle(v0, v1, v2) : DrawTriangle(v1, v0, v2);
if(vidx > 2) {
Vertex* v0 = (Vertex*) (flags - step - step);
Vertex* v1 = (Vertex*) (flags - step);
Vertex* v2 = (Vertex*) (flags);
(vidx % 2 == 0) ? DrawTriangle(v0, v1, v2) : DrawTriangle(v1, v0, v2);
}
if((*flags) == GPU_CMD_VERTEX_EOL) {
vertex_counter = 0;
vidx = 0;
}
}
}
/* NOTE(review): empty definition. Another SceneListFinish with a body appears
 * earlier in this listing; presumably one of the two is the pre-change
 * version shown by the diff - confirm against the real file. */
void SceneListFinish() {
}
void SceneFinish() {
SDL_RenderPresent(RENDERER);
return;
/* Only sensible place to hook the quit signal */
SDL_Event e;
while (SDL_PollEvent(&e)) {

View File

@ -279,9 +279,6 @@ typedef enum {
struct SubmissionTarget;
float _glClipLineToNearZ(const Vertex* v1, const Vertex* v2, Vertex* vout);
void _glClipTriangleStrip(SubmissionTarget* target, uint8_t fladeShade);
PolyList* _glOpaquePolyList();
PolyList* _glPunchThruPolyList();
PolyList *_glTransparentPolyList();
@ -385,6 +382,8 @@ GLboolean _glIsMipmapComplete(const TextureObject* obj);
GLubyte* _glGetMipmapLocation(const TextureObject* obj, GLuint level);
GLuint _glGetMipmapLevelCount(const TextureObject* obj);
extern GLboolean ZNEAR_CLIPPING_ENABLED;
extern GLboolean LIGHTING_ENABLED;
GLboolean _glIsLightingEnabled();

View File

@ -18,6 +18,8 @@ static GLenum FRONT_FACE = GL_CCW;
static GLboolean CULLING_ENABLED = GL_FALSE;
static GLboolean COLOR_MATERIAL_ENABLED = GL_FALSE;
GLboolean ZNEAR_CLIPPING_ENABLED = GL_TRUE;
GLboolean LIGHTING_ENABLED = GL_FALSE;
/* Is the shared texture palette enabled? */
@ -356,7 +358,7 @@ GLAPI void APIENTRY glEnable(GLenum cap) {
_glEnableLight(cap & 0xF, GL_TRUE);
break;
case GL_NEARZ_CLIPPING_KOS:
_glEnableClipping(GL_TRUE);
ZNEAR_CLIPPING_ENABLED = GL_TRUE;
break;
case GL_POLYGON_OFFSET_POINT:
case GL_POLYGON_OFFSET_LINE:
@ -418,7 +420,7 @@ GLAPI void APIENTRY glDisable(GLenum cap) {
_glEnableLight(cap & 0xF, GL_FALSE);
break;
case GL_NEARZ_CLIPPING_KOS:
_glEnableClipping(GL_FALSE);
ZNEAR_CLIPPING_ENABLED = GL_FALSE;
break;
case GL_POLYGON_OFFSET_POINT:
case GL_POLYGON_OFFSET_LINE:

318
samples/nehe10/main.c Normal file
View File

@ -0,0 +1,318 @@
/*
KallistiOS 2.0.0
nehe10.c
(c)2021 Luke Benstead
(c)2014 Josh Pearson
(c)2001 Benoit Miller
(c)2000 Jeff Molofee
*/
#ifdef __DREAMCAST__
#include <kos.h>
#endif
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <GL/gl.h>
#include <GL/glu.h>
#include <GL/glkos.h>
#include "../loadbmp.h"
#ifdef __DREAMCAST__
extern uint8 romdisk[];
KOS_INIT_ROMDISK(romdisk);
#define IMG_PATH "/rd/brick.bmp"
#else
#define IMG_PATH "../samples/nehe10/romdisk/brick.bmp"
#endif
bool keys[256]; // Keyboard state table (not referenced elsewhere in this sample as shown)
bool active = GL_TRUE; // Window-active flag, initially true
bool fullscreen = GL_TRUE; // Fullscreen flag, initially true
bool blend; // Blending on/off; read by ReadController to toggle GL_BLEND
bool bp; // B Pressed?
bool fp; // F Pressed?
const float piover180 = 0.0174532925f; // Degrees-to-radians factor (pi / 180)
float heading; // View heading in degrees; changed by D-pad left/right
float xpos; // Viewer X position; negated for the scene translation
float zpos; // Viewer Z position; negated for the scene translation
GLfloat yrot; // Y rotation; set equal to heading by ReadController
GLfloat walkbias = 0; // Vertical bob offset folded into the scene's Y translation
GLfloat walkbiasangle = 0; // Angle in degrees from which walkbias is derived
GLfloat lookupdown = 0.0f; // Rotation about the X axis applied before the heading rotation
GLfloat z=0.0f; // Depth Into The Screen
GLuint filter; // Index into texture[] selecting which texture DrawGLScene binds
GLuint texture[3]; // Storage For 3 Textures
/* One vertex of the world mesh: a 3D position followed by its texture
 * coordinate. Member order matches the column order of the world file. */
typedef struct tagVERTEX
{
    /* Position */
    float x;
    float y;
    float z;
    /* Texture coordinate */
    float u;
    float v;
} VERTEX;
/* A single world triangle made of three textured vertices. */
typedef struct tagTRIANGLE
{
VERTEX vertex[3];
} TRIANGLE;
/* A set of triangles that together form the world geometry. */
typedef struct tagSECTOR
{
int numtriangles; // Number of entries in `triangle`
TRIANGLE* triangle; // Heap array allocated by SetupWorld
} SECTOR;
SECTOR sector1; // The sector filled by SetupWorld and drawn by DrawGLScene
/* Read the next meaningful line of `f` into `string`.
 *
 * Lines that start with '/' (comments) or that are empty (just a newline)
 * are skipped. `string` must have room for at least 255 bytes.
 *
 * If `f` is NULL, or the end of the file / a read error is reached before a
 * meaningful line is found, `string` is set to the empty string instead of
 * looping forever on stale buffer contents. */
void readstr(FILE *f,char *string)
{
    if (f == NULL)
    {
        string[0] = '\0';
        return;
    }

    do
    {
        /* fgets leaves the buffer untouched at EOF, so its result must be
           checked or a trailing comment line would keep this loop spinning */
        if (fgets(string, 255, f) == NULL)
        {
            string[0] = '\0';
            return;
        }
    } while ((string[0] == '/') || (string[0] == '\n'));
}
void SetupWorld()
{
float x, y, z, u, v;
int numtriangles;
FILE *filein;
char oneline[255];
filein = fopen("/rd/world.txt", "rt"); // File To Load World Data From
readstr(filein,oneline);
sscanf(oneline, "NUMPOLLIES %d\n", &numtriangles);
sector1.triangle = (TRIANGLE*) malloc(sizeof(TRIANGLE) * numtriangles);
sector1.numtriangles = numtriangles;
for (int loop = 0; loop < numtriangles; loop++)
{
for (int vert = 0; vert < 3; vert++)
{
readstr(filein,oneline);
sscanf(oneline, "%f %f %f %f %f", &x, &y, &z, &u, &v);
sector1.triangle[loop].vertex[vert].x = x;
sector1.triangle[loop].vertex[vert].y = y;
sector1.triangle[loop].vertex[vert].z = z;
sector1.triangle[loop].vertex[vert].u = u;
sector1.triangle[loop].vertex[vert].v = v;
}
}
fclose(filein);
return;
}
/* Load the brick bitmap and build three textures from it:
 *   texture[0] - nearest filtering
 *   texture[1] - linear filtering
 *   texture[2] - linear magnification with mipmapped minification
 * Returns GL_TRUE when the bitmap was loaded, GL_FALSE otherwise (in which
 * case no GL calls are made). */
int LoadGLTextures()
{
    /* Filters of the two non-mipmapped textures, in texture[] order */
    static const int plain_filters[2] = { GL_NEAREST, GL_LINEAR };
    Image image1;

    if (!ImageLoad(IMG_PATH, &image1))
    {
        return GL_FALSE;
    }

    glGenTextures(3, &texture[0]);

    for (int slot = 0; slot < 2; slot++)
    {
        glBindTexture(GL_TEXTURE_2D, texture[slot]);
        glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_MAG_FILTER,plain_filters[slot]);
        glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_MIN_FILTER,plain_filters[slot]);
        glTexImage2D(GL_TEXTURE_2D, 0, 3, image1.sizeX, image1.sizeY, 0, GL_RGB, GL_UNSIGNED_BYTE, image1.data);
    }

    /* Mipmapped variant */
    glBindTexture(GL_TEXTURE_2D, texture[2]);
    glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_MAG_FILTER,GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_MIN_FILTER,GL_LINEAR_MIPMAP_NEAREST);
    gluBuild2DMipmaps(GL_TEXTURE_2D, 3, image1.sizeX, image1.sizeY, GL_RGB, GL_UNSIGNED_BYTE, image1.data);

    return GL_TRUE;
}
/* A general OpenGL initialization function. Sets all of the initial parameters. */
GLboolean InitGL(int width, int height) // We call this right after our OpenGL window is created.
{
glViewport(0, 0, width, height); // Reset The Current Viewport
glMatrixMode(GL_PROJECTION); // Select The Projection Matrix
glLoadIdentity(); // Reset The Projection Matrix
// Calculate The Aspect Ratio Of The Window
gluPerspective(45.0f,(GLfloat)width/(GLfloat)height,0.1f,100.0f);
glMatrixMode(GL_MODELVIEW); // Select The Modelview Matrix
glLoadIdentity();
if (!LoadGLTextures()) // Jump To Texture Loading Routine
{
return GL_FALSE; // If Texture Didn't Load Return false
}
glEnable(GL_TEXTURE_2D); // Enable Texture Mapping
glBlendFunc(GL_SRC_ALPHA,GL_ONE); // Set The Blending Function For Translucency
glClearColor(0.0f, 0.0f, 0.0f, 0.0f); // This Will Clear The Background Color To Black
glClearDepth(1.0); // Enables Clearing Of The Depth Buffer
glDepthFunc(GL_LESS); // The Type Of Depth Test To Do
glEnable(GL_DEPTH_TEST); // Enables Depth Testing
glShadeModel(GL_SMOOTH); // Enables Smooth Color Shading
glHint(GL_PERSPECTIVE_CORRECTION_HINT, GL_NICEST); // Really Nice Perspective Calculations
SetupWorld();
return GL_TRUE;
}
void DrawGLScene(void) {
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // Clear The Screen And The Depth Buffer
glLoadIdentity(); // Reset The View
GLfloat x_m, y_m, z_m, u_m, v_m;
GLfloat xtrans = -xpos;
GLfloat ztrans = -zpos;
GLfloat ytrans = -walkbias-0.25f;
GLfloat sceneroty = 360.0f - yrot;
int numtriangles;
glRotatef(lookupdown,1.0f,0,0);
glRotatef(sceneroty,0,1.0f,0);
glTranslatef(xtrans, ytrans, ztrans);
glBindTexture(GL_TEXTURE_2D, texture[filter]);
numtriangles = sector1.numtriangles;
// Process Each Triangle
for (int loop_m = 0; loop_m < numtriangles; loop_m++)
{
glBegin(GL_TRIANGLES);
glNormal3f( 0.0f, 0.0f, 1.0f);
x_m = sector1.triangle[loop_m].vertex[0].x;
y_m = sector1.triangle[loop_m].vertex[0].y;
z_m = sector1.triangle[loop_m].vertex[0].z;
u_m = sector1.triangle[loop_m].vertex[0].u;
v_m = sector1.triangle[loop_m].vertex[0].v;
glTexCoord2f(u_m,v_m); glVertex3f(x_m,y_m,z_m);
x_m = sector1.triangle[loop_m].vertex[1].x;
y_m = sector1.triangle[loop_m].vertex[1].y;
z_m = sector1.triangle[loop_m].vertex[1].z;
u_m = sector1.triangle[loop_m].vertex[1].u;
v_m = sector1.triangle[loop_m].vertex[1].v;
glTexCoord2f(u_m,v_m); glVertex3f(x_m,y_m,z_m);
x_m = sector1.triangle[loop_m].vertex[2].x;
y_m = sector1.triangle[loop_m].vertex[2].y;
z_m = sector1.triangle[loop_m].vertex[2].z;
u_m = sector1.triangle[loop_m].vertex[2].u;
v_m = sector1.triangle[loop_m].vertex[2].v;
glTexCoord2f(u_m,v_m); glVertex3f(x_m,y_m,z_m);
glEnd();
}
/* Finish the frame */
glKosSwapBuffers();
}
/* Poll the first controller (Dreamcast builds only) and update the viewer
 * state, then apply the current `blend` setting to the GL state.
 *
 *   D-pad up/down    - step 0.05 units along / against the heading and
 *                      advance the walk-bob angle
 *   D-pad left/right - turn by one degree
 *   START            - request exit
 *
 * Returns 1 to keep running, 0 when START is pressed or the controller
 * cannot be read (including when no controller is attached). */
int ReadController(void) {
#ifdef __DREAMCAST__
    maple_device_t *cont;
    cont_state_t *state;

    cont = maple_enum_type(0, MAPLE_FUNC_CONTROLLER);

    /* Check key status. maple_enum_type yields NULL when no controller is
       attached; never hand that to maple_dev_status */
    state = cont ? (cont_state_t *)maple_dev_status(cont) : NULL;

    if(!state) {
        printf("Error reading controller\n");
        return 0;
    }

    if(state->buttons & CONT_START)
        return 0;

    if(state->buttons & CONT_DPAD_UP) {
        /* Walk forwards along the heading */
        xpos -= (float)sin(heading*piover180) * 0.05f;
        zpos -= (float)cos(heading*piover180) * 0.05f;

        if (walkbiasangle >= 359.0f)
        {
            walkbiasangle = 0.0f;
        }
        else
        {
            walkbiasangle+= 10;
        }

        walkbias = (float)sin(walkbiasangle * piover180)/20.0f;
    }

    if(state->buttons & CONT_DPAD_DOWN) {
        /* Walk backwards */
        xpos += (float)sin(heading*piover180) * 0.05f;
        zpos += (float)cos(heading*piover180) * 0.05f;

        if (walkbiasangle <= 1.0f)
        {
            walkbiasangle = 359.0f;
        }
        else
        {
            walkbiasangle-= 10;
        }

        walkbias = (float)sin(walkbiasangle * piover180)/20.0f;
    }

    if(state->buttons & CONT_DPAD_LEFT) {
        heading += 1.0f;
        yrot = heading;
    }

    if(state->buttons & CONT_DPAD_RIGHT) {
        heading -= 1.0f;
        yrot = heading;
    }
#endif

    /* Switch to the blended polygon list if needed */
    if(blend) {
        glEnable(GL_BLEND);
        glDepthMask(0);
    }
    else {
        glDisable(GL_BLEND);
        glDepthMask(1);
    }

    return 1;
}
/* Entry point: initialise GLdc and the scene, then alternate between reading
 * the controller and drawing a frame until ReadController asks to stop.
 * Returns 0 on a normal exit and 1 when scene initialisation fails. */
int main(int argc, char **argv) {
    (void) argc;
    (void) argv;

    printf("nehe10 beginning\n");

    /* Get basic stuff initialized */
    glKosInit();

    /* InitGL bails out before enabling any state or loading the world when
       the textures cannot be loaded, so there is nothing to show */
    if (!InitGL(640, 480)) {
        printf("nehe10: initialization failed\n");
        return 1;
    }

    while(1) {
        if (!ReadController())
            break;

        DrawGLScene();
    }

    return 0;
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

View File

@ -0,0 +1,160 @@
NUMPOLLIES 36
// Floor 1
-3.0 0.0 -3.0 0.0 6.0
-3.0 0.0 3.0 0.0 0.0
3.0 0.0 3.0 6.0 0.0
-3.0 0.0 -3.0 0.0 6.0
3.0 0.0 -3.0 6.0 6.0
3.0 0.0 3.0 6.0 0.0
// Ceiling 1
-3.0 1.0 -3.0 0.0 6.0
-3.0 1.0 3.0 0.0 0.0
3.0 1.0 3.0 6.0 0.0
-3.0 1.0 -3.0 0.0 6.0
3.0 1.0 -3.0 6.0 6.0
3.0 1.0 3.0 6.0 0.0
// A1
-2.0 1.0 -2.0 0.0 1.0
-2.0 0.0 -2.0 0.0 0.0
-0.5 0.0 -2.0 1.5 0.0
-2.0 1.0 -2.0 0.0 1.0
-0.5 1.0 -2.0 1.5 1.0
-0.5 0.0 -2.0 1.5 0.0
// A2
2.0 1.0 -2.0 2.0 1.0
2.0 0.0 -2.0 2.0 0.0
0.5 0.0 -2.0 0.5 0.0
2.0 1.0 -2.0 2.0 1.0
0.5 1.0 -2.0 0.5 1.0
0.5 0.0 -2.0 0.5 0.0
// B1
-2.0 1.0 2.0 2.0 1.0
-2.0 0.0 2.0 2.0 0.0
-0.5 0.0 2.0 0.5 0.0
-2.0 1.0 2.0 2.0 1.0
-0.5 1.0 2.0 0.5 1.0
-0.5 0.0 2.0 0.5 0.0
// B2
2.0 1.0 2.0 2.0 1.0
2.0 0.0 2.0 2.0 0.0
0.5 0.0 2.0 0.5 0.0
2.0 1.0 2.0 2.0 1.0
0.5 1.0 2.0 0.5 1.0
0.5 0.0 2.0 0.5 0.0
// C1
-2.0 1.0 -2.0 0.0 1.0
-2.0 0.0 -2.0 0.0 0.0
-2.0 0.0 -0.5 1.5 0.0
-2.0 1.0 -2.0 0.0 1.0
-2.0 1.0 -0.5 1.5 1.0
-2.0 0.0 -0.5 1.5 0.0
// C2
-2.0 1.0 2.0 2.0 1.0
-2.0 0.0 2.0 2.0 0.0
-2.0 0.0 0.5 0.5 0.0
-2.0 1.0 2.0 2.0 1.0
-2.0 1.0 0.5 0.5 1.0
-2.0 0.0 0.5 0.5 0.0
// D1
2.0 1.0 -2.0 0.0 1.0
2.0 0.0 -2.0 0.0 0.0
2.0 0.0 -0.5 1.5 0.0
2.0 1.0 -2.0 0.0 1.0
2.0 1.0 -0.5 1.5 1.0
2.0 0.0 -0.5 1.5 0.0
// D2
2.0 1.0 2.0 2.0 1.0
2.0 0.0 2.0 2.0 0.0
2.0 0.0 0.5 0.5 0.0
2.0 1.0 2.0 2.0 1.0
2.0 1.0 0.5 0.5 1.0
2.0 0.0 0.5 0.5 0.0
// Upper hallway - L
-0.5 1.0 -3.0 0.0 1.0
-0.5 0.0 -3.0 0.0 0.0
-0.5 0.0 -2.0 1.0 0.0
-0.5 1.0 -3.0 0.0 1.0
-0.5 1.0 -2.0 1.0 1.0
-0.5 0.0 -2.0 1.0 0.0
// Upper hallway - R
0.5 1.0 -3.0 0.0 1.0
0.5 0.0 -3.0 0.0 0.0
0.5 0.0 -2.0 1.0 0.0
0.5 1.0 -3.0 0.0 1.0
0.5 1.0 -2.0 1.0 1.0
0.5 0.0 -2.0 1.0 0.0
// Lower hallway - L
-0.5 1.0 3.0 0.0 1.0
-0.5 0.0 3.0 0.0 0.0
-0.5 0.0 2.0 1.0 0.0
-0.5 1.0 3.0 0.0 1.0
-0.5 1.0 2.0 1.0 1.0
-0.5 0.0 2.0 1.0 0.0
// Lower hallway - R
0.5 1.0 3.0 0.0 1.0
0.5 0.0 3.0 0.0 0.0
0.5 0.0 2.0 1.0 0.0
0.5 1.0 3.0 0.0 1.0
0.5 1.0 2.0 1.0 1.0
0.5 0.0 2.0 1.0 0.0
// Left hallway - Lw
-3.0 1.0 0.5 1.0 1.0
-3.0 0.0 0.5 1.0 0.0
-2.0 0.0 0.5 0.0 0.0
-3.0 1.0 0.5 1.0 1.0
-2.0 1.0 0.5 0.0 1.0
-2.0 0.0 0.5 0.0 0.0
// Left hallway - Hi
-3.0 1.0 -0.5 1.0 1.0
-3.0 0.0 -0.5 1.0 0.0
-2.0 0.0 -0.5 0.0 0.0
-3.0 1.0 -0.5 1.0 1.0
-2.0 1.0 -0.5 0.0 1.0
-2.0 0.0 -0.5 0.0 0.0
// Right hallway - Lw
3.0 1.0 0.5 1.0 1.0
3.0 0.0 0.5 1.0 0.0
2.0 0.0 0.5 0.0 0.0
3.0 1.0 0.5 1.0 1.0
2.0 1.0 0.5 0.0 1.0
2.0 0.0 0.5 0.0 0.0
// Right hallway - Hi
3.0 1.0 -0.5 1.0 1.0
3.0 0.0 -0.5 1.0 0.0
2.0 0.0 -0.5 0.0 0.0
3.0 1.0 -0.5 1.0 1.0
2.0 1.0 -0.5 0.0 1.0
2.0 0.0 -0.5 0.0 0.0

View File

@ -20,6 +20,7 @@ void InitGL(int Width, int Height) // We call this right after our OpenG
glEnable(GL_DEPTH_TEST); // Enables Depth Testing
glShadeModel(GL_SMOOTH); // Enables Smooth Color Shading
glEnable(GL_TEXTURE_2D);
glEnable(GL_CULL_FACE);
glMatrixMode(GL_PROJECTION);
glLoadIdentity(); // Reset The Projection Matrix