Huge refactor of the drawing code

This commit is contained in:
Luke Benstead 2019-03-24 08:09:02 +00:00
parent 3cb24c4fb4
commit 5e6927d9a1
5 changed files with 387 additions and 201 deletions

124
GL/clip.c
View File

@ -1,6 +1,7 @@
#include <float.h>
#include <stdio.h>
#include <assert.h>
#include <string.h>
#ifdef _arch_dreamcast
#include <dc/pvr.h>
@ -9,7 +10,7 @@
#endif
#include "profiler.h"
#include "clip.h"
#include "private.h"
#include "../containers/aligned_vector.h"
@ -23,8 +24,8 @@ void _glEnableClipping(unsigned char v) {
ZCLIP_ENABLED = v;
}
void clipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t) __attribute__((optimize("fast-math")));
void clipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t) {
void _glClipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t) __attribute__((optimize("fast-math")));
void _glClipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t) {
const float NEAR_PLANE = 0.2; // FIXME: this needs to be read from the projection matrix.. somehow
*t = (NEAR_PLANE - v1->w) / (v2->w - v1->w);
@ -73,13 +74,22 @@ static inline void interpolateColour(const uint8_t* v1, const uint8_t* v2, const
const uint32_t VERTEX_CMD_EOL = 0xf0000000;
const uint32_t VERTEX_CMD = 0xe0000000;
void clipTriangle(const ClipVertex* vertices, const uint8_t visible, AlignedVector* output, const uint8_t flatShade) __attribute__((optimize("fast-math")));
void clipTriangle(const ClipVertex* vertices, const uint8_t visible, AlignedVector* output, const uint8_t flatShade) {
uint8_t i, c = 0;
/* A single triangle queued for near-plane clipping. _glClipTriangleStrip copies
 * the three strip vertices and their matching VertexExtra entries into one of
 * these, and _glClipTriangle later reads them back. */
typedef struct {
/* The three corner vertices, copied by value from the submission's vertex list */
ClipVertex vertex[3];
/* The per-vertex extra data (normal / ST) belonging to the same three corners */
VertexExtra extra[3];
/* Visibility bitmask for the corners; _glClipTriangle tests bit (2 - index)
 * to decide whether vertex[index] is visible */
uint8_t visible;
} Triangle;
void _glClipTriangle(const Triangle* triangle, const uint8_t visible, SubmissionTarget* target, const uint8_t flatShade) __attribute__((optimize("fast-math")));
void _glClipTriangle(const Triangle* triangle, const uint8_t visible, SubmissionTarget* target, const uint8_t flatShade) {
uint8_t i, c = 0;
uint8_t lastVisible = 255;
ClipVertex* last = NULL;
VertexExtra* veLast = NULL;
const ClipVertex* vertices = triangle->vertex;
const VertexExtra* extras = triangle->extra;
/* Used when flat shading is enabled */
uint32_t finalColour = *((uint32_t*) vertices[2].bgra);
@ -88,6 +98,8 @@ void clipTriangle(const ClipVertex* vertices, const uint8_t visible, AlignedVect
uint8_t thisIndex = (i == 3) ? 0 : i;
ClipVertex next;
VertexExtra veNext;
next.flags = VERTEX_CMD;
uint8_t thisVisible = (visible & (1 << (2 - thisIndex))) > 0;
@ -97,13 +109,18 @@ void clipTriangle(const ClipVertex* vertices, const uint8_t visible, AlignedVect
if(lastVisible < 255 && lastVisible != thisVisible) {
const ClipVertex* v1 = &vertices[lastIndex];
const ClipVertex* v2 = &vertices[thisIndex];
const VertexExtra* ve1 = &extras[lastIndex];
const VertexExtra* ve2 = &extras[thisIndex];
float t;
clipLineToNearZ(v1, v2, &next, &t);
_glClipLineToNearZ(v1, v2, &next, &t);
interpolateFloat(v1->w, v2->w, t, &next.w);
interpolateVec3(v1->nxyz, v2->nxyz, t, next.nxyz);
interpolateVec2(v1->uv, v2->uv, t, next.uv);
interpolateVec2(v1->st, v2->st, t, next.st);
interpolateVec3(ve1->nxyz, ve2->nxyz, t, veNext.nxyz);
interpolateVec2(ve1->st, ve2->st, t, veNext.st);
if(flatShade) {
*((uint32_t*) next.bgra) = finalColour;
@ -111,15 +128,22 @@ void clipTriangle(const ClipVertex* vertices, const uint8_t visible, AlignedVect
interpolateColour(v1->bgra, v2->bgra, t, next.bgra);
}
last = aligned_vector_push_back(output, &next, 1);
/* Push back the new vertices to the end of both the ClipVertex and VertexExtra lists */
last = aligned_vector_push_back(&target->output->vector, &next, 1);
last->flags = VERTEX_CMD;
veLast = aligned_vector_push_back(target->extras, &veNext, 1);
++c;
}
}
if(thisVisible && i != 3) {
last = aligned_vector_push_back(output, &vertices[thisIndex], 1);
last = aligned_vector_push_back(&target->output->vector, &vertices[thisIndex], 1);
last->flags = VERTEX_CMD;
veLast = aligned_vector_push_back(target->extras, &extras[thisIndex], 1);
++c;
}
@ -134,13 +158,21 @@ void clipTriangle(const ClipVertex* vertices, const uint8_t visible, AlignedVect
newVerts[1] = *(last - 1);
newVerts[2] = *(last);
VertexExtra newExtras[3];
newExtras[0] = *(veLast - 3);
newExtras[1] = *(veLast - 1);
newExtras[2] = *(veLast);
(last - 1)->flags = VERTEX_CMD_EOL;
newVerts[0].flags = VERTEX_CMD;
newVerts[1].flags = VERTEX_CMD;
newVerts[2].flags = VERTEX_CMD_EOL;
aligned_vector_resize(output, output->size - 1);
aligned_vector_push_back(output, newVerts, 3);
aligned_vector_resize(&target->output->vector, target->output->vector.size - 1);
aligned_vector_push_back(&target->output->vector, newVerts, 3);
aligned_vector_resize(target->extras, target->extras->size - 1);
aligned_vector_push_back(target->extras, newExtras, 3);
} else {
last->flags = VERTEX_CMD_EOL;
}
@ -161,40 +193,39 @@ static inline void markDead(ClipVertex* vert) {
#define B011 3
#define B110 6
void clipTriangleStrip2(AlignedVector* vertices, uint32_t offset, uint8_t fladeShade) {
/* Room for clipping 16 triangles */
typedef struct {
ClipVertex vertex[3];
uint8_t visible;
} Triangle;
#define MAX_CLIP_TRIANGLES 255
static Triangle TO_CLIP[256];
void _glClipTriangleStrip(SubmissionTarget* target, uint8_t fladeShade) {
static Triangle TO_CLIP[MAX_CLIP_TRIANGLES];
static uint8_t CLIP_COUNT = 0;
CLIP_COUNT = 0;
uint32_t i = 0;
/* Skip the header */
ClipVertex* vertex = _glSubmissionTargetStart(target);
const ClipVertex* end = _glSubmissionTargetEnd(target);
const ClipVertex* start = vertex;
assert(offset < vertices->size);
ClipVertex* header = (ClipVertex*) aligned_vector_at(vertices, offset);
ClipVertex* vertex = header + 1;
int32_t triangle = -1;
uint32_t count = vertices->size - offset;
/* Go to the (potential) end of the first triangle */
vertex++;
int32_t triangle = 0;
uint32_t vi1, vi2, vi3;
/* Start at 3 due to the header */
for(i = 3; i < count; ++i, ++triangle) {
assert(offset + i < vertices->size);
vertex = aligned_vector_at(vertices, offset + i);
while(vertex < end) {
vertex++;
triangle++;
uint8_t even = (triangle % 2) == 0;
ClipVertex* v1 = (even) ? vertex - 2 : vertex - 1;
ClipVertex* v2 = (even) ? vertex - 1 : vertex - 2;
ClipVertex* v3 = vertex;
/* Indexes into extras array */
vi1 = v1 - start;
vi2 = v2 - start;
vi3 = v3 - start;
/* Skip ahead if we don't have a complete triangle yet */
if(v1->flags != VERTEX_CMD || v2->flags != VERTEX_CMD) {
triangle = -1;
@ -249,10 +280,17 @@ void clipTriangleStrip2(AlignedVector* vertices, uint32_t offset, uint8_t fladeS
case B101:
case B011:
case B110:
assert(CLIP_COUNT < MAX_CLIP_TRIANGLES);
/* Store the triangle for clipping */
TO_CLIP[CLIP_COUNT].vertex[0] = *v1;
TO_CLIP[CLIP_COUNT].vertex[1] = *v2;
TO_CLIP[CLIP_COUNT].vertex[2] = *v3;
TO_CLIP[CLIP_COUNT].extra[0] = *(VertexExtra*) aligned_vector_at(target->extras, vi1);
TO_CLIP[CLIP_COUNT].extra[1] = *(VertexExtra*) aligned_vector_at(target->extras, vi2);
TO_CLIP[CLIP_COUNT].extra[2] = *(VertexExtra*) aligned_vector_at(target->extras, vi3);
TO_CLIP[CLIP_COUNT].visible = visible;
++CLIP_COUNT;
@ -287,6 +325,15 @@ void clipTriangleStrip2(AlignedVector* vertices, uint32_t offset, uint8_t fladeS
triangle = -1;
} else {
/* FIXME: ?
* This situation doesn't actually seem possible, we always clip from one end
* of the triangle strip to the other, so we're never going to hit the plane in the
* middle of the strip (with previous/next unhandled tris).
*
* Uncomment if this actually happens */
assert(0 && "Not Implemented (see comment)");
/*
ClipVertex* v4 = vertex + 1;
TO_CLIP[CLIP_COUNT].vertex[0] = *v3;
@ -298,26 +345,24 @@ void clipTriangleStrip2(AlignedVector* vertices, uint32_t offset, uint8_t fladeS
TO_CLIP[CLIP_COUNT].visible = visible;
++CLIP_COUNT;
/* Restart strip */
// Restart strip
triangle = -1;
/* Mark the second vertex as the end of the strip */
// Mark the second vertex as the end of the strip
(vertex - 1)->flags = VERTEX_CMD_EOL;
if(v4->flags == VERTEX_CMD_EOL) {
markDead(vertex);
markDead(v4);
} else {
/* Swap the next vertices to start a new strip */
// Swap the next vertices to start a new strip
ClipVertex tmp = *vertex;
*vertex = *v4;
*v4 = tmp;
vertex->flags = VERTEX_CMD;
v4->flags = VERTEX_CMD;
}
i += 1;
} */
}
break;
default:
@ -326,7 +371,8 @@ void clipTriangleStrip2(AlignedVector* vertices, uint32_t offset, uint8_t fladeS
}
/* Now, clip all the triangles and append them to the output */
GLushort i;
for(i = 0; i < CLIP_COUNT; ++i) {
clipTriangle(TO_CLIP[i].vertex, TO_CLIP[i].visible, vertices, fladeShade);
_glClipTriangle(&TO_CLIP[i], TO_CLIP[i].visible, target, fladeShade);
}
}

View File

@ -1,49 +0,0 @@
#ifndef CLIP_H
#define CLIP_H
#include <stdint.h>
#include "../containers/aligned_vector.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef enum {
CLIP_RESULT_ALL_IN_FRONT,
CLIP_RESULT_ALL_BEHIND,
CLIP_RESULT_ALL_ON_PLANE,
CLIP_RESULT_FRONT_TO_BACK,
CLIP_RESULT_BACK_TO_FRONT
} ClipResult;
#define A8IDX 3
#define R8IDX 2
#define G8IDX 1
#define B8IDX 0
typedef struct {
/* Same 32 byte layout as pvr_vertex_t */
uint32_t flags;
float xyz[3];
float uv[2];
uint8_t bgra[4];
uint32_t oargb;
/* Important, we have 24 bytes here. That means when submitting to the SQs we need to
* increment the pointer by 6 */
float nxyz[3]; /* Normal */
float w;
float st[2];
} ClipVertex;
void clipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t);
void clipTriangleStrip2(AlignedVector* vertices, uint32_t offset, uint8_t fladeShade);
#ifdef __cplusplus
}
#endif
#endif // CLIP_H

349
GL/draw.c
View File

@ -81,7 +81,21 @@ static void _readVertexData3f3f(const float* input, GLuint count, GLubyte stride
}
}
static void _readVertexData3us3f(const GLushort* input, GLuint count, GLubyte stride, float* output) {
/* VE == VertexExtra */
/* Copies `count` 3-float vectors from a client array into a VertexExtra array.
 *
 * input  - first source vector; consecutive vectors are `stride` bytes apart
 * count  - number of vectors to copy
 * stride - distance in bytes between consecutive source vectors
 * output - first destination float triple; consecutive destinations are
 *          sizeof(VertexExtra) bytes apart
 */
static void _readVertexData3f3fVE(const float* input, GLuint count, GLubyte stride, float* output) {
    /* Walk both arrays as raw bytes because both strides are byte counts */
    const GLubyte* src = (const GLubyte*) input;
    const GLubyte* const limit = src + (count * stride);
    GLubyte* dst = (GLubyte*) output;

    for(; src < limit; src += stride, dst += sizeof(VertexExtra)) {
        const float* in = (const float*) src;
        float* out = (float*) dst;

        out[0] = in[0];
        out[1] = in[1];
        out[2] = in[2];
    }
}
static void _readVertexData3us3f(const GLushort* input, GLuint count, GLubyte stride, GLfloat* output) {
const GLushort* end = (GLushort*) ((GLubyte*) input) + (count * stride);
while(input < end) {
@ -94,7 +108,20 @@ static void _readVertexData3us3f(const GLushort* input, GLuint count, GLubyte st
}
}
static void _readVertexData3ui3f(const GLuint* input, GLuint count, GLubyte stride, float* output) {
/* Converts `count` 3-component unsigned short vectors from a client array into
 * floats stored in a VertexExtra array.
 *
 * input  - first source vector; consecutive vectors are `stride` bytes apart
 * count  - number of vectors to convert
 * stride - distance in bytes between consecutive source vectors
 * output - first destination float triple; consecutive destinations are
 *          sizeof(VertexExtra) bytes apart
 */
static void _readVertexData3us3fVE(const GLushort* input, GLuint count, GLubyte stride, GLfloat* output) {
    /* stride is a byte count, so the end address must be computed on a byte
     * pointer *before* casting back. Casting first would scale the offset by
     * sizeof(GLushort) and make the loop run past both buffers. */
    const GLushort* end = (const GLushort*) (((const GLubyte*) input) + (count * stride));

    while(input < end) {
        output[0] = input[0];
        output[1] = input[1];
        output[2] = input[2];

        input = (const GLushort*) (((const GLubyte*) input) + stride);
        output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra));
    }
}
static void _readVertexData3ui3f(const GLuint* input, GLuint count, GLubyte stride, GLfloat* output) {
const GLuint* end = (GLuint*) ((GLubyte*) input) + (count * stride);
while(input < end) {
@ -107,6 +134,19 @@ static void _readVertexData3ui3f(const GLuint* input, GLuint count, GLubyte stri
}
}
/* Converts `count` 3-component unsigned int vectors from a client array into
 * floats stored in a VertexExtra array.
 *
 * input  - first source vector; consecutive vectors are `stride` bytes apart
 * count  - number of vectors to convert
 * stride - distance in bytes between consecutive source vectors
 * output - first destination float triple; consecutive destinations are
 *          sizeof(VertexExtra) bytes apart
 */
static void _readVertexData3ui3fVE(const GLuint* input, GLuint count, GLubyte stride, GLfloat* output) {
    /* stride is a byte count, so the end address must be computed on a byte
     * pointer *before* casting back. Casting first would scale the offset by
     * sizeof(GLuint) and make the loop run past both buffers. */
    const GLuint* end = (const GLuint*) (((const GLubyte*) input) + (count * stride));

    while(input < end) {
        output[0] = input[0];
        output[1] = input[1];
        output[2] = input[2];

        input = (const GLuint*) (((const GLubyte*) input) + stride);
        output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra));
    }
}
static void _readVertexData3ub3f(const GLubyte* input, GLuint count, GLubyte stride, float* output) {
const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f;
const GLubyte* end = ((GLubyte*) input) + (count * stride);
@ -121,6 +161,20 @@ static void _readVertexData3ub3f(const GLubyte* input, GLuint count, GLubyte str
}
}
/* Converts `count` 3-component unsigned byte vectors into floats scaled by
 * 1/255 and stores them in a VertexExtra array.
 *
 * input  - first source vector; consecutive vectors are `stride` bytes apart
 * count  - number of vectors to convert
 * stride - distance in bytes between consecutive source vectors
 * output - first destination float triple; consecutive destinations are
 *          sizeof(VertexExtra) bytes apart
 */
static void _readVertexData3ub3fVE(const GLubyte* input, GLuint count, GLubyte stride, GLfloat* output) {
    const float scale = 1.0f / 255.0f;
    const GLubyte* const limit = input + (count * stride);
    GLubyte* dst = (GLubyte*) output;

    for(; input < limit; input += stride, dst += sizeof(VertexExtra)) {
        GLfloat* out = (GLfloat*) dst;

        out[0] = input[0] * scale;
        out[1] = input[1] * scale;
        out[2] = input[2] * scale;
    }
}
static void _readVertexData2f2f(const float* input, GLuint count, GLubyte stride, float* output) {
const float* end = (float*) ((GLubyte*) input) + (count * stride);
@ -133,6 +187,18 @@ static void _readVertexData2f2f(const float* input, GLuint count, GLubyte stride
}
}
/* Copies `count` 2-float vectors from a client array into a VertexExtra array.
 *
 * input  - first source vector; consecutive vectors are `stride` bytes apart
 * count  - number of vectors to copy
 * stride - distance in bytes between consecutive source vectors
 * output - first destination float pair; consecutive destinations are
 *          sizeof(VertexExtra) bytes apart
 */
static void _readVertexData2f2fVE(const float* input, GLuint count, GLubyte stride, GLfloat* output) {
    /* stride is a byte count, so the end address must be computed on a byte
     * pointer *before* casting back. Casting first would scale the offset by
     * sizeof(float) and make the loop run past both buffers. */
    const float* end = (const float*) (((const GLubyte*) input) + (count * stride));

    while(input < end) {
        output[0] = input[0];
        output[1] = input[1];

        input = (const float*) (((const GLubyte*) input) + stride);
        output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra));
    }
}
static void _readVertexData2f3f(const float* input, GLuint count, GLubyte stride, float* output) {
const float* end = (float*) ((GLubyte*) input) + (count * stride);
@ -185,6 +251,18 @@ static void _readVertexData2us2f(const GLushort* input, GLuint count, GLubyte st
}
}
/* Converts `count` 2-component unsigned short vectors from a client array into
 * floats stored in a VertexExtra array.
 *
 * input  - first source vector; consecutive vectors are `stride` bytes apart
 * count  - number of vectors to convert
 * stride - distance in bytes between consecutive source vectors
 * output - first destination float pair; consecutive destinations are
 *          sizeof(VertexExtra) bytes apart
 */
static void _readVertexData2us2fVE(const GLushort* input, GLuint count, GLubyte stride, GLfloat* output) {
    /* stride is a byte count, so the end address must be computed on a byte
     * pointer *before* casting back. Casting first would scale the offset by
     * sizeof(GLushort) and make the loop run past both buffers. */
    const GLushort* end = (const GLushort*) (((const GLubyte*) input) + (count * stride));

    while(input < end) {
        output[0] = input[0];
        output[1] = input[1];

        input = (const GLushort*) (((const GLubyte*) input) + stride);
        output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra));
    }
}
static void _readVertexData2ui2f(const GLuint* input, GLuint count, GLubyte stride, float* output) {
const GLuint* end = (GLuint*) ((GLubyte*) input) + (count * stride);
@ -197,6 +275,18 @@ static void _readVertexData2ui2f(const GLuint* input, GLuint count, GLubyte stri
}
}
/* Converts `count` 2-component unsigned int vectors from a client array into
 * floats stored in a VertexExtra array.
 *
 * input  - first source vector; consecutive vectors are `stride` bytes apart
 * count  - number of vectors to convert
 * stride - distance in bytes between consecutive source vectors
 * output - first destination float pair; consecutive destinations are
 *          sizeof(VertexExtra) bytes apart
 */
static void _readVertexData2ui2fVE(const GLuint* input, GLuint count, GLubyte stride, GLfloat* output) {
    /* stride is a byte count, so the end address must be computed on a byte
     * pointer *before* casting back. Casting first would scale the offset by
     * sizeof(GLuint) and make the loop run past both buffers. */
    const GLuint* end = (const GLuint*) (((const GLubyte*) input) + (count * stride));

    while(input < end) {
        output[0] = input[0];
        output[1] = input[1];

        input = (const GLuint*) (((const GLubyte*) input) + stride);
        output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra));
    }
}
static void _readVertexData2ub2f(const GLubyte* input, GLuint count, GLubyte stride, float* output) {
const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f;
const GLubyte* end = (GLubyte*) ((GLubyte*) input) + (count * stride);
@ -210,6 +300,19 @@ static void _readVertexData2ub2f(const GLubyte* input, GLuint count, GLubyte str
}
}
/* Converts `count` 2-component unsigned byte vectors into floats scaled by
 * 1/255 and stores them in a VertexExtra array.
 *
 * input  - first source vector; consecutive vectors are `stride` bytes apart
 * count  - number of vectors to convert
 * stride - distance in bytes between consecutive source vectors
 * output - first destination float pair; consecutive destinations are
 *          sizeof(VertexExtra) bytes apart
 */
static void _readVertexData2ub2fVE(const GLubyte* input, GLuint count, GLubyte stride, GLfloat* output) {
    const float scale = 1.0f / 255.0f;
    const GLubyte* const limit = input + (count * stride);
    GLubyte* dst = (GLubyte*) output;

    for(; input < limit; input += stride, dst += sizeof(VertexExtra)) {
        GLfloat* out = (GLfloat*) dst;

        out[0] = input[0] * scale;
        out[1] = input[1] * scale;
    }
}
static void _readVertexData2ui3f(const GLuint* input, GLuint count, GLubyte stride, float* output) {
const GLuint* end = (GLuint*) ((GLubyte*) input) + (count * stride);
@ -279,13 +382,12 @@ static void _readVertexData3ubARGB(const GLubyte* input, GLuint count, GLubyte s
}
}
static void _fillWithNegZ(GLuint count, GLfloat* output) {
const GLfloat* end = (GLfloat*) ((GLubyte*) output) + (sizeof(ClipVertex) * count);
static void _fillWithNegZVE(GLuint count, GLfloat* output) {
const GLfloat* end = output + (count * 3);
while(output < end) {
output[0] = output[1] = 0.0f;
output[2] = -1.0f;
output += sizeof(ClipVertex);
output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra));
}
}
@ -303,10 +405,18 @@ static void _fillWhiteARGB(GLuint count, GLubyte* output) {
}
static void _fillZero2f(GLuint count, GLfloat* output) {
const GLfloat* end = output + (sizeof(ClipVertex) * count);
const GLfloat* end = (GLfloat*) ((GLubyte*) output) + (count * sizeof(ClipVertex));
while(output < end) {
output[0] = output[1] = 0.0f;
output += sizeof(ClipVertex);
output = (GLfloat*) (((GLubyte*) output) + sizeof(ClipVertex));
}
}
/* Zeroes a 2-float field in each of `count` consecutive VertexExtra entries.
 *
 * count  - number of VertexExtra entries to touch
 * output - address of the 2-float field inside the first entry; the same
 *          field of the following entries is sizeof(VertexExtra) bytes apart
 */
static void _fillZero2fVE(GLuint count, GLfloat* output) {
    /* The cursor advances by sizeof(VertexExtra) per entry, so the end must be
     * measured in the same unit. Using (2 * count) floats would stop the loop
     * before every entry had been cleared. */
    const GLfloat* end = (const GLfloat*) (((const GLubyte*) output) + (count * sizeof(VertexExtra)));

    while(output < end) {
        output[0] = output[1] = 0.0f;
        output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra));
    }
}
@ -433,6 +543,18 @@ do { \
*b = temp; \
} while(0)
/* Returns the address of this submission's PVR header inside the output list.
 * The address is looked up from header_offset on every call rather than
 * cached. */
PVRHeader* _glSubmissionTargetHeader(SubmissionTarget* target) {
    PVRHeader* header = aligned_vector_at(&target->output->vector, target->header_offset);
    return header;
}
/* Returns the address of the first vertex of this submission inside the
 * output list, looked up from start_offset on every call. */
ClipVertex* _glSubmissionTargetStart(SubmissionTarget* target) {
    ClipVertex* first = aligned_vector_at(&target->output->vector, target->start_offset);
    return first;
}
/* Returns the one-past-the-end vertex address for this submission, i.e. the
 * start address advanced by target->count vertices. */
ClipVertex* _glSubmissionTargetEnd(SubmissionTarget* target) {
    ClipVertex* first = _glSubmissionTargetStart(target);
    return first + target->count;
}
static inline void genTriangles(ClipVertex* output, GLuint count) {
const ClipVertex* end = output + count;
ClipVertex* it = output + 2;
@ -571,9 +693,10 @@ static inline void _readUVData(const GLuint first, const GLuint count, ClipVerte
}
}
static inline void _readSTData(const GLuint first, const GLuint count, ClipVertex* output) {
static inline void _readSTData(const GLuint first, const GLuint count, SubmissionTarget* target) {
if((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) != ST_ENABLED_FLAG) {
_fillZero2f(count, output->st);
VertexExtra* extra = aligned_vector_at(target->extras, 0);
_fillZero2fVE(count, extra->st);
return;
}
@ -581,21 +704,22 @@ static inline void _readSTData(const GLuint first, const GLuint count, ClipVerte
const void* stptr = ((GLubyte*) ST_POINTER.ptr + (first * ststride));
if(ST_POINTER.size == 2) {
VertexExtra* extra = aligned_vector_at(target->extras, 0);
switch(ST_POINTER.type) {
case GL_FLOAT:
_readVertexData2f2f(stptr, count, ststride, output[0].st);
_readVertexData2f2fVE(stptr, count, ststride, extra->st);
break;
case GL_BYTE:
case GL_UNSIGNED_BYTE:
_readVertexData2ub2f(stptr, count, ststride, output[0].st);
_readVertexData2ub2fVE(stptr, count, ststride, extra->st);
break;
case GL_SHORT:
case GL_UNSIGNED_SHORT:
_readVertexData2us2f(stptr, count, ststride, output[0].st);
_readVertexData2us2fVE(stptr, count, ststride, extra->st);
break;
case GL_INT:
case GL_UNSIGNED_INT:
_readVertexData2ui2f(stptr, count, ststride, output[0].st);
_readVertexData2ui2fVE(stptr, count, ststride, extra->st);
break;
default:
assert(0 && "Not Implemented");
@ -605,9 +729,10 @@ static inline void _readSTData(const GLuint first, const GLuint count, ClipVerte
}
}
static inline void _readNormalData(const GLuint first, const GLuint count, ClipVertex* output) {
static inline void _readNormalData(const GLuint first, const GLuint count, SubmissionTarget* target) {
if((ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) != NORMAL_ENABLED_FLAG) {
_fillWithNegZ(count, output->nxyz);
VertexExtra* extra = aligned_vector_at(target->extras, 0);
_fillWithNegZVE(count, extra->nxyz);
return;
}
@ -615,21 +740,22 @@ static inline void _readNormalData(const GLuint first, const GLuint count, ClipV
const void* nptr = ((GLubyte*) NORMAL_POINTER.ptr + (first * nstride));
if(NORMAL_POINTER.size == 3) {
VertexExtra* extra = aligned_vector_at(target->extras, 0);
switch(NORMAL_POINTER.type) {
case GL_FLOAT:
_readVertexData3f3f(nptr, count, nstride, output[0].nxyz);
_readVertexData3f3fVE(nptr, count, nstride, extra->nxyz);
break;
case GL_BYTE:
case GL_UNSIGNED_BYTE:
_readVertexData3ub3f(nptr, count, nstride, output[0].nxyz);
_readVertexData3ub3fVE(nptr, count, nstride, extra->nxyz);
break;
case GL_SHORT:
case GL_UNSIGNED_SHORT:
_readVertexData3us3f(nptr, count, nstride, output[0].nxyz);
_readVertexData3us3fVE(nptr, count, nstride, extra->nxyz);
break;
case GL_INT:
case GL_UNSIGNED_INT:
_readVertexData3ui3f(nptr, count, nstride, output[0].nxyz);
_readVertexData3ui3fVE(nptr, count, nstride, extra->nxyz);
break;
default:
assert(0 && "Not Implemented");
@ -694,7 +820,7 @@ static inline void _readDiffuseData(const GLuint first, const GLuint count, Clip
}
}
static void generate(ClipVertex* output, const GLenum mode, const GLsizei first, const GLuint count,
static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei first, const GLuint count,
const GLubyte* indices, const GLenum type, const GLboolean doTexture, const GLboolean doMultitexture, const GLboolean doLighting) {
/* Read from the client buffers and generate an array of ClipVertices */
@ -703,14 +829,15 @@ static void generate(ClipVertex* output, const GLenum mode, const GLsizei first,
const ClipVertex* end;
if(!indices) {
_readPositionData(first, count, output);
_readDiffuseData(first, count, output);
if(doTexture) _readUVData(first, count, output);
if(doLighting) _readNormalData(first, count, output);
if(doTexture && doMultitexture) _readSTData(first, count, output);
_readPositionData(first, count, _glSubmissionTargetStart(target));
_readDiffuseData(first, count, _glSubmissionTargetStart(target));
if(doTexture) _readUVData(first, count, _glSubmissionTargetStart(target));
if(doLighting) _readNormalData(first, count, target);
if(doTexture && doMultitexture) _readSTData(first, count, target);
it = _glSubmissionTargetStart(target);
end = _glSubmissionTargetEnd(target);
it = output;
end = output + count;
while(it < end) {
(it++)->flags = PVR_CMD_VERTEX;
}
@ -718,25 +845,26 @@ static void generate(ClipVertex* output, const GLenum mode, const GLsizei first,
// Drawing arrays
switch(mode) {
case GL_TRIANGLES:
genTriangles(output, count);
genTriangles(_glSubmissionTargetStart(target), count);
break;
case GL_QUADS:
genQuads(output, count);
genQuads(_glSubmissionTargetStart(target), count);
break;
case GL_POLYGON:
case GL_TRIANGLE_FAN:
genTriangleFan(output, count);
genTriangleFan(_glSubmissionTargetStart(target), count);
break;
case GL_TRIANGLE_STRIP:
genTriangleStrip(output, count);
genTriangleStrip(_glSubmissionTargetStart(target), count);
break;
default:
assert(0 && "Not Implemented");
}
} else {
const IndexParseFunc indexFunc = _calcParseIndexFunc(type);
it = output;
end = output + count;
it = _glSubmissionTargetStart(target);
end = _glSubmissionTargetEnd(target);
GLuint j;
const GLubyte* idx = indices;
while(it < end) {
@ -744,31 +872,33 @@ static void generate(ClipVertex* output, const GLenum mode, const GLsizei first,
_readPositionData(j, 1, it);
_readDiffuseData(j, 1, it);
if(doTexture) _readUVData(j, 1, it);
if(doLighting) _readNormalData(j, 1, it);
if(doTexture && doMultitexture) _readSTData(j, 1, it);
//FIXME: Need to think about how we can share this */
//if(doLighting) _readNormalData(j, 1, it);
//if(doTexture && doMultitexture) _readSTData(j, 1, it);
++it;
idx += istride;
}
it = output;
it = _glSubmissionTargetStart(target);
while(it < end) {
(it++)->flags = PVR_CMD_VERTEX;
}
it = _glSubmissionTargetStart(target);
// Drawing arrays
switch(mode) {
case GL_TRIANGLES:
genTriangles(output, count);
genTriangles(it, count);
break;
case GL_QUADS:
genQuads(output, count);
genQuads(it, count);
break;
case GL_POLYGON:
case GL_TRIANGLE_FAN:
genTriangleFan(output, count);
genTriangleFan(it, count);
break;
case GL_TRIANGLE_STRIP:
genTriangleStrip(output, count);
genTriangleStrip(it, count);
break;
default:
assert(0 && "Not Implemented");
@ -776,15 +906,14 @@ static void generate(ClipVertex* output, const GLenum mode, const GLsizei first,
}
}
static void transform(ClipVertex* output, const GLuint count) {
static void transform(SubmissionTarget* target) {
/* Perform modelview transform, storing W */
ClipVertex* vertex = output;
ClipVertex* vertex = _glSubmissionTargetStart(target);
const ClipVertex* end = _glSubmissionTargetEnd(target);
_glApplyRenderMatrix(); /* Apply the Render Matrix Stack */
GLsizei i = count;
while(i--) {
while(vertex < end) {
register float __x __asm__("fr12") = (vertex->xyz[0]);
register float __y __asm__("fr13") = (vertex->xyz[1]);
register float __z __asm__("fr14") = (vertex->xyz[2]);
@ -801,17 +930,16 @@ static void transform(ClipVertex* output, const GLuint count) {
vertex->xyz[1] = __y;
vertex->xyz[2] = __z;
vertex->w = __w;
++vertex;
}
}
static GLsizei clip(AlignedVector* polylist, uint32_t offset, const GLuint count) {
static void clip(SubmissionTarget* target) {
/* Perform clipping, generating new vertices as necessary */
clipTriangleStrip2(polylist, offset, _glGetShadeModel() == GL_FLAT);
_glClipTriangleStrip(target, _glGetShadeModel() == GL_FLAT);
/* List size, minus the original offset (which includes the header), minus the header */
return polylist->size - offset - 1;
/* Reset the count now that we may have added vertices */
target->count = target->output->vector.size - target->start_offset;
}
static void mat_transform3(const float* xyz, const float* xyzOut, const uint32_t count, const uint32_t inStride, const uint32_t outStride) {
@ -846,7 +974,7 @@ static void mat_transform_normal3(const float* xyz, const float* xyzOut, const u
}
}
static void light(ClipVertex* output, const GLuint count) {
static void light(SubmissionTarget* target) {
if(!_glIsLightingEnabled()) {
return;
}
@ -863,22 +991,23 @@ static void light(ClipVertex* output, const GLuint count) {
aligned_vector_init(eye_space_data, sizeof(EyeSpaceData));
}
aligned_vector_resize(eye_space_data, count);
aligned_vector_resize(eye_space_data, target->count);
/* Perform lighting calculations and manipulate the colour */
ClipVertex* vertex = output;
ClipVertex* vertex = _glSubmissionTargetStart(target);
VertexExtra* extra = aligned_vector_at(target->extras, 0);
EyeSpaceData* eye_space = (EyeSpaceData*) eye_space_data->data;
_glMatrixLoadModelView();
mat_transform3(vertex->xyz, eye_space->xyz, count, sizeof(ClipVertex), sizeof(EyeSpaceData));
mat_transform3(vertex->xyz, eye_space->xyz, target->count, sizeof(ClipVertex), sizeof(EyeSpaceData));
_glMatrixLoadNormal();
mat_transform_normal3(vertex->nxyz, eye_space->n, count, sizeof(ClipVertex), sizeof(EyeSpaceData));
mat_transform_normal3(extra->nxyz, eye_space->n, target->count, sizeof(VertexExtra), sizeof(EyeSpaceData));
GLsizei i;
EyeSpaceData* ES = aligned_vector_at(eye_space_data, 0);
for(i = 0; i < count; ++i, ++vertex, ++ES) {
for(i = 0; i < target->count; ++i, ++vertex, ++ES) {
/* We ignore diffuse colour when lighting is enabled. If GL_COLOR_MATERIAL is enabled
* then the lighting calculation should possibly take it into account */
@ -903,12 +1032,12 @@ static void light(ClipVertex* output, const GLuint count) {
}
}
static void divide(ClipVertex* output, const GLuint count) {
static void divide(SubmissionTarget* target) {
/* Perform perspective divide on each vertex */
ClipVertex* vertex = output;
ClipVertex* vertex = _glSubmissionTargetStart(target);
const ClipVertex* end = _glSubmissionTargetEnd(target);
GLsizei i = count;
while(i--) {
while(vertex < end) {
vertex->xyz[2] = 1.0f / vertex->w;
vertex->xyz[0] *= vertex->xyz[2];
vertex->xyz[1] *= vertex->xyz[2];
@ -926,12 +1055,17 @@ static void push(PVRHeader* header, ClipVertex* output, const GLuint count, Poly
pvr_poly_compile(&header->hdr, &cxt);
/* Post-process the vertex list */
/*
* This is currently unnecessary. aligned_vector memsets the allocated objects
* to zero, and we don't touch oargb, also, we don't *enable* oargb yet in the
* pvr header so it should be ignored anyway. If this ever becomes a problem,
* uncomment this.
ClipVertex* vout = output;
GLuint i = count;
while(i--) {
const ClipVertex* end = output + count;
while(vout < end) {
vout->oargb = 0;
}
*/
}
#define DEBUG_CLIPPING 0
@ -942,6 +1076,21 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type
return;
}
static SubmissionTarget* target = NULL;
static AlignedVector extras;
/* Initialization of the target and extras */
if(!target) {
target = (SubmissionTarget*) malloc(sizeof(SubmissionTarget));
target->extras = NULL;
target->count = 0;
target->output = NULL;
target->header_offset = target->start_offset = 0;
aligned_vector_init(&extras, sizeof(VertexExtra));
target->extras = &extras;
}
GLboolean doMultitexture, doTexture, doLighting;
GLint activeTexture;
glGetIntegerv(GL_ACTIVE_TEXTURE_ARB, &activeTexture);
@ -958,39 +1107,32 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type
profiler_push(__func__);
target->output = _glActivePolyList();
target->count = (mode == GL_POLYGON || mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count;
target->header_offset = target->output->vector.size;
target->start_offset = target->header_offset + 1;
PolyList* activeList = _glActivePolyList();
/* Make sure we have enough room for all the "extra" data */
aligned_vector_resize(&extras, target->count);
/* Make room in the list buffer */
GLsizei spaceNeeded = (mode == GL_POLYGON || mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count;
ClipVertex* start = aligned_vector_extend(&activeList->vector, spaceNeeded + 1);
/* Store a pointer to the header for later */
PVRHeader* header = (PVRHeader*) start++;
/* We store an offset to the first ClipVertex because clipping may generate more
* vertices, which may cause a realloc and thus invalidate start and header
* we use this startOffset to reset those pointers after clipping */
uint32_t startOffset = start - (ClipVertex*) activeList->vector.data;
/* Make room for the vertices and header */
aligned_vector_extend(&target->output->vector, target->count + 1);
profiler_checkpoint("allocate");
generate(start, mode, first, count, (GLubyte*) indices, type, doTexture, doMultitexture, doLighting);
generate(target, mode, first, count, (GLubyte*) indices, type, doTexture, doMultitexture, doLighting);
profiler_checkpoint("generate");
light(start, spaceNeeded);
light(target);
profiler_checkpoint("light");
transform(start, spaceNeeded);
transform(target);
profiler_checkpoint("transform");
if(_glIsClippingEnabled()) {
uint32_t offset = ((start - 1) - (ClipVertex*) activeList->vector.data);
#if DEBUG_CLIPPING
uint32_t i = 0;
fprintf(stderr, "=========\n");
@ -1005,11 +1147,7 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type
}
#endif
spaceNeeded = clip(&activeList->vector, offset, spaceNeeded);
/* Clipping may have realloc'd so reset the start pointer */
start = ((ClipVertex*) activeList->vector.data) + startOffset;
header = (PVRHeader*) (start - 1); /* Update the header pointer */
clip(target);
#if DEBUG_CLIPPING
fprintf(stderr, "--------\n");
@ -1027,11 +1165,11 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type
profiler_checkpoint("clip");
divide(start, spaceNeeded);
divide(target);
profiler_checkpoint("divide");
push(header, start, spaceNeeded, _glActivePolyList(), 0);
push(_glSubmissionTargetHeader(target), _glSubmissionTargetStart(target), target->count, _glActivePolyList(), 0);
profiler_checkpoint("push");
/*
@ -1042,36 +1180,37 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type
- We want to set the uv coordinates to the passed st ones
*/
TextureObject* texture1 = _glGetTexture1();
/* Multitexture implicitly disabled */
if(!texture1 || ((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) != ST_ENABLED_FLAG)) {
doMultitexture = GL_FALSE;
}
if(!doMultitexture) {
/* Multitexture actively disabled */
profiler_pop();
return;
}
TextureObject* texture1 = _glGetTexture1();
if(!texture1 || ((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) != ST_ENABLED_FLAG)) {
/* Multitexture implicitly disabled */
profiler_pop();
return;
}
/* Push back a copy of the list to the transparent poly list, including the header
(hence the - 1)
(hence the + 1)
*/
ClipVertex* vertex = aligned_vector_push_back(
&_glTransparentPolyList()->vector, start - 1, spaceNeeded + 1
&_glTransparentPolyList()->vector, (ClipVertex*) _glSubmissionTargetHeader(target), target->count + 1
);
PVRHeader* mtHeader = (PVRHeader*) vertex++;
ClipVertex* mtStart = vertex;
/* Copy ST coordinates to UV ones */
GLsizei i = spaceNeeded;
while(i--) {
vertex->uv[0] = vertex->st[0];
vertex->uv[1] = vertex->st[1];
/* Replace the UV coordinates with the ST ones */
const VertexExtra* end = aligned_vector_back(target->extras) + 1;
VertexExtra* ve = aligned_vector_at(target->extras, 0);
while(ve < end) {
vertex->uv[0] = ve->st[0];
vertex->uv[1] = ve->st[1];
++vertex;
++ve;
}
/* Store state, as we're about to mess around with it */
@ -1088,7 +1227,7 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
/* Send the buffer again to the transparent list */
push(mtHeader, mtStart, spaceNeeded, _glTransparentPolyList(), 1);
push(mtHeader, mtStart, target->count, _glTransparentPolyList(), 1);
/* Reset state */
glDepthFunc(depthFunc);

View File

@ -31,7 +31,6 @@ static void pvr_list_submit(void *src, int n) {
d[7] = *(s++);
__asm__("pref @%0" : : "r"(d));
d += 8;
s += CLIP_VERTEX_INT_PADDING;
}
/* Wait for both store queues to complete */

View File

@ -1,10 +1,11 @@
#ifndef PRIVATE_H
#define PRIVATE_H
#include <stdint.h>
#include "../include/gl.h"
#include "../containers/aligned_vector.h"
#include "../containers/named_array.h"
#include "./clip.h"
#define TRACE_ENABLED 0
#define TRACE() if(TRACE_ENABLED) {fprintf(stderr, "%s\n", __func__);}
@ -17,11 +18,8 @@
#define MAX_TEXTURE_SIZE 1024
#define CLIP_VERTEX_INT_PADDING 6
typedef struct {
pvr_poly_hdr_t hdr;
unsigned int padding[CLIP_VERTEX_INT_PADDING];
} PVRHeader;
typedef struct {
@ -31,9 +29,6 @@ typedef struct {
sy, /* Start y */
ex, /* End x */
ey; /* End y */
/* Padding to match clip vertex */
unsigned int padding[CLIP_VERTEX_INT_PADDING];
} PVRTileClipCommand; /* Tile Clip command for the pvr */
typedef struct {
@ -97,6 +92,62 @@ typedef struct {
GLboolean is_directional;
} LightSource;
/* Per-vertex record stored in the poly lists and passed through the
 * generate / light / transform / clip / divide stages. */
typedef struct {
/* Same 32 byte layout as pvr_vertex_t */
/* Vertex command word; the drawing and clipping code stores PVR_CMD_VERTEX,
 * VERTEX_CMD or VERTEX_CMD_EOL here */
uint32_t flags;
/* Position; after the perspective divide xyz[2] holds 1 / w */
float xyz[3];
/* Texture coordinates */
float uv[2];
/* Packed colour bytes (presumably indexed by B8IDX/G8IDX/R8IDX/A8IDX - confirm) */
uint8_t bgra[4];
/* In the pvr_vertex_t structure, this next 4 bytes is oargb
 * but we're not using that for now, so having W here makes the code
 * simpler */
float w;
} ClipVertex;
/* ClipVertex doesn't have room for these, so they are read into a separate,
 * parallel array (one VertexExtra per ClipVertex). Potentially 'w' will be
 * housed here too if oargb support is added. */
typedef struct {
/* Vertex normal; fed to the lighting stage */
float nxyz[3];
/* Second set of texture coordinates; copied over uv for the multitexture pass */
float st[2];
} VertexExtra;
/* Generating PVR vertices from the user-submitted data gets complicated, particularly
 * when a realloc could invalidate pointers. This structure holds all the information
 * we need on the target vertex array so it can be passed around to the various stages
 * (e.g. generate/clip etc.). Positions are stored as offsets rather than pointers;
 * use the _glSubmissionTarget* accessors to turn them into addresses.
 */
typedef struct {
/* The poly list the vertices are being written to */
PolyList* output;
uint32_t header_offset; // The offset of the header in the output list
uint32_t start_offset; // The offset of the first vertex in the output list (header_offset + 1)
uint32_t count; // The number of vertices in this output
/* Pointer to count * VertexExtra; */
AlignedVector* extras;
} SubmissionTarget;
PVRHeader* _glSubmissionTargetHeader(SubmissionTarget* target);
ClipVertex* _glSubmissionTargetStart(SubmissionTarget* target);
ClipVertex* _glSubmissionTargetEnd(SubmissionTarget* target);
/* Classification of a primitive relative to a clip plane.
 * NOTE(review): no use of these values is visible in this change; the meanings
 * below are inferred from the names only - confirm against callers. */
typedef enum {
CLIP_RESULT_ALL_IN_FRONT,
CLIP_RESULT_ALL_BEHIND,
CLIP_RESULT_ALL_ON_PLANE,
CLIP_RESULT_FRONT_TO_BACK,
CLIP_RESULT_BACK_TO_FRONT
} ClipResult;
#define A8IDX 3
#define R8IDX 2
#define G8IDX 1
#define B8IDX 0
struct SubmissionTarget;
void _glClipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t);
void _glClipTriangleStrip(SubmissionTarget* target, uint8_t fladeShade);
PolyList *_glActivePolyList();
PolyList *_glTransparentPolyList();