Use fat 64 byte vertices to improve data locality
This commit is contained in:
parent
8c187f67de
commit
36fe13095c
58
GL/clip.c
58
GL/clip.c
|
@ -85,16 +85,12 @@ const uint32_t VERTEX_CMD = 0xe0000000;
|
|||
|
||||
typedef struct {
|
||||
Vertex vertex[3];
|
||||
VertexExtra extra[3];
|
||||
uint8_t visible;
|
||||
} Triangle;
|
||||
|
||||
void _glClipTriangle(const Triangle* triangle, const uint8_t visible, SubmissionTarget* target, const uint8_t flatShade) {
|
||||
Vertex* last = NULL;
|
||||
VertexExtra* veLast = NULL;
|
||||
|
||||
const Vertex* vertices = triangle->vertex;
|
||||
const VertexExtra* extras = triangle->extra;
|
||||
|
||||
char* bgra = (char*) vertices[2].bgra;
|
||||
|
||||
|
@ -102,25 +98,22 @@ void _glClipTriangle(const Triangle* triangle, const uint8_t visible, Submission
|
|||
uint32_t finalColour = *((uint32_t*) bgra);
|
||||
|
||||
Vertex tmp;
|
||||
VertexExtra veTmp;
|
||||
|
||||
uint8_t pushedCount = 0;
|
||||
|
||||
#define IS_VISIBLE(x) (visible & (1 << (2 - (x)))) > 0
|
||||
|
||||
#define PUSH_VERT(vert, ve) \
|
||||
#define PUSH_VERT(vert) \
|
||||
last = aligned_vector_push_back(&target->output->vector, vert, 1); \
|
||||
last->flags = VERTEX_CMD; \
|
||||
veLast = aligned_vector_push_back(target->extras, ve, 1); \
|
||||
++pushedCount;
|
||||
|
||||
#define CLIP_TO_PLANE(vert1, ve1, vert2, ve2) \
|
||||
#define CLIP_TO_PLANE(vert1, vert2) \
|
||||
do { \
|
||||
float t = _glClipLineToNearZ((vert1), (vert2), &tmp); \
|
||||
interpolateFloat((vert1)->w, (vert2)->w, t, &tmp.w); \
|
||||
interpolateVec2((vert1)->uv, (vert2)->uv, t, tmp.uv); \
|
||||
interpolateVec3((ve1)->nxyz, (ve2)->nxyz, t, veTmp.nxyz); \
|
||||
interpolateVec2((ve1)->st, (ve2)->st, t, veTmp.st); \
|
||||
interpolateVec3((vert1)->nxyz, (vert2)->nxyz, t, tmp.nxyz); \
|
||||
interpolateVec2((vert1)->st, (vert2)->st, t, tmp.st); \
|
||||
if(flatShade) { \
|
||||
interpolateColour((const uint8_t*) &finalColour, (const uint8_t*) &finalColour, t, tmp.bgra); \
|
||||
} else { interpolateColour((vert1)->bgra, (vert2)->bgra, t, tmp.bgra); } \
|
||||
|
@ -130,44 +123,38 @@ void _glClipTriangle(const Triangle* triangle, const uint8_t visible, Submission
|
|||
uint8_t v1 = IS_VISIBLE(1);
|
||||
uint8_t v2 = IS_VISIBLE(2);
|
||||
if(v0) {
|
||||
PUSH_VERT(&vertices[0], &extras[0]);
|
||||
PUSH_VERT(&vertices[0]);
|
||||
}
|
||||
|
||||
if(v0 != v1) {
|
||||
CLIP_TO_PLANE(&vertices[0], &extras[0], &vertices[1], &extras[1]);
|
||||
PUSH_VERT(&tmp, &veTmp);
|
||||
CLIP_TO_PLANE(&vertices[0], &vertices[1]);
|
||||
PUSH_VERT(&tmp);
|
||||
}
|
||||
|
||||
if(v1) {
|
||||
PUSH_VERT(&vertices[1], &extras[1]);
|
||||
PUSH_VERT(&vertices[1]);
|
||||
}
|
||||
|
||||
if(v1 != v2) {
|
||||
CLIP_TO_PLANE(&vertices[1], &extras[1], &vertices[2], &extras[2]);
|
||||
PUSH_VERT(&tmp, &veTmp);
|
||||
CLIP_TO_PLANE(&vertices[1], &vertices[2]);
|
||||
PUSH_VERT(&tmp);
|
||||
}
|
||||
|
||||
if(v2) {
|
||||
PUSH_VERT(&vertices[2], &extras[2]);
|
||||
PUSH_VERT(&vertices[2]);
|
||||
}
|
||||
|
||||
if(v2 != v0) {
|
||||
CLIP_TO_PLANE(&vertices[2], &extras[2], &vertices[0], &extras[0]);
|
||||
PUSH_VERT(&tmp, &veTmp);
|
||||
CLIP_TO_PLANE(&vertices[2], &vertices[0]);
|
||||
PUSH_VERT(&tmp);
|
||||
}
|
||||
|
||||
if(pushedCount == 4) {
|
||||
Vertex* prev = last - 1;
|
||||
VertexExtra* prevVe = veLast - 1;
|
||||
|
||||
tmp = *prev;
|
||||
veTmp = *prevVe;
|
||||
|
||||
*prev = *last;
|
||||
*prevVe = *veLast;
|
||||
|
||||
*last = tmp;
|
||||
*veLast = veTmp;
|
||||
|
||||
prev->flags = VERTEX_CMD;
|
||||
last->flags = VERTEX_CMD_EOL;
|
||||
|
@ -309,15 +296,6 @@ void _glClipTriangleStrip(SubmissionTarget* target, uint8_t fladeShade) {
|
|||
TO_CLIP[CLIP_COUNT].vertex[0] = *v1;
|
||||
TO_CLIP[CLIP_COUNT].vertex[1] = *v2;
|
||||
TO_CLIP[CLIP_COUNT].vertex[2] = *v3;
|
||||
|
||||
VertexExtra* ve1 = (VertexExtra*) aligned_vector_at(target->extras, vi1);
|
||||
VertexExtra* ve2 = (VertexExtra*) aligned_vector_at(target->extras, vi2);
|
||||
VertexExtra* ve3 = (VertexExtra*) aligned_vector_at(target->extras, vi3);
|
||||
|
||||
TO_CLIP[CLIP_COUNT].extra[0] = *ve1;
|
||||
TO_CLIP[CLIP_COUNT].extra[1] = *ve2;
|
||||
TO_CLIP[CLIP_COUNT].extra[2] = *ve3;
|
||||
|
||||
TO_CLIP[CLIP_COUNT].visible = visible;
|
||||
++CLIP_COUNT;
|
||||
|
||||
|
@ -359,11 +337,6 @@ void _glClipTriangleStrip(SubmissionTarget* target, uint8_t fladeShade) {
|
|||
TO_CLIP[CLIP_COUNT].vertex[1] = *v2;
|
||||
TO_CLIP[CLIP_COUNT].vertex[2] = *v4;
|
||||
|
||||
VertexExtra* ve4 = (VertexExtra*) aligned_vector_at(target->extras, vi4);
|
||||
TO_CLIP[CLIP_COUNT].extra[0] = *(VertexExtra*) aligned_vector_at(target->extras, vi3);
|
||||
TO_CLIP[CLIP_COUNT].extra[1] = *(VertexExtra*) aligned_vector_at(target->extras, vi2);
|
||||
TO_CLIP[CLIP_COUNT].extra[2] = *ve4;
|
||||
|
||||
visible = (_VERT_VISIBLE(v3) ? 4 : 0) |
|
||||
(_VERT_VISIBLE(v2) ? 2 : 0) |
|
||||
(_VERT_VISIBLE(v4) ? 1 : 0);
|
||||
|
@ -385,11 +358,6 @@ void _glClipTriangleStrip(SubmissionTarget* target, uint8_t fladeShade) {
|
|||
swapVertex(v3, v4);
|
||||
v3->flags = VERTEX_CMD;
|
||||
v4->flags = VERTEX_CMD;
|
||||
|
||||
/* Swap the extra data too */
|
||||
VertexExtra t = *ve4;
|
||||
*ve3 = *ve4;
|
||||
*ve4 = t;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
|
49
GL/draw.c
49
GL/draw.c
|
@ -651,35 +651,35 @@ GL_FORCE_INLINE void _readUVData(const GLuint first, const GLuint count, Vertex*
|
|||
}
|
||||
}
|
||||
|
||||
GL_FORCE_INLINE void _readSTData(const GLuint first, const GLuint count, VertexExtra* extra) {
|
||||
GL_FORCE_INLINE void _readSTData(const GLuint first, const GLuint count, Vertex* outpu) {
|
||||
const GLubyte ststride = (ST_POINTER.stride) ? ST_POINTER.stride : ST_POINTER.size * byte_size(ST_POINTER.type);
|
||||
const void* stptr = ((GLubyte*) ST_POINTER.ptr + (first * ststride));
|
||||
|
||||
ReadUVFunc func = calcReadUVFunc();
|
||||
GLubyte* out = (GLubyte*) extra[0].st;
|
||||
GLubyte* out = (GLubyte*) outpu[0].st;
|
||||
|
||||
ITERATE(count) {
|
||||
func(stptr, out);
|
||||
stptr += ststride;
|
||||
out += sizeof(VertexExtra);
|
||||
out += sizeof(Vertex);
|
||||
}
|
||||
}
|
||||
|
||||
GL_FORCE_INLINE void _readNormalData(const GLuint first, const GLuint count, VertexExtra* extra) {
|
||||
GL_FORCE_INLINE void _readNormalData(const GLuint first, const GLuint count, Vertex* output) {
|
||||
const GLuint nstride = (NORMAL_POINTER.stride) ? NORMAL_POINTER.stride : NORMAL_POINTER.size * byte_size(NORMAL_POINTER.type);
|
||||
const void* nptr = ((GLubyte*) NORMAL_POINTER.ptr + (first * nstride));
|
||||
|
||||
ReadNormalFunc func = calcReadNormalFunc();
|
||||
GLubyte* out = (GLubyte*) extra[0].nxyz;
|
||||
GLubyte* out = (GLubyte*) output[0].nxyz;
|
||||
|
||||
ITERATE(count) {
|
||||
func(nptr, out);
|
||||
nptr += nstride;
|
||||
out += sizeof(VertexExtra);
|
||||
out += sizeof(Vertex);
|
||||
}
|
||||
|
||||
if(_glIsNormalizeEnabled()) {
|
||||
GLubyte* ptr = (GLubyte*) extra->nxyz;
|
||||
GLubyte* ptr = (GLubyte*) output->nxyz;
|
||||
ITERATE(count) {
|
||||
GLfloat* n = (GLfloat*) ptr;
|
||||
float temp = n[0] * n[0];
|
||||
|
@ -691,7 +691,7 @@ GL_FORCE_INLINE void _readNormalData(const GLuint first, const GLuint count, Ver
|
|||
n[1] *= ilength;
|
||||
n[2] *= ilength;
|
||||
|
||||
ptr += sizeof(VertexExtra);
|
||||
ptr += sizeof(Vertex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -724,7 +724,6 @@ static void generateElements(
|
|||
GLubyte* nxyz;
|
||||
|
||||
Vertex* output = _glSubmissionTargetStart(target);
|
||||
VertexExtra* ve = aligned_vector_at(target->extras, 0);
|
||||
|
||||
uint32_t i = first;
|
||||
uint32_t idx = 0;
|
||||
|
@ -762,12 +761,11 @@ static void generateElements(
|
|||
pos_func(xyz, (GLubyte*) output->xyz);
|
||||
uv_func(uv, (GLubyte*) output->uv);
|
||||
diffuse_func(bgra, output->bgra);
|
||||
st_func(st, (GLubyte*) ve->st);
|
||||
normal_func(nxyz, (GLubyte*) ve->nxyz);
|
||||
st_func(st, (GLubyte*) output->st);
|
||||
normal_func(nxyz, (GLubyte*) output->nxyz);
|
||||
|
||||
output->flags = PVR_CMD_VERTEX;
|
||||
++output;
|
||||
++ve;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -804,10 +802,8 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei
|
|||
}
|
||||
}
|
||||
|
||||
VertexExtra* ve = aligned_vector_at(target->extras, 0);
|
||||
|
||||
_readNormalData(first, count, ve);
|
||||
_readSTData(first, count, ve);
|
||||
_readNormalData(first, count, start);
|
||||
_readSTData(first, count, start);
|
||||
|
||||
} else {
|
||||
generateElements(
|
||||
|
@ -917,14 +913,13 @@ static void light(SubmissionTarget* target) {
|
|||
|
||||
/* Perform lighting calculations and manipulate the colour */
|
||||
Vertex* vertex = _glSubmissionTargetStart(target);
|
||||
VertexExtra* extra = aligned_vector_at(target->extras, 0);
|
||||
EyeSpaceData* eye_space = (EyeSpaceData*) eye_space_data->data;
|
||||
|
||||
_glMatrixLoadModelView();
|
||||
mat_transform3(vertex->xyz, eye_space->xyz, target->count, sizeof(Vertex), sizeof(EyeSpaceData));
|
||||
|
||||
_glMatrixLoadNormal();
|
||||
mat_transform_normal3(extra->nxyz, eye_space->n, target->count, sizeof(VertexExtra), sizeof(EyeSpaceData));
|
||||
mat_transform_normal3(vertex->nxyz, eye_space->n, target->count, sizeof(Vertex), sizeof(EyeSpaceData));
|
||||
|
||||
EyeSpaceData* ES = aligned_vector_at(eye_space_data, 0);
|
||||
_glPerformLighting(vertex, ES, target->count);
|
||||
|
@ -1005,18 +1000,13 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
|
|||
}
|
||||
|
||||
static SubmissionTarget* target = NULL;
|
||||
static AlignedVector extras;
|
||||
|
||||
/* Initialization of the target and extras */
|
||||
/* Initialization of the target */
|
||||
if(!target) {
|
||||
target = (SubmissionTarget*) malloc(sizeof(SubmissionTarget));
|
||||
target->extras = NULL;
|
||||
target->count = 0;
|
||||
target->output = NULL;
|
||||
target->header_offset = target->start_offset = 0;
|
||||
|
||||
aligned_vector_init(&extras, sizeof(VertexExtra));
|
||||
target->extras = &extras;
|
||||
}
|
||||
|
||||
GLboolean doMultitexture, doTexture, doLighting;
|
||||
|
@ -1057,9 +1047,6 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
|
|||
|
||||
assert(target->count);
|
||||
|
||||
/* Make sure we have enough room for all the "extra" data */
|
||||
aligned_vector_resize(&extras, target->count);
|
||||
|
||||
/* Make room for the vertices and header */
|
||||
aligned_vector_extend(&target->output->vector, target->count + 1);
|
||||
generate(target, mode, first, count, (GLubyte*) indices, type);
|
||||
|
@ -1087,8 +1074,6 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
|
|||
|
||||
clip(target);
|
||||
|
||||
assert(extras.size == target->count);
|
||||
|
||||
#if DEBUG_CLIPPING
|
||||
fprintf(stderr, "--------\n");
|
||||
for(i = 0; i < target->count; ++i) {
|
||||
|
@ -1139,12 +1124,10 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
|
|||
PVRHeader* mtHeader = (PVRHeader*) vertex++;
|
||||
|
||||
/* Replace the UV coordinates with the ST ones */
|
||||
VertexExtra* ve = aligned_vector_at(target->extras, 0);
|
||||
ITERATE(target->count) {
|
||||
vertex->uv[0] = ve->st[0];
|
||||
vertex->uv[1] = ve->st[1];
|
||||
vertex->uv[0] = vertex->st[0];
|
||||
vertex->uv[1] = vertex->st[1];
|
||||
++vertex;
|
||||
++ve;
|
||||
}
|
||||
|
||||
/* Send the buffer again to the transparent list */
|
||||
|
|
|
@ -15,13 +15,15 @@ static PolyList OP_LIST;
|
|||
static PolyList PT_LIST;
|
||||
static PolyList TR_LIST;
|
||||
|
||||
static const int STRIDE = sizeof(Vertex) / sizeof(GLuint);
|
||||
|
||||
static void pvr_list_submit(void *src, int n) {
|
||||
GLuint *d = TA_SQ_ADDR;
|
||||
GLuint *s = src;
|
||||
|
||||
/* fill/write queues as many times necessary */
|
||||
while(n--) {
|
||||
__asm__("pref @%0" : : "r"(s + 8)); /* prefetch 32 bytes for next loop */
|
||||
__asm__("pref @%0" : : "r"(s + STRIDE)); /* prefetch the start of the next vertex (STRIDE words, i.e. sizeof(Vertex) bytes, ahead) for the next loop */
|
||||
d[0] = *(s++);
|
||||
d[1] = *(s++);
|
||||
d[2] = *(s++);
|
||||
|
@ -32,6 +34,7 @@ static void pvr_list_submit(void *src, int n) {
|
|||
d[7] = *(s++);
|
||||
__asm__("pref @%0" : : "r"(d));
|
||||
d += 8;
|
||||
s += (STRIDE - 8);
|
||||
}
|
||||
|
||||
/* Wait for both store queues to complete */
|
||||
|
|
27
GL/private.h
27
GL/private.h
|
@ -198,11 +198,23 @@ typedef struct {
|
|||
float xyz[3];
|
||||
float uv[2];
|
||||
uint8_t bgra[4];
|
||||
uint8_t obgra[4];
|
||||
/* End of the 32 bytes that match pvr_vertex_t */
|
||||
|
||||
/* In the pvr_vertex_t structure, these next 4 bytes are oargb
|
||||
* but we're not using that for now, so having W here makes the code
|
||||
* simpler */
|
||||
float w;
|
||||
/*
|
||||
* The following are necessary for our purposes
|
||||
* W - W coordinate - for clipping
|
||||
* ST - ST coordinate for multitexture
|
||||
* NXYZ - Normal
|
||||
*/
|
||||
|
||||
float w; // 4
|
||||
float st[2]; // +8 (12)
|
||||
float nxyz[3]; // +12 (24)
|
||||
uint8_t visible; // +1 (25)
|
||||
|
||||
uint8_t padding0[3]; // +3 (28)
|
||||
uint32_t padding1; // +4 (32)
|
||||
} Vertex;
|
||||
|
||||
|
||||
|
@ -242,13 +254,6 @@ do { \
|
|||
*b = c; \
|
||||
} while(0)
|
||||
|
||||
/* ClipVertex doesn't have room for these, so we need to parse them
|
||||
* out separately. Potentially 'w' will be housed here if we support oargb */
|
||||
typedef struct {
|
||||
float nxyz[3];
|
||||
float st[2];
|
||||
} VertexExtra;
|
||||
|
||||
/* Generating PVR vertices from the user-submitted data gets complicated, particularly
|
||||
* when a realloc could invalidate pointers. This structure holds all the information
|
||||
* we need on the target vertex array to allow passing around to the various stages (e.g. generate/clip etc.)
|
||||
|
|
Loading…
Reference in New Issue
Block a user