diff --git a/GL/clip.c b/GL/clip.c index c578192..c140c01 100644 --- a/GL/clip.c +++ b/GL/clip.c @@ -1,6 +1,7 @@ #include #include #include +#include #ifdef _arch_dreamcast #include @@ -9,7 +10,7 @@ #endif #include "profiler.h" -#include "clip.h" +#include "private.h" #include "../containers/aligned_vector.h" @@ -23,8 +24,8 @@ void _glEnableClipping(unsigned char v) { ZCLIP_ENABLED = v; } -void clipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t) __attribute__((optimize("fast-math"))); -void clipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t) { +void _glClipLineToNearZ(const Vertex* v1, const Vertex* v2, Vertex* vout, float* t) __attribute__((optimize("fast-math"))); +void _glClipLineToNearZ(const Vertex* v1, const Vertex* v2, Vertex* vout, float* t) { const float NEAR_PLANE = 0.2; // FIXME: this needs to be read from the projection matrix.. somehow *t = (NEAR_PLANE - v1->w) / (v2->w - v1->w); @@ -73,13 +74,22 @@ static inline void interpolateColour(const uint8_t* v1, const uint8_t* v2, const const uint32_t VERTEX_CMD_EOL = 0xf0000000; const uint32_t VERTEX_CMD = 0xe0000000; -void clipTriangle(const ClipVertex* vertices, const uint8_t visible, AlignedVector* output, const uint8_t flatShade) __attribute__((optimize("fast-math"))); -void clipTriangle(const ClipVertex* vertices, const uint8_t visible, AlignedVector* output, const uint8_t flatShade) { +typedef struct { + Vertex vertex[3]; + VertexExtra extra[3]; + uint8_t visible; +} Triangle; + +void _glClipTriangle(const Triangle* triangle, const uint8_t visible, SubmissionTarget* target, const uint8_t flatShade) __attribute__((optimize("fast-math"))); +void _glClipTriangle(const Triangle* triangle, const uint8_t visible, SubmissionTarget* target, const uint8_t flatShade) { uint8_t i, c = 0; - uint8_t lastVisible = 255; - ClipVertex* last = NULL; + Vertex* last = NULL; + VertexExtra* veLast = NULL; + + const Vertex* vertices = triangle->vertex; + 
const VertexExtra* extras = triangle->extra; /* Used when flat shading is enabled */ uint32_t finalColour = *((uint32_t*) vertices[2].bgra); @@ -87,7 +97,9 @@ void clipTriangle(const ClipVertex* vertices, const uint8_t visible, AlignedVect for(i = 0; i < 4; ++i) { uint8_t thisIndex = (i == 3) ? 0 : i; - ClipVertex next; + Vertex next; + VertexExtra veNext; + next.flags = VERTEX_CMD; uint8_t thisVisible = (visible & (1 << (2 - thisIndex))) > 0; @@ -95,15 +107,20 @@ void clipTriangle(const ClipVertex* vertices, const uint8_t visible, AlignedVect uint8_t lastIndex = (i == 3) ? 2 : thisIndex - 1; if(lastVisible < 255 && lastVisible != thisVisible) { - const ClipVertex* v1 = &vertices[lastIndex]; - const ClipVertex* v2 = &vertices[thisIndex]; + const Vertex* v1 = &vertices[lastIndex]; + const Vertex* v2 = &vertices[thisIndex]; + + const VertexExtra* ve1 = &extras[lastIndex]; + const VertexExtra* ve2 = &extras[thisIndex]; + float t; - clipLineToNearZ(v1, v2, &next, &t); + _glClipLineToNearZ(v1, v2, &next, &t); interpolateFloat(v1->w, v2->w, t, &next.w); - interpolateVec3(v1->nxyz, v2->nxyz, t, next.nxyz); interpolateVec2(v1->uv, v2->uv, t, next.uv); - interpolateVec2(v1->st, v2->st, t, next.st); + + interpolateVec3(ve1->nxyz, ve2->nxyz, t, veNext.nxyz); + interpolateVec2(ve1->st, ve2->st, t, veNext.st); if(flatShade) { *((uint32_t*) next.bgra) = finalColour; @@ -111,15 +128,22 @@ void clipTriangle(const ClipVertex* vertices, const uint8_t visible, AlignedVect interpolateColour(v1->bgra, v2->bgra, t, next.bgra); } - last = aligned_vector_push_back(output, &next, 1); + /* Push back the new vertices to the end of both the ClipVertex and VertexExtra lists */ + last = aligned_vector_push_back(&target->output->vector, &next, 1); last->flags = VERTEX_CMD; + + veLast = aligned_vector_push_back(target->extras, &veNext, 1); + ++c; } } if(thisVisible && i != 3) { - last = aligned_vector_push_back(output, &vertices[thisIndex], 1); + last = 
aligned_vector_push_back(&target->output->vector, &vertices[thisIndex], 1); last->flags = VERTEX_CMD; + + veLast = aligned_vector_push_back(target->extras, &extras[thisIndex], 1); + ++c; } @@ -129,18 +153,26 @@ void clipTriangle(const ClipVertex* vertices, const uint8_t visible, AlignedVect if(last) { if(c == 4) { /* Convert to two triangles */ - ClipVertex newVerts[3]; + Vertex newVerts[3]; newVerts[0] = *(last - 3); newVerts[1] = *(last - 1); newVerts[2] = *(last); + VertexExtra newExtras[3]; + newExtras[0] = *(veLast - 3); + newExtras[1] = *(veLast - 1); + newExtras[2] = *(veLast); + (last - 1)->flags = VERTEX_CMD_EOL; newVerts[0].flags = VERTEX_CMD; newVerts[1].flags = VERTEX_CMD; newVerts[2].flags = VERTEX_CMD_EOL; - aligned_vector_resize(output, output->size - 1); - aligned_vector_push_back(output, newVerts, 3); + aligned_vector_resize(&target->output->vector, target->output->vector.size - 1); + aligned_vector_push_back(&target->output->vector, newVerts, 3); + + aligned_vector_resize(target->extras, target->extras->size - 1); + aligned_vector_push_back(target->extras, newExtras, 3); } else { last->flags = VERTEX_CMD_EOL; } @@ -148,7 +180,7 @@ void clipTriangle(const ClipVertex* vertices, const uint8_t visible, AlignedVect } } -static inline void markDead(ClipVertex* vert) { +static inline void markDead(Vertex* vert) { vert->flags = VERTEX_CMD_EOL; } @@ -161,39 +193,33 @@ static inline void markDead(ClipVertex* vert) { #define B011 3 #define B110 6 -void clipTriangleStrip2(AlignedVector* vertices, uint32_t offset, uint8_t fladeShade) { - /* Room for clipping 16 triangles */ - typedef struct { - ClipVertex vertex[3]; - uint8_t visible; - } Triangle; +#define MAX_CLIP_TRIANGLES 255 - static Triangle TO_CLIP[256]; +void _glClipTriangleStrip(SubmissionTarget* target, uint8_t fladeShade) { + static Triangle TO_CLIP[MAX_CLIP_TRIANGLES]; static uint8_t CLIP_COUNT = 0; CLIP_COUNT = 0; - uint32_t i = 0; - /* Skip the header */ + Vertex* vertex = 
_glSubmissionTargetStart(target); + const Vertex* end = _glSubmissionTargetEnd(target); + const Vertex* start = vertex; - assert(offset < vertices->size); - ClipVertex* header = (ClipVertex*) aligned_vector_at(vertices, offset); - ClipVertex* vertex = header + 1; + int32_t triangle = -1; - uint32_t count = vertices->size - offset; + /* Go to the (potential) end of the first triangle */ + vertex++; - int32_t triangle = 0; + uint32_t vi1, vi2, vi3; - /* Start at 3 due to the header */ - for(i = 3; i < count; ++i, ++triangle) { - assert(offset + i < vertices->size); - - vertex = aligned_vector_at(vertices, offset + i); + while(vertex < end) { + vertex++; + triangle++; uint8_t even = (triangle % 2) == 0; - ClipVertex* v1 = (even) ? vertex - 2 : vertex - 1; - ClipVertex* v2 = (even) ? vertex - 1 : vertex - 2; - ClipVertex* v3 = vertex; + Vertex* v1 = (even) ? vertex - 2 : vertex - 1; + Vertex* v2 = (even) ? vertex - 1 : vertex - 2; + Vertex* v3 = vertex; /* Skip ahead if we don't have a complete triangle yet */ if(v1->flags != VERTEX_CMD || v2->flags != VERTEX_CMD) { @@ -201,6 +227,11 @@ void clipTriangleStrip2(AlignedVector* vertices, uint32_t offset, uint8_t fladeS continue; } + /* Indexes into extras array */ + vi1 = v1 - start; + vi2 = v2 - start; + vi3 = v3 - start; + uint8_t visible = ((v1->w > 0) ? 4 : 0) | ((v2->w > 0) ? 2 : 0) | ((v3->w > 0) ? 
1 : 0); switch(visible) { @@ -234,10 +265,7 @@ void clipTriangleStrip2(AlignedVector* vertices, uint32_t offset, uint8_t fladeS markDead(v3); } else { markDead(v1); - ClipVertex tmp = *v2; - *v2 = *v3; - *v3 = tmp; - + swapVertex(v2, v3); triangle = -1; v2->flags = VERTEX_CMD; v3->flags = VERTEX_CMD; @@ -249,10 +277,21 @@ void clipTriangleStrip2(AlignedVector* vertices, uint32_t offset, uint8_t fladeS case B101: case B011: case B110: + assert(CLIP_COUNT < MAX_CLIP_TRIANGLES); + /* Store the triangle for clipping */ TO_CLIP[CLIP_COUNT].vertex[0] = *v1; TO_CLIP[CLIP_COUNT].vertex[1] = *v2; TO_CLIP[CLIP_COUNT].vertex[2] = *v3; + + VertexExtra* ve1 = (VertexExtra*) aligned_vector_at(target->extras, vi1); + VertexExtra* ve2 = (VertexExtra*) aligned_vector_at(target->extras, vi2); + VertexExtra* ve3 = (VertexExtra*) aligned_vector_at(target->extras, vi3); + + TO_CLIP[CLIP_COUNT].extra[0] = *ve1; + TO_CLIP[CLIP_COUNT].extra[1] = *ve2; + TO_CLIP[CLIP_COUNT].extra[2] = *ve3; + TO_CLIP[CLIP_COUNT].visible = visible; ++CLIP_COUNT; @@ -287,37 +326,43 @@ void clipTriangleStrip2(AlignedVector* vertices, uint32_t offset, uint8_t fladeS triangle = -1; } else { - ClipVertex* v4 = vertex + 1; + Vertex* v4 = v3 + 1; + uint32_t vi4 = v4 - start; TO_CLIP[CLIP_COUNT].vertex[0] = *v3; TO_CLIP[CLIP_COUNT].vertex[1] = *v2; TO_CLIP[CLIP_COUNT].vertex[2] = *v4; + VertexExtra* ve4 = (VertexExtra*) aligned_vector_at(target->extras, vi4); + TO_CLIP[CLIP_COUNT].extra[0] = *(VertexExtra*) aligned_vector_at(target->extras, vi3); + TO_CLIP[CLIP_COUNT].extra[1] = *(VertexExtra*) aligned_vector_at(target->extras, vi2); + TO_CLIP[CLIP_COUNT].extra[2] = *ve4; + visible = ((v3->w > 0) ? 4 : 0) | ((v2->w > 0) ? 2 : 0) | ((v4->w > 0) ? 
1 : 0); TO_CLIP[CLIP_COUNT].visible = visible; ++CLIP_COUNT; - /* Restart strip */ + // Restart strip triangle = -1; - /* Mark the second vertex as the end of the strip */ + // Mark the second vertex as the end of the strip (vertex - 1)->flags = VERTEX_CMD_EOL; if(v4->flags == VERTEX_CMD_EOL) { - markDead(vertex); + markDead(v3); markDead(v4); } else { - /* Swap the next vertices to start a new strip */ - ClipVertex tmp = *vertex; - *vertex = *v4; - *v4 = tmp; - - vertex->flags = VERTEX_CMD; + // Swap the next vertices to start a new strip + swapVertex(v3, v4); + v3->flags = VERTEX_CMD; v4->flags = VERTEX_CMD; - } - i += 1; + /* Swap the extra data too (save *ve3 first so its old value ends up in *ve4) */ + VertexExtra t = *ve3; + *ve3 = *ve4; + *ve4 = t; + } } break; default: @@ -326,7 +371,8 @@ void clipTriangleStrip2(AlignedVector* vertices, uint32_t offset, uint8_t fladeS } /* Now, clip all the triangles and append them to the output */ + GLushort i; for(i = 0; i < CLIP_COUNT; ++i) { - clipTriangle(TO_CLIP[i].vertex, TO_CLIP[i].visible, vertices, fladeShade); + _glClipTriangle(&TO_CLIP[i], TO_CLIP[i].visible, target, fladeShade); } } diff --git a/GL/clip.h b/GL/clip.h deleted file mode 100644 index ce20ee6..0000000 --- a/GL/clip.h +++ /dev/null @@ -1,49 +0,0 @@ -#ifndef CLIP_H -#define CLIP_H - -#include - -#include "../containers/aligned_vector.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum { - CLIP_RESULT_ALL_IN_FRONT, - CLIP_RESULT_ALL_BEHIND, - CLIP_RESULT_ALL_ON_PLANE, - CLIP_RESULT_FRONT_TO_BACK, - CLIP_RESULT_BACK_TO_FRONT -} ClipResult; - - -#define A8IDX 3 -#define R8IDX 2 -#define G8IDX 1 -#define B8IDX 0 - - -typedef struct { - /* Same 32 byte layout as pvr_vertex_t */ - uint32_t flags; - float xyz[3]; - float uv[2]; - uint8_t bgra[4]; - uint32_t oargb; - - /* Important, we have 24 bytes here. 
That means when submitting to the SQs we need to - * increment the pointer by 6 */ - float nxyz[3]; /* Normal */ - float w; - float st[2]; -} ClipVertex; - -void clipLineToNearZ(const ClipVertex* v1, const ClipVertex* v2, ClipVertex* vout, float* t); -void clipTriangleStrip2(AlignedVector* vertices, uint32_t offset, uint8_t fladeShade); - -#ifdef __cplusplus -} -#endif - -#endif // CLIP_H diff --git a/GL/draw.c b/GL/draw.c index 675043a..4d48169 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -20,6 +20,12 @@ static AttribPointer DIFFUSE_POINTER; static GLuint ENABLED_VERTEX_ATTRIBUTES = 0; static GLubyte ACTIVE_CLIENT_TEXTURE = 0; + +#define ITERATE(count) \ + GLuint i = count; \ + while(i--) + + void _glInitAttributePointers() { TRACE(); @@ -65,248 +71,305 @@ static inline GLuint byte_size(GLenum type) { typedef void (*FloatParseFunc)(GLfloat* out, const GLubyte* in); typedef void (*ByteParseFunc)(GLubyte* out, const GLubyte* in); -typedef void (*PolyBuildFunc)(ClipVertex* first, ClipVertex* previous, ClipVertex* vertex, ClipVertex* next, const GLsizei i); +typedef void (*PolyBuildFunc)(Vertex* first, Vertex* previous, Vertex* vertex, Vertex* next, const GLsizei i); static void _readVertexData3f3f(const float* input, GLuint count, GLubyte stride, float* output) { - const float* end = (float*) (((GLubyte*) input) + (count * stride)); - - while(input < end) { + ITERATE(count) { output[0] = input[0]; output[1] = input[1]; output[2] = input[2]; input = (float*) (((GLubyte*) input) + stride); - output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (float*) (((GLubyte*) output) + sizeof(Vertex)); } } -static void _readVertexData3us3f(const GLushort* input, GLuint count, GLubyte stride, float* output) { - const GLushort* end = (GLushort*) ((GLubyte*) input) + (count * stride); +/* VE == VertexExtra */ +static void _readVertexData3f3fVE(const float* input, GLuint count, GLubyte stride, float* output) { + ITERATE(count) { + output[0] = input[0]; + output[1] = 
input[1]; + output[2] = input[2]; - while(input < end) { + input = (float*) (((GLubyte*) input) + stride); + output = (float*) (((GLubyte*) output) + sizeof(VertexExtra)); + } +} + +static void _readVertexData3us3f(const GLushort* input, GLuint count, GLubyte stride, GLfloat* output) { + ITERATE(count) { output[0] = input[0]; output[1] = input[1]; output[2] = input[2]; input = (GLushort*) (((GLubyte*) input) + stride); - output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (float*) (((GLubyte*) output) + sizeof(Vertex)); } } -static void _readVertexData3ui3f(const GLuint* input, GLuint count, GLubyte stride, float* output) { - const GLuint* end = (GLuint*) ((GLubyte*) input) + (count * stride); +static void _readVertexData3us3fVE(const GLushort* input, GLuint count, GLubyte stride, GLfloat* output) { + ITERATE(count) { + output[0] = input[0]; + output[1] = input[1]; + output[2] = input[2]; - while(input < end) { + input = (GLushort*) (((GLubyte*) input) + stride); + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); + } +} + +static void _readVertexData3ui3f(const GLuint* input, GLuint count, GLubyte stride, GLfloat* output) { + ITERATE(count) { output[0] = input[0]; output[1] = input[1]; output[2] = input[2]; input = (GLuint*) (((GLubyte*) input) + stride); - output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (float*) (((GLubyte*) output) + sizeof(Vertex)); + } +} + +static void _readVertexData3ui3fVE(const GLuint* input, GLuint count, GLubyte stride, GLfloat* output) { + ITERATE(count) { + output[0] = input[0]; + output[1] = input[1]; + output[2] = input[2]; + + input = (GLuint*) (((GLubyte*) input) + stride); + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); } } static void _readVertexData3ub3f(const GLubyte* input, GLuint count, GLubyte stride, float* output) { const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; - const GLubyte* end = ((GLubyte*) input) + (count * stride); - - while(input 
< end) { + ITERATE(count) { output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE; output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE; output[2] = input[2] * ONE_OVER_TWO_FIVE_FIVE; input += stride; - output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (float*) (((GLubyte*) output) + sizeof(Vertex)); + } +} + +static void _readVertexData3ub3fVE(const GLubyte* input, GLuint count, GLubyte stride, GLfloat* output) { + const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; + ITERATE(count) { + output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE; + output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE; + output[2] = input[2] * ONE_OVER_TWO_FIVE_FIVE; + + input += stride; + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); } } static void _readVertexData2f2f(const float* input, GLuint count, GLubyte stride, float* output) { - const float* end = (float*) ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[0] = input[0]; output[1] = input[1]; input = (float*) (((GLubyte*) input) + stride); - output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (float*) (((GLubyte*) output) + sizeof(Vertex)); + } +} + +static void _readVertexData2f2fVE(const float* input, GLuint count, GLubyte stride, GLfloat* output) { + ITERATE(count) { + output[0] = input[0]; + output[1] = input[1]; + + input = (float*) (((GLubyte*) input) + stride); + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); } } static void _readVertexData2f3f(const float* input, GLuint count, GLubyte stride, float* output) { - const float* end = (float*) ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[0] = input[0]; output[1] = input[1]; output[2] = 0.0f; input = (float*) (((GLubyte*) input) + stride); - output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (float*) (((GLubyte*) output) + sizeof(Vertex)); } } static void _readVertexData2ub3f(const GLubyte* input, GLuint count, GLubyte 
stride, float* output) { const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; - const GLubyte* end = ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE; output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE; output[2] = 0.0f; input += stride; - output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (float*) (((GLubyte*) output) + sizeof(Vertex)); } } static void _readVertexData2us3f(const GLushort* input, GLuint count, GLubyte stride, float* output) { - const GLushort* end = (GLushort*) ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[0] = input[0]; output[1] = input[1]; output[2] = 0.0f; input = (GLushort*) (((GLubyte*) input) + stride); - output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (float*) (((GLubyte*) output) + sizeof(Vertex)); } } static void _readVertexData2us2f(const GLushort* input, GLuint count, GLubyte stride, float* output) { - const GLushort* end = (GLushort*) ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[0] = input[0]; output[1] = input[1]; input = (GLushort*) (((GLubyte*) input) + stride); - output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (float*) (((GLubyte*) output) + sizeof(Vertex)); + } +} + +static void _readVertexData2us2fVE(const GLushort* input, GLuint count, GLubyte stride, GLfloat* output) { + ITERATE(count) { + output[0] = input[0]; + output[1] = input[1]; + + input = (GLushort*) (((GLubyte*) input) + stride); + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); } } static void _readVertexData2ui2f(const GLuint* input, GLuint count, GLubyte stride, float* output) { - const GLuint* end = (GLuint*) ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[0] = input[0]; output[1] = input[1]; input = (GLuint*) (((GLubyte*) input) + stride); - output = (float*) (((GLubyte*) 
output) + sizeof(ClipVertex)); + output = (float*) (((GLubyte*) output) + sizeof(Vertex)); + } +} + +static void _readVertexData2ui2fVE(const GLuint* input, GLuint count, GLubyte stride, GLfloat* output) { + ITERATE(count) { + output[0] = input[0]; + output[1] = input[1]; + + input = (GLuint*) (((GLubyte*) input) + stride); + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); } } static void _readVertexData2ub2f(const GLubyte* input, GLuint count, GLubyte stride, float* output) { const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; - const GLubyte* end = (GLubyte*) ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE; output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE; input = (((GLubyte*) input) + stride); - output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (float*) (((GLubyte*) output) + sizeof(Vertex)); + } +} + +static void _readVertexData2ub2fVE(const GLubyte* input, GLuint count, GLubyte stride, GLfloat* output) { + const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f; + ITERATE(count) { + output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE; + output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE; + + input = (((GLubyte*) input) + stride); + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); } } static void _readVertexData2ui3f(const GLuint* input, GLuint count, GLubyte stride, float* output) { - const GLuint* end = (GLuint*) ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[0] = input[0]; output[1] = input[1]; output[2] = 0.0f; input = (GLuint*) (((GLubyte*) input) + stride); - output = (float*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (float*) (((GLubyte*) output) + sizeof(Vertex)); } } static void _readVertexData4ubARGB(const GLubyte* input, GLuint count, GLubyte stride, GLubyte* output) { - const GLubyte* end = ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { 
output[R8IDX] = input[0]; output[G8IDX] = input[1]; output[B8IDX] = input[2]; output[A8IDX] = input[3]; input = (GLubyte*) (((GLubyte*) input) + stride); - output = (GLubyte*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (GLubyte*) (((GLubyte*) output) + sizeof(Vertex)); } } static void _readVertexData4fARGB(const float* input, GLuint count, GLubyte stride, GLubyte* output) { - const float* end = (float*) ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[R8IDX] = (GLubyte) (input[0] * 255.0f); output[G8IDX] = (GLubyte) (input[1] * 255.0f); output[B8IDX] = (GLubyte) (input[2] * 255.0f); output[A8IDX] = (GLubyte) (input[3] * 255.0f); input = (float*) (((GLubyte*) input) + stride); - output = (GLubyte*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (GLubyte*) (((GLubyte*) output) + sizeof(Vertex)); } } static void _readVertexData3fARGB(const float* input, GLuint count, GLubyte stride, GLubyte* output) { - const float* end = (float*) ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[R8IDX] = (GLubyte) (input[0] * 255.0f); output[G8IDX] = (GLubyte) (input[1] * 255.0f); output[B8IDX] = (GLubyte) (input[2] * 255.0f); output[A8IDX] = 1.0f; input = (float*) (((GLubyte*) input) + stride); - output = (GLubyte*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (GLubyte*) (((GLubyte*) output) + sizeof(Vertex)); } } static void _readVertexData3ubARGB(const GLubyte* input, GLuint count, GLubyte stride, GLubyte* output) { - const GLubyte* end = ((GLubyte*) input) + (count * stride); - - while(input < end) { + ITERATE(count) { output[R8IDX] = input[0]; output[G8IDX] = input[1]; output[B8IDX] = input[2]; output[A8IDX] = 1.0f; input = (((GLubyte*) input) + stride); - output = (GLubyte*) (((GLubyte*) output) + sizeof(ClipVertex)); + output = (GLubyte*) (((GLubyte*) output) + sizeof(Vertex)); } } -static void _fillWithNegZ(GLuint count, GLfloat* output) { - const GLfloat* end = 
(GLfloat*) ((GLubyte*) output) + (sizeof(ClipVertex) * count); - while(output < end) { +static void _fillWithNegZVE(GLuint count, GLfloat* output) { + ITERATE(count) { output[0] = output[1] = 0.0f; output[2] = -1.0f; - - output += sizeof(ClipVertex); + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); } } static void _fillWhiteARGB(GLuint count, GLubyte* output) { - const GLubyte* end = output + (sizeof(ClipVertex) * count); - - while(output < end) { + ITERATE(count) { output[R8IDX] = 255; output[G8IDX] = 255; output[B8IDX] = 255; output[A8IDX] = 255; - output += sizeof(ClipVertex); + output += sizeof(Vertex); } } static void _fillZero2f(GLuint count, GLfloat* output) { - const GLfloat* end = output + (sizeof(ClipVertex) * count); - while(output < end) { + ITERATE(count) { output[0] = output[1] = 0.0f; - output += sizeof(ClipVertex); + output = (GLfloat*) (((GLubyte*) output) + sizeof(Vertex)); + } +} + +static void _fillZero2fVE(GLuint count, GLfloat* output) { + ITERATE(count) { + output[0] = output[1] = 0.0f; + output = (GLfloat*) (((GLubyte*) output) + sizeof(VertexExtra)); } } @@ -326,8 +389,8 @@ static void _readVertexData4uiARGB(const GLuint* input, GLuint count, GLubyte st assert(0 && "Not Implemented"); } -GLuint _glGetEnabledAttributes() { - return ENABLED_VERTEX_ATTRIBUTES; +GLuint* _glGetEnabledAttributes() { + return &ENABLED_VERTEX_ATTRIBUTES; } AttribPointer* _glGetVertexAttribPointer() { @@ -426,77 +489,61 @@ static inline void transformNormalToEyeSpace(GLfloat* normal) { mat_trans_normal3(normal[0], normal[1], normal[2]); } -#define swapVertex(a, b) \ -do { \ - ClipVertex temp = *a; \ - *a = *b; \ - *b = temp; \ -} while(0) +PVRHeader* _glSubmissionTargetHeader(SubmissionTarget* target) { + assert(target->header_offset < target->output->vector.size); + return aligned_vector_at(&target->output->vector, target->header_offset); +} -static inline void genTriangles(ClipVertex* output, GLuint count) { - const ClipVertex* end = output + 
count; - ClipVertex* it = output + 2; - while(it < end) { +Vertex* _glSubmissionTargetStart(SubmissionTarget* target) { + assert(target->start_offset < target->output->vector.size); + return aligned_vector_at(&target->output->vector, target->start_offset); +} + +Vertex* _glSubmissionTargetEnd(SubmissionTarget* target) { + return _glSubmissionTargetStart(target) + target->count; +} + +static inline void genTriangles(Vertex* output, GLuint count) { + Vertex* it = output + 2; + ITERATE(count / 3) { it->flags = PVR_CMD_VERTEX_EOL; it += 3; } } -static inline void genQuads(ClipVertex* output, GLuint count) { - ClipVertex* previous; - ClipVertex* this = output + 3; +static inline void genQuads(Vertex* output, GLuint count) { + Vertex* this = output + 2; + Vertex* next = output + 3; - const ClipVertex* end = output + count; + ITERATE(count / 4) { + swapVertex(this, next); + next->flags = PVR_CMD_VERTEX_EOL; - while(this < end) { - previous = this - 1; - swapVertex(previous, this); - this->flags = PVR_CMD_VERTEX_EOL; this += 4; + next += 4; } } -static void genTriangleStrip(ClipVertex* output, GLuint count) { +static void genTriangleStrip(Vertex* output, GLuint count) { output[count - 1].flags = PVR_CMD_VERTEX_EOL; } -#define MAX_POLYGON_SIZE 32 -static ClipVertex buffer[MAX_POLYGON_SIZE]; +static void genTriangleFan(Vertex* output, GLuint count) { + assert(count <= 255); -static void genTriangleFan(ClipVertex* output, GLuint count) { - assert(count < MAX_POLYGON_SIZE); - static ClipVertex buffer[MAX_POLYGON_SIZE]; + Vertex* dst = output + (((count - 2) * 3) - 1); + Vertex* src = output + (count - 1); - if(count <= 3){ - swapVertex(&output[1], &output[2]); - output[2].flags = PVR_CMD_VERTEX_EOL; - return; - } - - if( !((uint32)&buffer % 32) && !((uint32)&output % 32)){ - int temp = sizeof(ClipVertex) *count; - if (temp % 4) - temp = (temp & 0xfffffffc) + 4; - sq_cpy(buffer, output, temp); - } else { - memcpy(buffer, output, sizeof(ClipVertex) * count); - } - - // First 3 
vertices are in the right place, just end early - output[2].flags = PVR_CMD_VERTEX_EOL; - - GLsizei i = 3, target = 3; - ClipVertex* first = &output[0]; - - for(; i < count; ++i) { - output[target++] = *first; - output[target++] = buffer[i - 1]; - output[target] = buffer[i]; - output[target++].flags = PVR_CMD_VERTEX_EOL; + GLubyte i = count - 2; + while(i--) { + *dst = *src--; + (*dst--).flags = PVR_CMD_VERTEX_EOL; + *dst-- = *src; + *dst-- = *output; } } -static inline void _readPositionData(const GLuint first, const GLuint count, ClipVertex* output) { +static inline void _readPositionData(const GLuint first, const GLuint count, Vertex* output) { const GLubyte vstride = (VERTEX_POINTER.stride) ? VERTEX_POINTER.stride : VERTEX_POINTER.size * byte_size(VERTEX_POINTER.type); const void* vptr = ((GLubyte*) VERTEX_POINTER.ptr + (first * vstride)); @@ -545,7 +592,7 @@ static inline void _readPositionData(const GLuint first, const GLuint count, Cli } } -static inline void _readUVData(const GLuint first, const GLuint count, ClipVertex* output) { +static inline void _readUVData(const GLuint first, const GLuint count, Vertex* output) { if((ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) != UV_ENABLED_FLAG) { _fillZero2f(count, output->uv); return; @@ -579,9 +626,9 @@ static inline void _readUVData(const GLuint first, const GLuint count, ClipVerte } } -static inline void _readSTData(const GLuint first, const GLuint count, ClipVertex* output) { +static inline void _readSTData(const GLuint first, const GLuint count, VertexExtra* extra) { if((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) != ST_ENABLED_FLAG) { - _fillZero2f(count, output->st); + _fillZero2fVE(count, extra->st); return; } @@ -591,19 +638,19 @@ static inline void _readSTData(const GLuint first, const GLuint count, ClipVerte if(ST_POINTER.size == 2) { switch(ST_POINTER.type) { case GL_FLOAT: - _readVertexData2f2f(stptr, count, ststride, output[0].st); + _readVertexData2f2fVE(stptr, count, ststride, extra->st); break; 
case GL_BYTE: case GL_UNSIGNED_BYTE: - _readVertexData2ub2f(stptr, count, ststride, output[0].st); + _readVertexData2ub2fVE(stptr, count, ststride, extra->st); break; case GL_SHORT: case GL_UNSIGNED_SHORT: - _readVertexData2us2f(stptr, count, ststride, output[0].st); + _readVertexData2us2fVE(stptr, count, ststride, extra->st); break; case GL_INT: case GL_UNSIGNED_INT: - _readVertexData2ui2f(stptr, count, ststride, output[0].st); + _readVertexData2ui2fVE(stptr, count, ststride, extra->st); break; default: assert(0 && "Not Implemented"); @@ -613,9 +660,9 @@ static inline void _readSTData(const GLuint first, const GLuint count, ClipVerte } } -static inline void _readNormalData(const GLuint first, const GLuint count, ClipVertex* output) { +static inline void _readNormalData(const GLuint first, const GLuint count, VertexExtra* extra) { if((ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) != NORMAL_ENABLED_FLAG) { - _fillWithNegZ(count, output->nxyz); + _fillWithNegZVE(count, extra->nxyz); return; } @@ -625,19 +672,19 @@ static inline void _readNormalData(const GLuint first, const GLuint count, ClipV if(NORMAL_POINTER.size == 3) { switch(NORMAL_POINTER.type) { case GL_FLOAT: - _readVertexData3f3f(nptr, count, nstride, output[0].nxyz); + _readVertexData3f3fVE(nptr, count, nstride, extra->nxyz); break; case GL_BYTE: case GL_UNSIGNED_BYTE: - _readVertexData3ub3f(nptr, count, nstride, output[0].nxyz); + _readVertexData3ub3fVE(nptr, count, nstride, extra->nxyz); break; case GL_SHORT: case GL_UNSIGNED_SHORT: - _readVertexData3us3f(nptr, count, nstride, output[0].nxyz); + _readVertexData3us3fVE(nptr, count, nstride, extra->nxyz); break; case GL_INT: case GL_UNSIGNED_INT: - _readVertexData3ui3f(nptr, count, nstride, output[0].nxyz); + _readVertexData3ui3fVE(nptr, count, nstride, extra->nxyz); break; default: assert(0 && "Not Implemented"); @@ -647,7 +694,7 @@ static inline void _readNormalData(const GLuint first, const GLuint count, ClipV } } -static inline void 
_readDiffuseData(const GLuint first, const GLuint count, ClipVertex* output) { +static inline void _readDiffuseData(const GLuint first, const GLuint count, Vertex* output) { if((ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) != DIFFUSE_ENABLED_FLAG) { /* Just fill the whole thing white if the attribute is disabled */ _fillWhiteARGB(count, output[0].bgra); @@ -702,81 +749,104 @@ static inline void _readDiffuseData(const GLuint first, const GLuint count, Clip } } -static void generate(ClipVertex* output, const GLenum mode, const GLsizei first, const GLuint count, +static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei first, const GLuint count, const GLubyte* indices, const GLenum type, const GLboolean doTexture, const GLboolean doMultitexture, const GLboolean doLighting) { /* Read from the client buffers and generate an array of ClipVertices */ + TRACE(); const GLsizei istride = byte_size(type); - ClipVertex* it; - const ClipVertex* end; if(!indices) { - _readPositionData(first, count, output); - _readDiffuseData(first, count, output); - if(doTexture) _readUVData(first, count, output); - if(doLighting) _readNormalData(first, count, output); - if(doTexture && doMultitexture) _readSTData(first, count, output); + profiler_push(__func__); - it = output; - end = output + count; - while(it < end) { - (it++)->flags = PVR_CMD_VERTEX; + Vertex* start = _glSubmissionTargetStart(target); + + _readPositionData(first, count, start); + profiler_checkpoint("positions"); + + _readDiffuseData(first, count, start); + profiler_checkpoint("diffuse"); + + if(doTexture) _readUVData(first, count, start); + + VertexExtra* ve = aligned_vector_at(target->extras, 0); + + if(doLighting) _readNormalData(first, count, ve); + if(doTexture && doMultitexture) _readSTData(first, count, ve); + profiler_checkpoint("others"); + + Vertex* it = _glSubmissionTargetStart(target); + + ITERATE(count) { + it->flags = PVR_CMD_VERTEX; + ++it; } + profiler_checkpoint("flags"); + // 
Drawing arrays switch(mode) { case GL_TRIANGLES: - genTriangles(output, count); + genTriangles(start, count); break; case GL_QUADS: - genQuads(output, count); + genQuads(start, count); break; - case GL_POLYGON: case GL_TRIANGLE_FAN: - genTriangleFan(output, count); + genTriangleFan(start, count); break; case GL_TRIANGLE_STRIP: - genTriangleStrip(output, count); + genTriangleStrip(_glSubmissionTargetStart(target), count); break; default: assert(0 && "Not Implemented"); } + + profiler_checkpoint("quads"); + profiler_pop(); } else { const IndexParseFunc indexFunc = _calcParseIndexFunc(type); - it = output; - end = output + count; GLuint j; const GLubyte* idx = indices; - while(it < end) { + + Vertex* vertices = _glSubmissionTargetStart(target); + VertexExtra* extras = aligned_vector_at(target->extras, 0); + + ITERATE(count) { j = indexFunc(idx); - _readPositionData(j, 1, it); - _readDiffuseData(j, 1, it); - if(doTexture) _readUVData(j, 1, it); - if(doLighting) _readNormalData(j, 1, it); - if(doTexture && doMultitexture) _readSTData(j, 1, it); - ++it; + + _readPositionData(j, 1, vertices); + _readDiffuseData(j, 1, vertices); + if(doTexture) _readUVData(j, 1, vertices); + if(doLighting) _readNormalData(j, 1, extras); + if(doTexture && doMultitexture) _readSTData(j, 1, extras); + + ++vertices; + ++extras; + idx += istride; } - it = output; + Vertex* it = _glSubmissionTargetStart(target); + const Vertex* end = _glSubmissionTargetEnd(target); while(it < end) { (it++)->flags = PVR_CMD_VERTEX; } + it = _glSubmissionTargetStart(target); // Drawing arrays switch(mode) { case GL_TRIANGLES: - genTriangles(output, count); + genTriangles(it, count); break; case GL_QUADS: - genQuads(output, count); + genQuads(it, count); break; - case GL_POLYGON: case GL_TRIANGLE_FAN: - genTriangleFan(output, count); + genTriangleFan(it, count); break; case GL_TRIANGLE_STRIP: - genTriangleStrip(output, count); + genTriangleStrip(it, count); break; default: assert(0 && "Not Implemented"); @@ -784,15 
+854,15 @@ static void generate(ClipVertex* output, const GLenum mode, const GLsizei first, } } -static void transform(ClipVertex* output, const GLuint count) { - /* Perform modelview transform, storing W */ +static void transform(SubmissionTarget* target) { + TRACE(); - ClipVertex* vertex = output; + /* Perform modelview transform, storing W */ + Vertex* vertex = _glSubmissionTargetStart(target); _glApplyRenderMatrix(); /* Apply the Render Matrix Stack */ - GLsizei i = count; - while(i--) { + ITERATE(target->count) { register float __x __asm__("fr12") = (vertex->xyz[0]); register float __y __asm__("fr13") = (vertex->xyz[1]); register float __z __asm__("fr14") = (vertex->xyz[2]); @@ -809,25 +879,25 @@ static void transform(ClipVertex* output, const GLuint count) { vertex->xyz[1] = __y; vertex->xyz[2] = __z; vertex->w = __w; - ++vertex; } } -static GLsizei clip(AlignedVector* polylist, uint32_t offset, const GLuint count) { - /* Perform clipping, generating new vertices as necessary */ - clipTriangleStrip2(polylist, offset, _glGetShadeModel() == GL_FLAT); +static void clip(SubmissionTarget* target) { + TRACE(); - /* List size, minus the original offset (which includes the header), minus the header */ - return polylist->size - offset - 1; + /* Perform clipping, generating new vertices as necessary */ + _glClipTriangleStrip(target, _glGetShadeModel() == GL_FLAT); + + /* Reset the count now that we may have added vertices */ + target->count = target->output->vector.size - target->start_offset; } static void mat_transform3(const float* xyz, const float* xyzOut, const uint32_t count, const uint32_t inStride, const uint32_t outStride) { uint8_t* dataIn = (uint8_t*) xyz; uint8_t* dataOut = (uint8_t*) xyzOut; - uint32_t i = count; - while(i--) { + ITERATE(count) { float* in = (float*) dataIn; float* out = (float*) dataOut; @@ -841,9 +911,8 @@ static void mat_transform3(const float* xyz, const float* xyzOut, const uint32_t static void mat_transform_normal3(const float* xyz, 
const float* xyzOut, const uint32_t count, const uint32_t inStride, const uint32_t outStride) { uint8_t* dataIn = (uint8_t*) xyz; uint8_t* dataOut = (uint8_t*) xyzOut; - uint32_t i = count; - while(i--) { + ITERATE(count) { float* in = (float*) dataIn; float* out = (float*) dataOut; @@ -854,7 +923,7 @@ static void mat_transform_normal3(const float* xyz, const float* xyzOut, const u } } -static void light(ClipVertex* output, const GLuint count) { +static void light(SubmissionTarget* target) { if(!_glIsLightingEnabled()) { return; } @@ -871,22 +940,23 @@ static void light(ClipVertex* output, const GLuint count) { aligned_vector_init(eye_space_data, sizeof(EyeSpaceData)); } - aligned_vector_resize(eye_space_data, count); + aligned_vector_resize(eye_space_data, target->count); /* Perform lighting calculations and manipulate the colour */ - ClipVertex* vertex = output; + Vertex* vertex = _glSubmissionTargetStart(target); + VertexExtra* extra = aligned_vector_at(target->extras, 0); EyeSpaceData* eye_space = (EyeSpaceData*) eye_space_data->data; _glMatrixLoadModelView(); - mat_transform3(vertex->xyz, eye_space->xyz, count, sizeof(ClipVertex), sizeof(EyeSpaceData)); + mat_transform3(vertex->xyz, eye_space->xyz, target->count, sizeof(Vertex), sizeof(EyeSpaceData)); _glMatrixLoadNormal(); - mat_transform_normal3(vertex->nxyz, eye_space->n, count, sizeof(ClipVertex), sizeof(EyeSpaceData)); + mat_transform_normal3(extra->nxyz, eye_space->n, target->count, sizeof(VertexExtra), sizeof(EyeSpaceData)); GLsizei i; EyeSpaceData* ES = aligned_vector_at(eye_space_data, 0); - for(i = 0; i < count; ++i, ++vertex, ++ES) { + for(i = 0; i < target->count; ++i, ++vertex, ++ES) { /* We ignore diffuse colour when lighting is enabled. 
If GL_COLOR_MATERIAL is enabled * then the lighting calculation should possibly take it into account */ @@ -911,12 +981,13 @@ static void light(ClipVertex* output, const GLuint count) { } } -static void divide(ClipVertex* output, const GLuint count) { - /* Perform perspective divide on each vertex */ - ClipVertex* vertex = output; +static void divide(SubmissionTarget* target) { + TRACE(); - GLsizei i = count; - while(i--) { + /* Perform perspective divide on each vertex */ + Vertex* vertex = _glSubmissionTargetStart(target); + + ITERATE(target->count) { vertex->xyz[2] = 1.0f / vertex->w; vertex->xyz[0] *= vertex->xyz[2]; vertex->xyz[1] *= vertex->xyz[2]; @@ -924,7 +995,9 @@ static void divide(ClipVertex* output, const GLuint count) { } } -static void push(PVRHeader* header, ClipVertex* output, const GLuint count, PolyList* activePolyList, GLshort textureUnit) { +static void push(PVRHeader* header, Vertex* output, const GLuint count, PolyList* activePolyList, GLshort textureUnit) { + TRACE(); + // Compile the header pvr_poly_cxt_t cxt = *_glGetPVRContext(); cxt.list_type = activePolyList->list_type; @@ -934,22 +1007,49 @@ static void push(PVRHeader* header, ClipVertex* output, const GLuint count, Poly pvr_poly_compile(&header->hdr, &cxt); /* Post-process the vertex list */ + /* + * This is currently unnecessary. aligned_vector memsets the allocated objects + * to zero, and we don't touch oargb, also, we don't *enable* oargb yet in the + * pvr header so it should be ignored anyway. If this ever becomes a problem, + * uncomment this. ClipVertex* vout = output; - - GLuint i = count; - while(i--) { + const ClipVertex* end = output + count; + while(vout < end) { vout->oargb = 0; } + */ } #define DEBUG_CLIPPING 0 static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type, const GLvoid* indices) { + TRACE(); + /* Do nothing if vertices aren't enabled */ if(!(ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG)) { return; } + /* No vertices? 
Do nothing */ + if(!count) { + return; + } + + static SubmissionTarget* target = NULL; + static AlignedVector extras; + + /* Initialization of the target and extras */ + if(!target) { + target = (SubmissionTarget*) malloc(sizeof(SubmissionTarget)); + target->extras = NULL; + target->count = 0; + target->output = NULL; + target->header_offset = target->start_offset = 0; + + aligned_vector_init(&extras, sizeof(VertexExtra)); + target->extras = &extras; + } + GLboolean doMultitexture, doTexture, doLighting; GLint activeTexture; glGetIntegerv(GL_ACTIVE_TEXTURE_ARB, &activeTexture); @@ -966,39 +1066,51 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type PROFILER_PUSH(__func__); + /* Polygons are treated as triangle fans, the only time this would be a + * problem is if we supported glPolygonMode(..., GL_LINE) but we don't. + * We optimise the triangle and quad cases. + */ + if(mode == GL_POLYGON) { + if(count == 3) { + mode = GL_TRIANGLES; + } else if(count == 4) { + mode = GL_QUADS; + } else { + mode = GL_TRIANGLE_FAN; + } + } - PolyList* activeList = _glActivePolyList(); + // We don't handle this any further, so just make sure we never pass it down */ + assert(mode != GL_POLYGON); - /* Make room in the list buffer */ - GLsizei spaceNeeded = (mode == GL_POLYGON || mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count; - ClipVertex* start = aligned_vector_extend(&activeList->vector, spaceNeeded + 1); + target->output = _glActivePolyList(); + target->count = (mode == GL_TRIANGLE_FAN) ? 
((count - 2) * 3) : count; + target->header_offset = target->output->vector.size; + target->start_offset = target->header_offset + 1; - /* Store a pointer to the header for later */ - PVRHeader* header = (PVRHeader*) start++; + assert(target->count); - /* We store an offset to the first ClipVertex because clipping may generate more - * vertices, which may cause a realloc and thus invalidate start and header - * we use this startOffset to reset those pointers after clipping */ - uint32_t startOffset = start - (ClipVertex*) activeList->vector.data; + /* Make sure we have enough room for all the "extra" data */ + aligned_vector_resize(&extras, target->count); + + /* Make room for the vertices and header */ + aligned_vector_extend(&target->output->vector, target->count + 1); PROFILER_CHECKPOINT("allocate"); - generate(start, mode, first, count, (GLubyte*) indices, type, doTexture, doMultitexture, doLighting); + generate(target, mode, first, count, (GLubyte*) indices, type, doTexture, doMultitexture, doLighting); PROFILER_CHECKPOINT("generate"); - light(start, spaceNeeded); + light(target); PROFILER_CHECKPOINT("light"); - transform(start, spaceNeeded); + transform(target); PROFILER_CHECKPOINT("transform"); if(_glIsClippingEnabled()) { - - uint32_t offset = ((start - 1) - (ClipVertex*) activeList->vector.data); - #if DEBUG_CLIPPING uint32_t i = 0; fprintf(stderr, "=========\n"); @@ -1013,11 +1125,9 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type } #endif - spaceNeeded = clip(&activeList->vector, offset, spaceNeeded); + clip(target); - /* Clipping may have realloc'd so reset the start pointer */ - start = ((ClipVertex*) activeList->vector.data) + startOffset; - header = (PVRHeader*) (start - 1); /* Update the header pointer */ + assert(extras.size == target->count); #if DEBUG_CLIPPING fprintf(stderr, "--------\n"); @@ -1035,11 +1145,11 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type 
PROFILER_CHECKPOINT("clip"); - divide(start, spaceNeeded); + divide(target); PROFILER_CHECKPOINT("divide"); - push(header, start, spaceNeeded, _glActivePolyList(), 0); + push(_glSubmissionTargetHeader(target), _glSubmissionTargetStart(target), target->count, target->output, 0); PROFILER_CHECKPOINT("push"); /* @@ -1058,28 +1168,32 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type TextureObject* texture1 = _glGetTexture1(); + /* Multitexture implicitly disabled */ if(!texture1 || ((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) != ST_ENABLED_FLAG)) { - /* Multitexture implicitly disabled */ - PROFILER_POP(); + /* Multitexture actively disabled */ + profiler_pop(); return; } /* Push back a copy of the list to the transparent poly list, including the header - (hence the - 1) + (hence the + 1) */ - ClipVertex* vertex = aligned_vector_push_back( - &_glTransparentPolyList()->vector, start - 1, spaceNeeded + 1 + Vertex* vertex = aligned_vector_push_back( + &_glTransparentPolyList()->vector, (Vertex*) _glSubmissionTargetHeader(target), target->count + 1 ); - PVRHeader* mtHeader = (PVRHeader*) vertex++; - ClipVertex* mtStart = vertex; + assert(vertex); - /* Copy ST coordinates to UV ones */ - GLsizei i = spaceNeeded; - while(i--) { - vertex->uv[0] = vertex->st[0]; - vertex->uv[1] = vertex->st[1]; + PVRHeader* mtHeader = (PVRHeader*) vertex++; + Vertex* mtStart = vertex; + + /* Replace the UV coordinates with the ST ones */ + VertexExtra* ve = aligned_vector_at(target->extras, 0); + ITERATE(target->count) { + vertex->uv[0] = ve->st[0]; + vertex->uv[1] = ve->st[1]; ++vertex; + ++ve; } /* Store state, as we're about to mess around with it */ @@ -1093,10 +1207,12 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type glDepthFunc(GL_EQUAL); glEnable(GL_BLEND); - glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + + /* This is modulation, we need to switch depending on the texture env mode! 
*/ + glBlendFunc(GL_DST_COLOR, GL_ZERO); /* Send the buffer again to the transparent list */ - push(mtHeader, mtStart, spaceNeeded, _glTransparentPolyList(), 1); + push(mtHeader, mtStart, target->count, _glTransparentPolyList(), 1); /* Reset state */ glDepthFunc(depthFunc); @@ -1104,7 +1220,7 @@ static void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type (blendEnabled) ? glEnable(GL_BLEND) : glDisable(GL_BLEND); (depthEnabled) ? glEnable(GL_DEPTH_TEST) : glDisable(GL_DEPTH_TEST); - PROFILER_POP(); + profiler_pop(); } void APIENTRY glDrawElements(GLenum mode, GLsizei count, GLenum type, const GLvoid* indices) { @@ -1124,7 +1240,7 @@ void APIENTRY glDrawArrays(GLenum mode, GLint first, GLsizei count) { return; } - submitVertices(mode, first, count, GL_UNSIGNED_SHORT, NULL); + submitVertices(mode, first, count, GL_UNSIGNED_INT, NULL); } void APIENTRY glEnableClientState(GLenum cap) { diff --git a/GL/flush.c b/GL/flush.c index f725542..45a3bc2 100644 --- a/GL/flush.c +++ b/GL/flush.c @@ -31,7 +31,6 @@ static void pvr_list_submit(void *src, int n) { d[7] = *(s++); __asm__("pref @%0" : : "r"(d)); d += 8; - s += CLIP_VERTEX_INT_PADDING; } /* Wait for both store queues to complete */ @@ -105,9 +104,9 @@ void APIENTRY glKosInitEx(GLdcConfig* config) { PT_LIST.list_type = PVR_LIST_PT_POLY; TR_LIST.list_type = PVR_LIST_TR_POLY; - aligned_vector_init(&OP_LIST.vector, sizeof(ClipVertex)); - aligned_vector_init(&PT_LIST.vector, sizeof(ClipVertex)); - aligned_vector_init(&TR_LIST.vector, sizeof(ClipVertex)); + aligned_vector_init(&OP_LIST.vector, sizeof(Vertex)); + aligned_vector_init(&PT_LIST.vector, sizeof(Vertex)); + aligned_vector_init(&TR_LIST.vector, sizeof(Vertex)); aligned_vector_reserve(&OP_LIST.vector, config->initial_op_capacity); aligned_vector_reserve(&PT_LIST.vector, config->initial_pt_capacity); diff --git a/GL/immediate.c b/GL/immediate.c index 1920352..a9b3185 100644 --- a/GL/immediate.c +++ b/GL/immediate.c @@ -9,6 +9,7 @@ #include 
"../include/gl.h" #include "../include/glext.h" +#include "profiler.h" #include "private.h" @@ -23,14 +24,20 @@ static AlignedVector NORMALS; static GLfloat NORMAL[3] = {0.0f, 0.0f, 1.0f}; -static GLfloat COLOR[4] = {1.0f, 1.0f, 1.0f, 1.0f}; +static GLubyte COLOR[4] = {255, 255, 255, 255}; static GLfloat UV_COORD[2] = {0.0f, 0.0f}; static GLfloat ST_COORD[2] = {0.0f, 0.0f}; +static AttribPointer VERTEX_ATTRIB; +static AttribPointer DIFFUSE_ATTRIB; +static AttribPointer UV_ATTRIB; +static AttribPointer ST_ATTRIB; +static AttribPointer NORMAL_ATTRIB; + void _glInitImmediateMode(GLuint initial_size) { aligned_vector_init(&VERTICES, sizeof(GLfloat)); - aligned_vector_init(&COLOURS, sizeof(GLfloat)); + aligned_vector_init(&COLOURS, sizeof(GLubyte)); aligned_vector_init(&UV_COORDS, sizeof(GLfloat)); aligned_vector_init(&ST_COORDS, sizeof(GLfloat)); aligned_vector_init(&NORMALS, sizeof(GLfloat)); @@ -40,6 +47,31 @@ void _glInitImmediateMode(GLuint initial_size) { aligned_vector_reserve(&UV_COORDS, initial_size); aligned_vector_reserve(&ST_COORDS, initial_size); aligned_vector_reserve(&NORMALS, initial_size); + + VERTEX_ATTRIB.ptr = VERTICES.data; + VERTEX_ATTRIB.size = 3; + VERTEX_ATTRIB.type = GL_FLOAT; + VERTEX_ATTRIB.stride = 0; + + DIFFUSE_ATTRIB.ptr = COLOURS.data; + DIFFUSE_ATTRIB.size = 4; + DIFFUSE_ATTRIB.type = GL_UNSIGNED_BYTE; + DIFFUSE_ATTRIB.stride = 0; + + UV_ATTRIB.ptr = UV_COORDS.data; + UV_ATTRIB.stride = 0; + UV_ATTRIB.type = GL_FLOAT; + UV_ATTRIB.size = 2; + + ST_ATTRIB.ptr = ST_COORDS.data; + ST_ATTRIB.stride = 0; + ST_ATTRIB.type = GL_FLOAT; + ST_ATTRIB.size = 2; + + NORMAL_ATTRIB.ptr = NORMALS.data; + NORMAL_ATTRIB.stride = 0; + NORMAL_ATTRIB.type = GL_FLOAT; + NORMAL_ATTRIB.size = 3; } GLubyte _glCheckImmediateModeInactive(const char* func) { @@ -65,36 +97,45 @@ void APIENTRY glBegin(GLenum mode) { } void APIENTRY glColor4f(GLfloat r, GLfloat g, GLfloat b, GLfloat a) { + COLOR[0] = (GLubyte)(r * 255); + COLOR[1] = (GLubyte)(g * 255); + COLOR[2] = 
(GLubyte)(b * 255); + COLOR[3] = (GLubyte)(a * 255); +} + +void APIENTRY glColor4ub(GLubyte r, GLubyte g, GLubyte b, GLubyte a) { COLOR[0] = r; COLOR[1] = g; COLOR[2] = b; COLOR[3] = a; } -void APIENTRY glColor4ub(GLubyte r, GLubyte g, GLubyte b, GLubyte a) { - glColor4f( - ((GLfloat) r) / 255.0f, - ((GLfloat) g) / 255.0f, - ((GLfloat) b) / 255.0f, - ((GLfloat) a) / 255.0f - ); -} - void APIENTRY glColor4fv(const GLfloat* v) { - glColor4f(v[0], v[1], v[2], v[3]); + COLOR[0] = (GLubyte)(v[0] * 255); + COLOR[1] = (GLubyte)(v[1] * 255); + COLOR[2] = (GLubyte)(v[2] * 255); + COLOR[3] = (GLubyte)(v[3] * 255); } void APIENTRY glColor3f(GLfloat r, GLfloat g, GLfloat b) { - static float a = 1.0f; - glColor4f(r, g, b, a); + COLOR[0] = (GLubyte)(r * 255); + COLOR[1] = (GLubyte)(g * 255); + COLOR[2] = (GLubyte)(b * 255); + COLOR[3] = 255; } void APIENTRY glColor3ub(GLubyte red, GLubyte green, GLubyte blue) { - glColor3f((float) red / 255, (float) green / 255, (float) blue / 255); + COLOR[0] = red; + COLOR[1] = green; + COLOR[2] = blue; + COLOR[3] = 255; } void APIENTRY glColor3fv(const GLfloat* v) { - glColor3f(v[0], v[1], v[2]); + COLOR[0] = (GLubyte)(v[0] * 255); + COLOR[1] = (GLubyte)(v[1] * 255); + COLOR[2] = (GLubyte)(v[2] * 255); + COLOR[3] = 255; } void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) { @@ -165,80 +206,69 @@ void APIENTRY glNormal3fv(const GLfloat* v) { } void APIENTRY glEnd() { + profiler_push(__func__); + IMMEDIATE_MODE_ACTIVE = GL_FALSE; - GLboolean vertexArrayEnabled, colorArrayEnabled, normalArrayEnabled; - GLboolean texArray0Enabled, texArray1Enabled; + /* Resizing could have invalidated these pointers */ + VERTEX_ATTRIB.ptr = VERTICES.data; + DIFFUSE_ATTRIB.ptr = COLOURS.data; + UV_ATTRIB.ptr = UV_COORDS.data; + ST_ATTRIB.ptr = ST_COORDS.data; + NORMAL_ATTRIB.ptr = NORMALS.data; - glGetBooleanv(GL_VERTEX_ARRAY, &vertexArrayEnabled); - glGetBooleanv(GL_COLOR_ARRAY, &colorArrayEnabled); - glGetBooleanv(GL_NORMAL_ARRAY, &normalArrayEnabled); + 
GLuint* attrs = _glGetEnabledAttributes(); - AttribPointer vptr = *_glGetVertexAttribPointer(); - AttribPointer dptr = *_glGetDiffuseAttribPointer(); - AttribPointer nptr = *_glGetNormalAttribPointer(); - AttribPointer uvptr = *_glGetUVAttribPointer(); - AttribPointer stptr = *_glGetSTAttribPointer(); + AttribPointer* vattr = _glGetVertexAttribPointer(); + AttribPointer* dattr = _glGetDiffuseAttribPointer(); + AttribPointer* nattr = _glGetNormalAttribPointer(); + AttribPointer* uattr = _glGetUVAttribPointer(); + AttribPointer* sattr = _glGetSTAttribPointer(); - glEnableClientState(GL_VERTEX_ARRAY); - glEnableClientState(GL_COLOR_ARRAY); - glEnableClientState(GL_NORMAL_ARRAY); + /* Stash existing values */ + AttribPointer vptr = *vattr; + AttribPointer dptr = *dattr; + AttribPointer nptr = *nattr; + AttribPointer uvptr = *uattr; + AttribPointer stptr = *sattr; - glVertexPointer(3, GL_FLOAT, 0, VERTICES.data); - glColorPointer(4, GL_FLOAT, 0, COLOURS.data); - glNormalPointer(GL_FLOAT, 0, NORMALS.data); + GLuint prevAttrs = *attrs; - GLint activeTexture; - glGetIntegerv(GL_CLIENT_ACTIVE_TEXTURE, &activeTexture); + /* Switch to our immediate mode arrays */ + *vattr = VERTEX_ATTRIB; + *dattr = DIFFUSE_ATTRIB; + *nattr = NORMAL_ATTRIB; + *uattr = UV_ATTRIB; + *sattr = ST_ATTRIB; - glClientActiveTextureARB(GL_TEXTURE0); - glGetBooleanv(GL_TEXTURE_COORD_ARRAY, &texArray0Enabled); - glEnableClientState(GL_TEXTURE_COORD_ARRAY); - glTexCoordPointer(2, GL_FLOAT, 0, UV_COORDS.data); - - glClientActiveTextureARB(GL_TEXTURE1); - glGetBooleanv(GL_TEXTURE_COORD_ARRAY, &texArray1Enabled); - glEnableClientState(GL_TEXTURE_COORD_ARRAY); - glTexCoordPointer(2, GL_FLOAT, 0, ST_COORDS.data); + *attrs = ~0; // Enable everything glDrawArrays(ACTIVE_POLYGON_MODE, 0, VERTICES.size / 3); + /* Restore everything */ + *vattr = vptr; + *dattr = dptr; + *nattr = nptr; + *uattr = uvptr; + *sattr = stptr; + + *attrs = prevAttrs; + + /* Clear arrays for next polys */ aligned_vector_clear(&VERTICES); 
aligned_vector_clear(&COLOURS); aligned_vector_clear(&UV_COORDS); aligned_vector_clear(&ST_COORDS); aligned_vector_clear(&NORMALS); - *_glGetVertexAttribPointer() = vptr; - *_glGetDiffuseAttribPointer() = dptr; - *_glGetNormalAttribPointer() = nptr; - *_glGetUVAttribPointer() = uvptr; - *_glGetSTAttribPointer() = stptr; - - if(!vertexArrayEnabled) { - glDisableClientState(GL_VERTEX_ARRAY); - } - - if(!colorArrayEnabled) { - glDisableClientState(GL_COLOR_ARRAY); - } - - if(!normalArrayEnabled) { - glDisableClientState(GL_NORMAL_ARRAY); - } - - if(!texArray0Enabled) { - glClientActiveTextureARB(GL_TEXTURE0); - glDisableClientState(GL_TEXTURE_COORD_ARRAY); - } - - if(!texArray1Enabled) { - glClientActiveTextureARB(GL_TEXTURE1); - glDisableClientState(GL_TEXTURE_COORD_ARRAY); - } - - glClientActiveTextureARB((GLuint) activeTexture); + *vattr = vptr; + *dattr = dptr; + *nattr = nptr; + *uattr = uvptr; + *sattr = stptr; + profiler_checkpoint("restore"); + profiler_pop(); } void APIENTRY glRectf(GLfloat x1, GLfloat y1, GLfloat x2, GLfloat y2) { diff --git a/GL/private.h b/GL/private.h index bdb6038..45286f8 100644 --- a/GL/private.h +++ b/GL/private.h @@ -1,10 +1,11 @@ #ifndef PRIVATE_H #define PRIVATE_H +#include + #include "../include/gl.h" #include "../containers/aligned_vector.h" #include "../containers/named_array.h" -#include "./clip.h" #define TRACE_ENABLED 0 #define TRACE() if(TRACE_ENABLED) {fprintf(stderr, "%s\n", __func__);} @@ -17,11 +18,8 @@ #define MAX_TEXTURE_SIZE 1024 -#define CLIP_VERTEX_INT_PADDING 6 - typedef struct { pvr_poly_hdr_t hdr; - unsigned int padding[CLIP_VERTEX_INT_PADDING]; } PVRHeader; typedef struct { @@ -31,9 +29,6 @@ typedef struct { sy, /* Start y */ ex, /* End x */ ey; /* End y */ - - /* Padding to match clip vertex */ - unsigned int padding[CLIP_VERTEX_INT_PADDING]; } PVRTileClipCommand; /* Tile Clip command for the pvr */ typedef struct { @@ -97,6 +92,88 @@ typedef struct { GLboolean is_directional; } LightSource; +typedef struct { + 
/* Same 32 byte layout as pvr_vertex_t */ + uint32_t flags; + float xyz[3]; + float uv[2]; + uint8_t bgra[4]; + + /* In the pvr_vertex_t structure, this next 4 bytes is oargb + * but we're not using that for now, so having W here makes the code + * simpler */ + float w; +} Vertex; + +/* FIXME: SH4 has a swap.w instruction, we should leverage it here! */ +#define _SWAP32(x, y) \ +do { \ + uint32_t t = *((uint32_t*) &x); \ + *((uint32_t*) &x) = *((uint32_t*) &y); \ + *((uint32_t*) &y) = t; \ +} while(0) + +/* + *((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); \ + *((uint32_t*) &y) = *((uint32_t*) &x) ^ *((uint32_t*) &y); \ + *((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); */ + + +#define swapVertex(a, b) \ +do { \ + _SWAP32(a->flags, b->flags); \ + _SWAP32(a->xyz[0], b->xyz[0]); \ + _SWAP32(a->xyz[1], b->xyz[1]); \ + _SWAP32(a->xyz[2], b->xyz[2]); \ + _SWAP32(a->uv[0], b->uv[0]); \ + _SWAP32(a->uv[1], b->uv[1]); \ + _SWAP32(a->bgra, b->bgra); \ + _SWAP32(a->w, b->w); \ +} while(0) + +/* ClipVertex doesn't have room for these, so we need to parse them + * out separately. Potentially 'w' will be housed here if we support oargb */ +typedef struct { + float nxyz[3]; + float st[2]; +} VertexExtra; + +/* Generating PVR vertices from the user-submitted data gets complicated, particularly + * when a realloc could invalidate pointers. This structure holds all the information + * we need on the target vertex array to allow passing around to the various stages (e.g. generate/clip etc.) 
+ */ +typedef struct { + PolyList* output; + uint32_t header_offset; // The offset of the header in the output list + uint32_t start_offset; // The offset into the output list + uint32_t count; // The number of vertices in this output + + /* Pointer to count * VertexExtra; */ + AlignedVector* extras; +} SubmissionTarget; + +PVRHeader* _glSubmissionTargetHeader(SubmissionTarget* target); +Vertex* _glSubmissionTargetStart(SubmissionTarget* target); +Vertex* _glSubmissionTargetEnd(SubmissionTarget* target); + +typedef enum { + CLIP_RESULT_ALL_IN_FRONT, + CLIP_RESULT_ALL_BEHIND, + CLIP_RESULT_ALL_ON_PLANE, + CLIP_RESULT_FRONT_TO_BACK, + CLIP_RESULT_BACK_TO_FRONT +} ClipResult; + + +#define A8IDX 3 +#define R8IDX 2 +#define G8IDX 1 +#define B8IDX 0 + +struct SubmissionTarget; + +void _glClipLineToNearZ(const Vertex* v1, const Vertex* v2, Vertex* vout, float* t); +void _glClipTriangleStrip(SubmissionTarget* target, uint8_t fladeShade); PolyList *_glActivePolyList(); PolyList *_glTransparentPolyList(); @@ -133,7 +210,7 @@ typedef struct { GLboolean _glCheckValidEnum(GLint param, GLint* values, const char* func); -GLuint _glGetEnabledAttributes(); +GLuint* _glGetEnabledAttributes(); AttribPointer* _glGetVertexAttribPointer(); AttribPointer* _glGetDiffuseAttribPointer(); AttribPointer* _glGetNormalAttribPointer(); diff --git a/GL/state.c b/GL/state.c index 6872f72..61f3b74 100644 --- a/GL/state.c +++ b/GL/state.c @@ -225,7 +225,7 @@ void _glUpdatePVRTextureContext(pvr_poly_cxt_t* context, GLshort textureUnit) { if(tx1->isPaletted) { if(_glIsSharedTexturePaletteEnabled()) { TexturePalette* palette = _glGetSharedPalette(tx1->shared_bank); - context->txr.format |= PVR_TXRFMT_8BPP_PAL((palette) ? 0 : palette->bank); + context->txr.format |= PVR_TXRFMT_8BPP_PAL(palette->bank); } else { context->txr.format |= PVR_TXRFMT_8BPP_PAL((tx1->palette) ? 
tx1->palette->bank : 0); } @@ -576,7 +576,7 @@ static GLenum COMPRESSED_FORMATS [] = { static GLint NUM_COMPRESSED_FORMATS = sizeof(COMPRESSED_FORMATS) / sizeof(GLenum); void APIENTRY glGetBooleanv(GLenum pname, GLboolean* params) { - GLuint enabledAttrs = _glGetEnabledAttributes(); + GLuint enabledAttrs = *_glGetEnabledAttributes(); GLuint activeClientTexture = _glGetActiveClientTexture(); switch(pname) { @@ -671,7 +671,7 @@ const GLubyte *glGetString(GLenum name) { return (const GLubyte*) "PowerVR2 CLX2 100mHz"; case GL_VERSION: - return (const GLubyte*) "1.2 (partial) - GLdc 1.0"; + return (const GLubyte*) "1.2 (partial) - GLdc 1.1"; case GL_EXTENSIONS: return (const GLubyte*) "GL_ARB_framebuffer_object, GL_ARB_multitexture, GL_ARB_texture_rg, GL_EXT_paletted_texture, GL_EXT_shared_texture_palette, GL_KOS_multiple_shared_palette"; diff --git a/GL/texture.c b/GL/texture.c index 3dddacb..c8d0202 100644 --- a/GL/texture.c +++ b/GL/texture.c @@ -940,19 +940,15 @@ void APIENTRY glTexImage2D(GLenum target, GLint level, GLint internalFormat, assert(type == GL_UNSIGNED_BYTE); // Anything else needs this loop adjusting GLuint x, y, min, mask; + GLubyte *pixels = (GLubyte*) data; + GLushort *vtex = (GLushort*) targetData; + min = MIN(w, h); mask = min - 1; - uint8 * pixels; - uint16 * vtex; - pixels = (uint8 *) data; - vtex = (uint16*)targetData; - for(y = 0; y < h; y += 2) { for(x = 0; x < w; x++) { - vtex[TWIDOUT((y & mask) / 2, x & mask) + - (x / min + y / min)*min * min / 2] = - pixels[y * w + x] | (pixels[(y + 1) * w + x] << 8); + vtex[TWIDOUT((y & mask) / 2, x & mask) + (x / min + y / min)*min * min / 2] = pixels[y * w + x] | (pixels[(y + 1) * w + x] << 8); } } } else { diff --git a/Makefile b/Makefile index 8f0e0b1..76a1114 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ OBJS += GL/matrix.o GL/fog.o GL/error.o GL/clip.o containers/stack.o containers/ SUBDIRS = -KOS_CFLAGS += -ffast-math -O3 -Iinclude +KOS_CFLAGS += -ffast-math -Ofast -Iinclude link: 
$(KOS_AR) rcs $(TARGET) $(OBJS) diff --git a/containers/aligned_vector.c b/containers/aligned_vector.c index a3cfb75..1b19224 100644 --- a/containers/aligned_vector.c +++ b/containers/aligned_vector.c @@ -2,6 +2,7 @@ #include #include #include +#include #if defined(__APPLE__) || defined(__WIN32__) /* Linux + Kos define this, OSX does not, so just use malloc there */ @@ -33,6 +34,10 @@ static inline unsigned int round_to_chunk_size(unsigned int val) { void aligned_vector_reserve(AlignedVector* vector, unsigned int element_count) { + if(element_count == 0) { + return; + } + if(element_count <= vector->capacity) { return; } @@ -45,7 +50,9 @@ void aligned_vector_reserve(AlignedVector* vector, unsigned int element_count) { unsigned int new_byte_size = element_count * vector->element_size; unsigned char* original_data = vector->data; + vector->data = (unsigned char*) memalign(0x20, new_byte_size); + assert(vector->data); if(original_data) { if( !(*vector->data % 32) && !(*original_data % 32)){ @@ -64,10 +71,14 @@ void aligned_vector_reserve(AlignedVector* vector, unsigned int element_count) { void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count) { /* Resize enough room */ + assert(count); + assert(vector->element_size); unsigned int initial_size = vector->size; aligned_vector_resize(vector, vector->size + count); + assert(vector->size == initial_size + count); + unsigned char* dest = vector->data + (vector->element_size * initial_size); /* Copy the objects in */ diff --git a/samples/Makefile b/samples/Makefile index 1d588b5..177f547 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -25,3 +25,4 @@ all: $(KOS_MAKE) -C paletted all $(KOS_MAKE) -C paletted_pcx all $(KOS_MAKE) -C depth_funcs all + $(KOS_MAKE) -C polymark all diff --git a/samples/nehe02de/main.c b/samples/nehe02de/main.c index 9855e54..eba7fca 100644 --- a/samples/nehe02de/main.c +++ b/samples/nehe02de/main.c @@ -7,7 +7,7 @@ void InitGL(int Width, int Height) // We 
call this right after our OpenG { glClearColor(0.0f, 0.0f, 0.0f, 0.0f); // This Will Clear The Background Color To Black glClearDepth(1.0); // Enables Clearing Of The Depth Buffer - glDepthFunc(GL_LESS); // The Type Of Depth Test To Do + glDepthFunc(GL_LEQUAL); // The Type Of Depth Test To Do glEnable(GL_DEPTH_TEST); // Enables Depth Testing glShadeModel(GL_SMOOTH); // Enables Smooth Color Shading diff --git a/samples/polymark/Makefile b/samples/polymark/Makefile new file mode 100644 index 0000000..9910cf4 --- /dev/null +++ b/samples/polymark/Makefile @@ -0,0 +1,29 @@ +TARGET = polymark.elf +OBJS = main.o + +all: rm-elf $(TARGET) + +include $(KOS_BASE)/Makefile.rules + +clean: + -rm -f $(TARGET) $(OBJS) romdisk.* + +rm-elf: + -rm -f $(TARGET) romdisk.* + +$(TARGET): $(OBJS) romdisk.o + $(KOS_CC) $(KOS_CFLAGS) $(KOS_LDFLAGS) -o $(TARGET) $(KOS_START) \ + $(OBJS) romdisk.o $(OBJEXTRA) -lm -lkosutils $(KOS_LIBS) + +romdisk.img: + $(KOS_GENROMFS) -f romdisk.img -d romdisk -v + +romdisk.o: romdisk.img + $(KOS_BASE)/utils/bin2o/bin2o romdisk.img romdisk romdisk.o + +run: $(TARGET) + $(KOS_LOADER) $(TARGET) + +dist: + rm -f $(OBJS) romdisk.o romdisk.img + $(KOS_STRIP) $(TARGET) diff --git a/samples/polymark/main.c b/samples/polymark/main.c new file mode 100644 index 0000000..2a671fc --- /dev/null +++ b/samples/polymark/main.c @@ -0,0 +1,178 @@ +/* + KallistiGL 2.0.0 + + quadmark.c + (c)2018 Luke Benstead + (c)2014 Josh Pearson + (c)2002 Dan Potter, Paul Boese +*/ + +#include + +#include + +#include +#include + +enum { PHASE_HALVE, PHASE_INCR, PHASE_DECR, PHASE_FINAL }; + +int polycnt; +int phase = PHASE_HALVE; +float avgfps = -1; + +void running_stats() { + pvr_stats_t stats; + pvr_get_stats(&stats); + + if(avgfps == -1) + avgfps = stats.frame_rate; + else + avgfps = (avgfps + stats.frame_rate) / 2.0f; +} + +void stats() { + pvr_stats_t stats; + + pvr_get_stats(&stats); + dbglog(DBG_DEBUG, "3D Stats: %d VBLs, frame rate ~%f fps\n", + stats.vbl_count, stats.frame_rate); +} 
+ + +int check_start() { + maple_device_t *cont; + cont_state_t *state; + + cont = maple_enum_type(0, MAPLE_FUNC_CONTROLLER); + + if(cont) { + state = (cont_state_t *)maple_dev_status(cont); + + if(state) + return state->buttons & CONT_START; + } + + return 0; +} + +pvr_poly_hdr_t hdr; + +void setup() { + glKosInit(); + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + glOrtho(0, 640, 0, 480, -100, 100); + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + glEnable(GL_CULL_FACE); +} + +void do_frame() { + int x, y, z; + int size; + int i; + float col; + + for(i = 0; i < polycnt; i++) { + glBegin(GL_POLYGON); + x = rand() % 640; + y = rand() % 480; + z = rand() % 100 + 1; + size = rand() % 50 + 1; + col = (rand() % 255) * 0.00391f; + + glColor3f(col, col, col); + glVertex3f(x - size, y - size, z); + glVertex3f(x + size, y - size, z); + glVertex3f(x + size, y + size, z); + glVertex3f(x, y + size + (size / 2), z); + glVertex3f(x - size, y + size, z); + glEnd(); + } + + glKosSwapBuffers(); +} + +time_t start; +void switch_tests(int ppf) { + printf("Beginning new test: %d polys per frame (%d per second at 60fps)\n", + ppf * 3, ppf * 3 * 60); + avgfps = -1; + polycnt = ppf; +} + +void check_switch() { + time_t now; + + now = time(NULL); + + if(now >= (start + 5)) { + start = time(NULL); + printf(" Average Frame Rate: ~%f fps (%d pps)\n", avgfps, (int)(polycnt * avgfps * 3)); + + switch(phase) { + case PHASE_HALVE: + + if(avgfps < 55) { + switch_tests(polycnt / 1.2f); + } + else { + printf(" Entering PHASE_INCR\n"); + phase = PHASE_INCR; + } + + break; + + case PHASE_INCR: + + if(avgfps >= 55) { + switch_tests(polycnt + 15); + } + else { + printf(" Entering PHASE_DECR\n"); + phase = PHASE_DECR; + } + + break; + + case PHASE_DECR: + + if(avgfps < 55) { + switch_tests(polycnt - 30); + } + else { + printf(" Entering PHASE_FINAL\n"); + phase = PHASE_FINAL; + } + + break; + + case PHASE_FINAL: + break; + } + } +} + +int main(int argc, char **argv) { + setup(); + + /* Start off 
with something obscene */ + switch_tests(200000 / 60); + start = time(NULL); + + for(;;) { + if(check_start()) + break; + + printf(" \r"); + do_frame(); + running_stats(); + check_switch(); + } + + stats(); + + return 0; +} + + diff --git a/samples/polymark/romdisk/PLACEHOLDER b/samples/polymark/romdisk/PLACEHOLDER new file mode 100644 index 0000000..e69de29 diff --git a/samples/zclip/main.c b/samples/zclip/main.c index b99f3a0..8da2498 100644 --- a/samples/zclip/main.c +++ b/samples/zclip/main.c @@ -3,6 +3,7 @@ #include #include "gl.h" +#include "glext.h" #include "glu.h" #include "glkos.h" @@ -23,6 +24,7 @@ typedef struct // Create A Structure TextureImage textures[3]; TextureImage road; +TextureImage lightmap; GLboolean LoadTGA(TextureImage *texture, const char *filename) // Loads A TGA File Into Memory @@ -123,7 +125,11 @@ void LoadGLTextures() { if(!LoadTGA(&road, "/rd/floor.tga")) { fprintf(stderr, "Error loading road texture"); } -}; + + if(!LoadTGA(&lightmap, "/rd/lightmap.tga")) { + fprintf(stderr, "Error loading lightmap texture"); + } +} /* A general OpenGL initialization function. Sets all of the initial parameters. 
*/ @@ -170,40 +176,64 @@ void RenderTower(counter) { float v = 1.0f * (counter + 1); + glActiveTexture(GL_TEXTURE0); + glEnable(GL_TEXTURE_2D); glBindTexture(GL_TEXTURE_2D, textures[counter].texID); + + glActiveTexture(GL_TEXTURE1); + glEnable(GL_TEXTURE_2D); + glBindTexture(GL_TEXTURE_2D, lightmap.texID); + glBegin(GL_QUADS); - glTexCoord2f(0, 0); + glMultiTexCoord2f(GL_TEXTURE0, 0, 0); + glMultiTexCoord2f(GL_TEXTURE1, 0, 0); glVertex3f(-width, 0,-width); - glTexCoord2f(1, 0); + glMultiTexCoord2f(GL_TEXTURE0, 1, 0); + glMultiTexCoord2f(GL_TEXTURE1, 1, 0); glVertex3f(-width, 0, width); - glTexCoord2f(1, v); + glMultiTexCoord2f(GL_TEXTURE0, 1, v); + glMultiTexCoord2f(GL_TEXTURE1, 1, 1); glVertex3f(-width, height, width); - glTexCoord2f(0, v); + glMultiTexCoord2f(GL_TEXTURE0, 0, v); + glMultiTexCoord2f(GL_TEXTURE1, 0, 1); glVertex3f(-width, height,-width); - glTexCoord2f(0, 0); + glMultiTexCoord2f(GL_TEXTURE0, 0, 0); + glMultiTexCoord2f(GL_TEXTURE1, 0, 0); glVertex3f(-width, 0, width); - glTexCoord2f(1, 0); + glMultiTexCoord2f(GL_TEXTURE0, 1, 0); + glMultiTexCoord2f(GL_TEXTURE1, 1, 0); glVertex3f( width, 0, width); - glTexCoord2f(1, v); + glMultiTexCoord2f(GL_TEXTURE0, 1, v); + glMultiTexCoord2f(GL_TEXTURE1, 1, 1); glVertex3f( width, height, width); - glTexCoord2f(0, v); + glMultiTexCoord2f(GL_TEXTURE0, 0, v); + glMultiTexCoord2f(GL_TEXTURE1, 0, 1); glVertex3f(-width, height, width); - glTexCoord2f(0, 0); + glMultiTexCoord2f(GL_TEXTURE0, 0, 0); + glMultiTexCoord2f(GL_TEXTURE1, 0, 0); glVertex3f(width, 0,width); - glTexCoord2f(1, 0); + glMultiTexCoord2f(GL_TEXTURE0, 1, 0); + glMultiTexCoord2f(GL_TEXTURE1, 1, 0); glVertex3f(width, 0,-width); - glTexCoord2f(1, v); + glMultiTexCoord2f(GL_TEXTURE0, 1, v); + glMultiTexCoord2f(GL_TEXTURE1, 1, 1); glVertex3f(width, height,-width); - glTexCoord2f(0, v); + glMultiTexCoord2f(GL_TEXTURE0, 0, v); + glMultiTexCoord2f(GL_TEXTURE1, 0, 1); glVertex3f(width, height, width); glEnd(); } void RenderFloor() { + 
glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, road.texID); + + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, 0); + glBegin(GL_QUADS); glTexCoord2f(0.0f, 0.0f); glVertex3f(-100, 0, 0); @@ -219,8 +249,30 @@ void RenderFloor() { /* The main drawing function. */ void DrawGLScene() { + static float z = 0.0f; + static char increasing = 1; + + const float max = 50.0f; + + if(increasing) { + z += 1.0f; + } else { + z -= 1.0f; + } + + if(z > max) { + increasing = !increasing; + z = max; + } + + if(z < 0.0f) { + increasing = !increasing; + z = 0.0f; + } + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // Clear The Screen And The Depth Buffer glLoadIdentity(); // Reset The View + glTranslatef(0, 0, z); GLubyte i = 0; diff --git a/samples/zclip/romdisk/lightmap.tga b/samples/zclip/romdisk/lightmap.tga new file mode 100644 index 0000000..af277b6 Binary files /dev/null and b/samples/zclip/romdisk/lightmap.tga differ