diff --git a/GL/draw.c b/GL/draw.c index aea1f34..f910333 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -1170,6 +1170,8 @@ void _glInitSubmissionTarget() { GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type, const GLvoid* indices) { + gl_assert(mode != GL_POLYGON); + SubmissionTarget* const target = &SUBMISSION_TARGET; AlignedVector* const extras = target->extras; @@ -1185,37 +1187,8 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL return; } - /* Polygons are treated as triangle fans, the only time this would be a - * problem is if we supported glPolygonMode(..., GL_LINE) but we don't. - * We optimise the triangle and quad cases. - */ - if(mode == GL_POLYGON) { - switch(count) { - case 2: - mode = GL_LINES; - break; - case 3: - mode = GL_TRIANGLES; - break; - case 4: - mode = GL_QUADS; - break; - default: - mode = GL_TRIANGLE_FAN; - } - } - - if(mode == GL_LINE_STRIP || mode == GL_LINES) { - fprintf(stderr, "Line drawing is currently unsupported\n"); - return; - } - GLboolean header_required = (target->output->vector.size == 0) || _glGPUStateIsDirty(); - - // We don't handle this any further, so just make sure we never pass it down */ - gl_assert(mode != GL_POLYGON); - target->output = _glActivePolyList(); target->count = (mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count; target->header_offset = target->output->vector.size; @@ -1308,6 +1281,35 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL // } } +static GLenum convertModeIfNecessary(GLenum mode, GLsizei count) { + /* Polygons are treated as triangle fans, the only time this would be a + * problem is if we supported glPolygonMode(..., GL_LINE) but we don't. + * We optimise the triangle and quad cases. 
+ */ + if(mode == GL_POLYGON) { + switch(count) { + case 2: + mode = GL_LINES; + break; + case 3: + mode = GL_TRIANGLES; + break; + case 4: + mode = GL_QUADS; + break; + default: + mode = GL_TRIANGLE_FAN; + } + } + + if(mode == GL_LINE_STRIP || mode == GL_LINES) { + _glKosThrowError(GL_INVALID_VALUE, __func__); + return GL_NONE; + } + + return mode; +} + void APIENTRY glDrawElements(GLenum mode, GLsizei count, GLenum type, const GLvoid* indices) { TRACE(); @@ -1315,6 +1317,11 @@ void APIENTRY glDrawElements(GLenum mode, GLsizei count, GLenum type, const GLvo return; } + mode = convertModeIfNecessary(mode, count); + if(mode == GL_NONE) { + return; + } + submitVertices(mode, 0, count, type, indices); } @@ -1325,9 +1332,169 @@ void APIENTRY glDrawArrays(GLenum mode, GLint first, GLsizei count) { return; } + mode = convertModeIfNecessary(mode, count); + if(mode == GL_NONE) { + return; + } + submitVertices(mode, first, count, GL_UNSIGNED_INT, NULL); } +static void glDrawPVRArrays(GLenum mode, GLsizei stride, GLint first, GLsizei count, void* data) { + GLint validTypes[] = { + GL_TRIANGLES, + GL_QUADS, + GL_TRIANGLE_STRIP, + 0 + }; + + if(_glCheckValidEnum(mode, validTypes, __func__) != 0) { + return; + } + + SubmissionTarget* const target = &SUBMISSION_TARGET; + AlignedVector* const extras = target->extras; + + GLboolean header_required = (target->output->vector.size == 0) || _glGPUStateIsDirty(); + + target->output = _glActivePolyList(); + target->count = (mode == GL_TRIANGLE_FAN) ? 
((count - 2) * 3) : count; + target->header_offset = target->output->vector.size; + target->start_offset = target->header_offset + (header_required); + + gl_assert(target->count); + + /* Make sure we have enough room for all the "extra" data */ + aligned_vector_resize(extras, target->count); + + /* Make room for the vertices and header */ + aligned_vector_extend(&target->output->vector, target->count + (header_required)); + + if(header_required) { + apply_poly_header(_glSubmissionTargetHeader(target), GL_FALSE, target->output, 0); + _glGPUStateMarkClean(); + } + + PREFETCH(data + (first * 32)); + + if(_glIsLightingEnabled()) { + _glMatrixLoadModelView(); + } else { + _glMatrixLoadModelViewProjection(); + } + +#define DO_TRANSFORM() \ + do { \ + Vertex* v = _glSubmissionTargetStart(target); \ + VertexExtra* ve = aligned_vector_at(target->extras, 0); \ + float w = 1.0f; \ + if(mode == GL_QUADS) { \ + const static uint32_t flags [] = { \ + GPU_CMD_VERTEX, GPU_CMD_VERTEX, GPU_CMD_VERTEX_EOL, GPU_CMD_VERTEX \ + }; \ + for(int_fast32_t i = 0; i < count; ++i, ++v) { \ + v->flags = flags[(i % 4)]; \ + TransformVertex(v->xyz, &w, v->xyz, &v->w); \ + } \ + v = _glSubmissionTargetStart(target); \ + Vertex* prev = (v + 2); \ + v += 3; \ + for(int_fast32_t i = 0; i < count; i +=4, v +=4, prev += 4) { \ + const Vertex t = (*prev); \ + *(prev) = *((v)); \ + *((v)) = t; \ + } \ + } else if(mode == GL_TRIANGLES) { \ + const static uint32_t flags [] = { \ + GPU_CMD_VERTEX, GPU_CMD_VERTEX, GPU_CMD_VERTEX_EOL \ + }; \ + for(int_fast32_t i = 0; i < count; ++i, ++v) { \ + PREFETCH(v + 3); \ + TransformVertex(v->xyz, &w, v->xyz, &v->w); \ + v->flags = flags[(i % 3)]; \ + } \ + } else { \ + for(int_fast32_t i = 0; i < count; ++i, ++v) { \ + PREFETCH(v + 6); \ + TransformVertex(v->xyz, &w, v->xyz, &v->w); \ + v->flags = GPU_CMD_VERTEX; \ + } \ + (v - 1)->flags = GPU_CMD_VERTEX_EOL; \ + } \ + } while(0) + + + if(stride == 32) { + uint8_t* src = (uint8_t*) data; + /* Copy the data directly 
*/ + sq_cpy(_glSubmissionTargetStart(target), src + (first * 32), count * 32); + + DO_TRANSFORM(); + } else { + assert(stride == 64); + + struct { + uint32_t data[8]; + }* src = data + (first * 32); + + Vertex* dst = aligned_vector_at(&target->output->vector, target->start_offset); + VertexExtra* dst2 = aligned_vector_at(target->extras, 0); + + for(int i = 0; i < count - 6; ++i) { + PREFETCH(src + 2); + *dst = *((Vertex*) src++); + dst++; + *dst2 = *((VertexExtra*) src++); + dst2++; + } + + PREFETCH(_glSubmissionTargetStart(target)); + PREFETCH(aligned_vector_at(target->extras, 0)); + + for(int i = count - 6; i < count; ++i) { + *dst = *((Vertex*) src++); + dst++; + *dst2 = *((VertexExtra*) src++); + dst2++; + } + + DO_TRANSFORM(); + } +} + + +void APIENTRY glDrawPVRArrays32KOS(GLenum mode, GLint first, GLsizei count, void* data) { + TRACE(); + + if(_glCheckImmediateModeInactive(__func__)) { + return; + } + + mode = convertModeIfNecessary(mode, count); + if(mode == GL_NONE) { + _glKosThrowError(GL_INVALID_VALUE, __func__); + return; + } + + glDrawPVRArrays(mode, 32, first, count, data); +} + +void APIENTRY glDrawPVRArrays64KOS(GLenum mode, GLint first, GLsizei count, void* data) { + TRACE(); + + if(_glCheckImmediateModeInactive(__func__)) { + return; + } + + mode = convertModeIfNecessary(mode, count); + if(mode == GL_NONE) { + _glKosThrowError(GL_INVALID_VALUE, __func__); + return; + } + + glDrawPVRArrays(mode, 64, first, count, data); +} + void APIENTRY glEnableClientState(GLenum cap) { TRACE(); diff --git a/GL/immediate.c b/GL/immediate.c index c0e2adc..1841c48 100644 --- a/GL/immediate.c +++ b/GL/immediate.c @@ -12,8 +12,6 @@ #include "private.h" -extern inline GLuint _glRecalcFastPath(); - GLboolean IMMEDIATE_MODE_ACTIVE = GL_FALSE; static GLenum ACTIVE_POLYGON_MODE = GL_TRIANGLES; @@ -23,57 +21,28 @@ static GLfloat UV_COORD[2] = {0.0f, 0.0f}; static GLfloat ST_COORD[2] = {0.0f, 0.0f}; static AlignedVector VERTICES; -static AttribPointerList IM_ATTRIBS; - -/* 
We store the list of attributes that have been "enabled" by a call to - glColor, glNormal, glTexCoord etc. otherwise we already have defaults that - can be applied faster */ -static GLuint IM_ENABLED_VERTEX_ATTRIBUTES = 0; typedef struct { + uint32_t padding; GLfloat x; GLfloat y; GLfloat z; GLfloat u; GLfloat v; - GLfloat s; - GLfloat t; GLubyte bgra[4]; + GLubyte obgra[4]; GLfloat nx; GLfloat ny; GLfloat nz; - GLuint padding[5]; + GLfloat s; + GLfloat t; + GLuint padding2[3]; } IMVertex; void _glInitImmediateMode(GLuint initial_size) { aligned_vector_init(&VERTICES, sizeof(IMVertex)); aligned_vector_reserve(&VERTICES, initial_size); - - IM_ATTRIBS.vertex.ptr = VERTICES.data; - IM_ATTRIBS.vertex.size = 3; - IM_ATTRIBS.vertex.type = GL_FLOAT; - IM_ATTRIBS.vertex.stride = sizeof(IMVertex); - - IM_ATTRIBS.uv.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 3); - IM_ATTRIBS.uv.stride = sizeof(IMVertex); - IM_ATTRIBS.uv.type = GL_FLOAT; - IM_ATTRIBS.uv.size = 2; - - IM_ATTRIBS.st.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 5); - IM_ATTRIBS.st.stride = sizeof(IMVertex); - IM_ATTRIBS.st.type = GL_FLOAT; - IM_ATTRIBS.st.size = 2; - - IM_ATTRIBS.colour.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 7); - IM_ATTRIBS.colour.size = GL_BGRA; /* Flipped color order */ - IM_ATTRIBS.colour.type = GL_UNSIGNED_BYTE; - IM_ATTRIBS.colour.stride = sizeof(IMVertex); - - IM_ATTRIBS.normal.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 7) + sizeof(uint32_t); - IM_ATTRIBS.normal.stride = sizeof(IMVertex); - IM_ATTRIBS.normal.type = GL_FLOAT; - IM_ATTRIBS.normal.size = 3; } void APIENTRY glBegin(GLenum mode) { @@ -87,8 +56,6 @@ void APIENTRY glBegin(GLenum mode) { } void APIENTRY glColor4f(GLfloat r, GLfloat g, GLfloat b, GLfloat a) { - IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; - COLOR[A8IDX] = (GLubyte)(a * 255.0f); COLOR[R8IDX] = (GLubyte)(r * 255.0f); COLOR[G8IDX] = (GLubyte)(g * 255.0f); @@ -96,8 +63,6 @@ void APIENTRY glColor4f(GLfloat r, GLfloat g, GLfloat b, 
GLfloat a) { } void APIENTRY glColor4ub(GLubyte r, GLubyte g, GLubyte b, GLubyte a) { - IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; - COLOR[A8IDX] = a; COLOR[R8IDX] = r; COLOR[G8IDX] = g; @@ -105,8 +70,6 @@ void APIENTRY glColor4ub(GLubyte r, GLubyte g, GLubyte b, GLubyte a) { } void APIENTRY glColor4ubv(const GLubyte *v) { - IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; - COLOR[A8IDX] = v[3]; COLOR[R8IDX] = v[0]; COLOR[G8IDX] = v[1]; @@ -114,8 +77,6 @@ void APIENTRY glColor4ubv(const GLubyte *v) { } void APIENTRY glColor4fv(const GLfloat* v) { - IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; - COLOR[B8IDX] = (GLubyte)(v[2] * 255); COLOR[G8IDX] = (GLubyte)(v[1] * 255); COLOR[R8IDX] = (GLubyte)(v[0] * 255); @@ -123,8 +84,6 @@ void APIENTRY glColor4fv(const GLfloat* v) { } void APIENTRY glColor3f(GLfloat r, GLfloat g, GLfloat b) { - IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; - COLOR[B8IDX] = (GLubyte)(b * 255.0f); COLOR[G8IDX] = (GLubyte)(g * 255.0f); COLOR[R8IDX] = (GLubyte)(r * 255.0f); @@ -132,8 +91,6 @@ void APIENTRY glColor3f(GLfloat r, GLfloat g, GLfloat b) { } void APIENTRY glColor3ub(GLubyte red, GLubyte green, GLubyte blue) { - IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; - COLOR[A8IDX] = 255; COLOR[R8IDX] = red; COLOR[G8IDX] = green; @@ -141,8 +98,6 @@ void APIENTRY glColor3ub(GLubyte red, GLubyte green, GLubyte blue) { } void APIENTRY glColor3ubv(const GLubyte *v) { - IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; - COLOR[A8IDX] = 255; COLOR[R8IDX] = v[0]; COLOR[G8IDX] = v[1]; @@ -150,8 +105,6 @@ void APIENTRY glColor3ubv(const GLubyte *v) { } void APIENTRY glColor3fv(const GLfloat* v) { - IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; - COLOR[A8IDX] = 255; COLOR[R8IDX] = (GLubyte)(v[0] * 255); COLOR[G8IDX] = (GLubyte)(v[1] * 255); @@ -159,20 +112,8 @@ void APIENTRY glColor3fv(const GLfloat* v) { } void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) { - IM_ENABLED_VERTEX_ATTRIBUTES |= 
VERTEX_ENABLED_FLAG; - - unsigned int cap = VERTICES.capacity; IMVertex* vert = aligned_vector_extend(&VERTICES, 1); - if(cap != VERTICES.capacity) { - /* Resizing could've invalidated the pointers */ - IM_ATTRIBS.vertex.ptr = VERTICES.data; - IM_ATTRIBS.uv.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 3); - IM_ATTRIBS.st.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 5); - IM_ATTRIBS.colour.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 7); - IM_ATTRIBS.normal.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 7) + sizeof(uint32_t); - } - vert->x = x; vert->y = y; vert->z = z; @@ -211,11 +152,9 @@ void APIENTRY glVertex4fv(const GLfloat* v) { void APIENTRY glMultiTexCoord2fARB(GLenum target, GLfloat s, GLfloat t) { if(target == GL_TEXTURE0) { - IM_ENABLED_VERTEX_ATTRIBUTES |= UV_ENABLED_FLAG; UV_COORD[0] = s; UV_COORD[1] = t; } else if(target == GL_TEXTURE1) { - IM_ENABLED_VERTEX_ATTRIBUTES |= ST_ENABLED_FLAG; ST_COORD[0] = s; ST_COORD[1] = t; } else { @@ -225,7 +164,6 @@ void APIENTRY glMultiTexCoord2fARB(GLenum target, GLfloat s, GLfloat t) { } void APIENTRY glTexCoord1f(GLfloat u) { - IM_ENABLED_VERTEX_ATTRIBUTES |= UV_ENABLED_FLAG; UV_COORD[0] = u; UV_COORD[1] = 0.0f; } @@ -235,7 +173,6 @@ void APIENTRY glTexCoord1fv(const GLfloat* v) { } void APIENTRY glTexCoord2f(GLfloat u, GLfloat v) { - IM_ENABLED_VERTEX_ATTRIBUTES |= UV_ENABLED_FLAG; UV_COORD[0] = u; UV_COORD[1] = v; } @@ -245,7 +182,6 @@ void APIENTRY glTexCoord2fv(const GLfloat* v) { } void APIENTRY glNormal3f(GLfloat x, GLfloat y, GLfloat z) { - IM_ENABLED_VERTEX_ATTRIBUTES |= NORMAL_ENABLED_FLAG; NORMAL[0] = x; NORMAL[1] = y; NORMAL[2] = z; @@ -258,38 +194,9 @@ void APIENTRY glNormal3fv(const GLfloat* v) { void APIENTRY glEnd() { IMMEDIATE_MODE_ACTIVE = GL_FALSE; - GLuint* attrs = &ENABLED_VERTEX_ATTRIBUTES; - - /* Redirect attrib pointers */ - AttribPointerList stashed_attrib_pointers = ATTRIB_POINTERS; - ATTRIB_POINTERS = IM_ATTRIBS; - - GLuint prevAttrs = *attrs; - - *attrs = 
IM_ENABLED_VERTEX_ATTRIBUTES; - - /* Store the fast path enabled setting so we can restore it - * after drawing */ - const GLboolean fp_was_enabled = FAST_PATH_ENABLED; - -#ifndef NDEBUG - // Immediate mode should always activate the fast path - GLuint fastPathEnabled = _glRecalcFastPath(); - gl_assert(fastPathEnabled); -#else - /* If we're not debugging, set to true - we assume we haven't broken it! */ - FAST_PATH_ENABLED = GL_TRUE; -#endif - - glDrawArrays(ACTIVE_POLYGON_MODE, 0, VERTICES.size); - - ATTRIB_POINTERS = stashed_attrib_pointers; - - *attrs = prevAttrs; + glDrawPVRArrays64KOS(ACTIVE_POLYGON_MODE, 0, VERTICES.size, VERTICES.data); aligned_vector_clear(&VERTICES); - - FAST_PATH_ENABLED = fp_was_enabled; } void APIENTRY glRectf(GLfloat x1, GLfloat y1, GLfloat x2, GLfloat y2) { diff --git a/GL/platforms/sh4.c b/GL/platforms/sh4.c index 05a8d6f..9c1f7dd 100644 --- a/GL/platforms/sh4.c +++ b/GL/platforms/sh4.c @@ -72,7 +72,7 @@ GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) { vertex->xyz[2] = (vertex->w == 1.0f) ? 
_glFastInvert(1.0001f + vertex->xyz[2]) : f; } -GL_FORCE_INLINE void _glSubmitHeaderOrVertex(uint32_t* d, const Vertex* v) { +GL_FORCE_INLINE void _glSubmitHeaderOrVertex(uint32_t* d, const uint32_t* s) { #ifndef NDEBUG gl_assert(!isnan(v->xyz[2])); gl_assert(!isnan(v->w)); @@ -81,8 +81,6 @@ GL_FORCE_INLINE void _glSubmitHeaderOrVertex(uint32_t* d, const Vertex* v) { #if CLIP_DEBUG printf("Submitting: %x (%x)\n", v, v->flags); #endif - - uint32_t *s = (uint32_t*) v; __asm__("pref @%0" : : "r"(s + 8)); /* prefetch 32 bytes for next loop */ d[0] = *(s++); d[1] = *(s++); @@ -172,37 +170,28 @@ GL_FORCE_INLINE void ShiftRotateTriangle() { #define SPAN_SORT_CFG 0x005F8030 -void SceneListSubmit(void* src, int n) { +static inline void submit_unclipped(uint32_t* d, Vertex* vertex, int n) { const float h = GetVideoMode()->height; - PVR_SET(SPAN_SORT_CFG, 0x0); - - uint32_t *d = (uint32_t*) SQ_BASE_ADDRESS; - *PVR_LMMODE0 = 0x0; /* Enable 64bit mode */ - - Vertex __attribute__((aligned(32))) tmp; - - /* Perform perspective divide on each vertex */ - Vertex* vertex = (Vertex*) src; - - if(!_glNearZClippingEnabled()) { - /* Prep store queues */ - - for(int i = 0; i < n; ++i, ++vertex) { - PREFETCH(vertex + 1); - if(glIsVertex(vertex->flags)) { - _glPerspectiveDivideVertex(vertex, h); - } - _glSubmitHeaderOrVertex(d, vertex); + for(int i = 0; i < n; ++i, ++vertex) { + PREFETCH(vertex + 1); + if(glIsVertex(vertex->flags)) { + _glPerspectiveDivideVertex(vertex, h); } - - /* Wait for both store queues to complete */ - d = (uint32_t *) SQ_BASE_ADDRESS; - d[0] = d[8] = 0; - - return; + _glSubmitHeaderOrVertex(d, (const uint32_t*) vertex); } + /* Wait for both store queues to complete */ + d = (uint32_t *) SQ_BASE_ADDRESS; + d[0] = d[8] = 0; + + return; +} + +static inline void submit_clipped(uint32_t* d, Vertex* vertex, int n) { + static Vertex __attribute__((aligned(32))) tmp; + const float h = GetVideoMode()->height; + tri_count = 0; strip_count = 0; @@ -226,7 +215,7 @@ void 
SceneListSubmit(void* src, int n) { /* We hit a header */ tri_count = 0; strip_count = 0; - _glSubmitHeaderOrVertex(d, vertex); + _glSubmitHeaderOrVertex(d, (const uint32_t*) vertex); continue; } } @@ -257,11 +246,11 @@ void SceneListSubmit(void* src, int n) { tmp = *(vertex - 2); \ /* If we had triangles ahead of this one, submit and finalize */ \ _glPerspectiveDivideVertex(&tmp, h); \ - _glSubmitHeaderOrVertex(d, &tmp); \ + _glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp); \ tmp = *(vertex - 1); \ tmp.flags = GPU_CMD_VERTEX_EOL; \ _glPerspectiveDivideVertex(&tmp, h); \ - _glSubmitHeaderOrVertex(d, &tmp); \ + _glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp); \ } bool is_last_in_strip = glIsLastVertex(vertex->flags); @@ -273,17 +262,17 @@ void SceneListSubmit(void* src, int n) { tmp = *triangle[0].v; tmp.flags = GPU_CMD_VERTEX; _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); + _glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp); _glClipEdge(triangle[0].v, triangle[1].v, &tmp); tmp.flags = GPU_CMD_VERTEX; _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); + _glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp); _glClipEdge(triangle[2].v, triangle[0].v, &tmp); tmp.flags = GPU_CMD_VERTEX_EOL; _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); + _glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp); } break; case 2: { SUBMIT_QUEUED(); @@ -291,17 +280,17 @@ void SceneListSubmit(void* src, int n) { _glClipEdge(triangle[0].v, triangle[1].v, &tmp); tmp.flags = GPU_CMD_VERTEX; _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); + _glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp); tmp = *triangle[1].v; tmp.flags = GPU_CMD_VERTEX; _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); + _glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp); _glClipEdge(triangle[1].v, triangle[2].v, &tmp); tmp.flags = GPU_CMD_VERTEX_EOL; _glPerspectiveDivideVertex(&tmp, h); - 
_glSubmitHeaderOrVertex(d, &tmp); + _glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp); } break; case 3: { SUBMIT_QUEUED(); @@ -309,22 +298,22 @@ void SceneListSubmit(void* src, int n) { tmp = *triangle[0].v; tmp.flags = GPU_CMD_VERTEX; _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); + _glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp); tmp = *triangle[1].v; tmp.flags = GPU_CMD_VERTEX; _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); + _glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp); _glClipEdge(triangle[2].v, triangle[0].v, &tmp); tmp.flags = GPU_CMD_VERTEX; _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); + _glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp); _glClipEdge(triangle[1].v, triangle[2].v, &tmp); tmp.flags = GPU_CMD_VERTEX_EOL; _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); + _glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp); } break; case 4: { SUBMIT_QUEUED(); @@ -332,17 +321,17 @@ void SceneListSubmit(void* src, int n) { _glClipEdge(triangle[1].v, triangle[2].v, &tmp); tmp.flags = GPU_CMD_VERTEX; _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); + _glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp); tmp = *triangle[2].v; tmp.flags = GPU_CMD_VERTEX; _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); + _glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp); _glClipEdge(triangle[2].v, triangle[0].v, &tmp); tmp.flags = GPU_CMD_VERTEX_EOL; _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); + _glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp); } break; case 5: { SUBMIT_QUEUED(); @@ -350,22 +339,22 @@ void SceneListSubmit(void* src, int n) { tmp = *triangle[0].v; tmp.flags = GPU_CMD_VERTEX; _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); + _glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp); _glClipEdge(triangle[0].v, triangle[1].v, &tmp); tmp.flags = GPU_CMD_VERTEX; 
_glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); + _glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp); tmp = *triangle[2].v; tmp.flags = GPU_CMD_VERTEX; _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); + _glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp); _glClipEdge(triangle[1].v, triangle[2].v, &tmp); tmp.flags = GPU_CMD_VERTEX_EOL; _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); + _glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp); } break; case 6: { SUBMIT_QUEUED(); @@ -373,33 +362,33 @@ void SceneListSubmit(void* src, int n) { _glClipEdge(triangle[0].v, triangle[1].v, &tmp); tmp.flags = GPU_CMD_VERTEX; _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); + _glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp); tmp = *triangle[1].v; tmp.flags = GPU_CMD_VERTEX; _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); + _glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp); _glClipEdge(triangle[2].v, triangle[0].v, &tmp); tmp.flags = GPU_CMD_VERTEX; _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); + _glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp); tmp = *triangle[2].v; tmp.flags = GPU_CMD_VERTEX_EOL; _glPerspectiveDivideVertex(&tmp, h); - _glSubmitHeaderOrVertex(d, &tmp); + _glSubmitHeaderOrVertex(d, (const uint32_t*) &tmp); } break; case 7: { /* All the vertices are visible! 
We divide and submit v0, then shift */ _glPerspectiveDivideVertex(vertex - 2, h); - _glSubmitHeaderOrVertex(d, vertex - 2); + _glSubmitHeaderOrVertex(d, (const uint32_t*) (vertex - 2)); if(is_last_in_strip) { _glPerspectiveDivideVertex(vertex - 1, h); - _glSubmitHeaderOrVertex(d, vertex - 1); + _glSubmitHeaderOrVertex(d, (const uint32_t*) (vertex - 1)); _glPerspectiveDivideVertex(vertex, h); - _glSubmitHeaderOrVertex(d, vertex); + _glSubmitHeaderOrVertex(d, (const uint32_t*) vertex); tri_count = 0; strip_count = 0; } @@ -428,6 +417,25 @@ void SceneListSubmit(void* src, int n) { d[0] = d[8] = 0; } +void SceneListSubmit(void* src, int n) { + PVR_SET(SPAN_SORT_CFG, 0x0); + + uint32_t *d = (uint32_t*) SQ_BASE_ADDRESS; + + *PVR_LMMODE0 = 0x0; /* Enable 64bit mode */ + *((volatile int *)0xA05F6888) = 1; + + /* Perform perspective divide on each vertex */ + Vertex* vertex = (Vertex*) src; + + if(!_glNearZClippingEnabled()) { + /* Prep store queues */ + submit_unclipped(d, vertex, n); + } else { + submit_clipped(d, vertex, n); + } +} + void SceneListFinish() { pvr_list_finish(); } diff --git a/GL/private.h b/GL/private.h index f309571..aad4657 100644 --- a/GL/private.h +++ b/GL/private.h @@ -245,6 +245,8 @@ do { \ typedef struct { float nxyz[3]; float st[2]; + float w; + uint32_t padding[2]; } VertexExtra; /* Generating PVR vertices from the user-submitted data gets complicated, particularly diff --git a/include/GL/glkos.h b/include/GL/glkos.h index e2ef11a..b894f8d 100644 --- a/include/GL/glkos.h +++ b/include/GL/glkos.h @@ -193,5 +193,60 @@ GLAPI GLvoid APIENTRY glDefragmentTextureMemory_KOS(void); //for palette internal format (glfcConfig) #define GL_RGB565_KOS 0xEF40 +/** + * This is a custom extension to OpenGL that allows submitting data + * directly in the expected format and alignment of the Dreamcast PowerVR + * chip. 
+ * + * Vertices sent via this API should be 4-byte aligned (ideally 32) and + * use the following layout: + * + * uint32_t padding; // (Space for the PVR command header) + * float xyz[3]; // Pos + * float uv[2]; // Tex coord + * uint8_t bgra[4]; // Vertex color + * uint8_t obgra[4]; // Offset color (currently unused) + * + * This layout is 32 bytes in size and is copied directly into the PVR command stream. + * + * However, GL also allows for other attributes: normals, and secondary texture coordinates. + * + * For that reason, you can use this API to submit vertices in an "extended" format which + * is as follows: + * + * uint32_t header; // PVR command header + * float xyz[3]; // Pos + * float uv[2]; // Tex coord + * uint8_t bgra[4]; // Vertex color + * uint8_t obgra[4]; // Offset color (currently unused) + * float nxyz[3]; // Normal + * float st[2]; // Secondary texture coords + * uint32_t padding[3]; // Internal use. + */ + +/* Draw polygons by directly copying the vertices into the PVR command stream. Transformation + will happen as usual. Enabled client state *is NOT* respected. + ERRORS: + + - GL_INVALID_ENUM is generated if mode is not an accepted value (GL_TRIANGLES, GL_QUADS, GL_TRIANGLE_STRIP are accepted) + - GL_INVALID_VALUE is generated if count is negative + - GL_INVALID_VALUE is generated if data is NULL +*/ +void glDrawPVRArrays32KOS(GLenum mode, GLint first, GLsizei count, void* data); + +/* Draw polygons by directly copying the vertices into the PVR command stream. Transformation + will happen as usual. Enabled client state *is NOT* respected. This uses the extended vertex format which includes + normals and secondary texture coordinates. These will only take effect if you've enabled secondary texture coordinates + with glEnable (for ST) or enabled lighting (for normals). GL_NORMALIZE is not respected. 
+ + ERRORS: + + - GL_INVALID_ENUM is generated if mode is not an accepted value (GL_TRIANGLES, GL_QUADS, GL_TRIANGLE_STRIP are accepted) + - GL_INVALID_VALUE is generated if count is negative + - GL_INVALID_VALUE is generated if data is NULL +*/ +void glDrawPVRArrays64KOS(GLenum mode, GLint first, GLsizei count, void* data); + + __END_DECLS diff --git a/samples/quadmark/main.c b/samples/quadmark/main.c index 4da3046..c5644c8 100644 --- a/samples/quadmark/main.c +++ b/samples/quadmark/main.c @@ -69,7 +69,11 @@ int check_start() { void setup() { //PVR needs to warm up for a frame, or results will be low - glKosInit(); + GLdcConfig config; + glKosInitConfig(&config); + config.initial_op_capacity = 7000 * 4; + config.initial_immediate_capacity = 7000 * 4; + glKosInitEx(&config); glMatrixMode(GL_MODELVIEW); glLoadIdentity(); glOrtho(0, 640, 0, 480, -100, 100); diff --git a/samples/terrain/main.c b/samples/terrain/main.c index d7f7545..030d5c0 100644 --- a/samples/terrain/main.c +++ b/samples/terrain/main.c @@ -9,7 +9,7 @@ #include "GL/glu.h" #include "GL/glkos.h" -#define TERRAIN_SIZE 100 +#define TERRAIN_SIZE 75 #define TERRAIN_SCALE 1.0f #define TERRAIN_HEIGHT_SCALE 1.0f @@ -155,11 +155,29 @@ int main(int argc, char **argv) InitGL(640, 480); ReSizeGLScene(640, 480); + uint64_t accum = 0; + uint64_t last_time = timer_us_gettime64(); + uint32_t frames = 0; + while(1) { if(check_start()) break; DrawGLScene(); + + ++frames; + uint64_t now = timer_us_gettime64(); + uint64_t diff = (now - last_time); + last_time = now; + accum += diff; + + if(accum > 5000000) { + printf("Frame time: %f\n", (float)accum / frames / 1000000.0f); + printf("FPS: %f\n", ((float) frames) / 5.0f); + fflush(stdout); + frames = 0; + accum = 0; + } } return 0;