From 279581c5a6fbea7caa11bc32235fb36f541bd796 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Sun, 19 Mar 2023 20:30:35 +0000 Subject: [PATCH] Add a new submission API and switch immediate mode to it --- GL/draw.c | 225 ++++++++++++++++++++++++++++++++++------ GL/immediate.c | 105 ++----------------- GL/private.h | 2 + include/GL/glkos.h | 54 ++++++++++ samples/quadmark/main.c | 6 +- samples/terrain/main.c | 14 ++- 6 files changed, 276 insertions(+), 130 deletions(-) diff --git a/GL/draw.c b/GL/draw.c index aea1f34..f910333 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -1170,6 +1170,8 @@ void _glInitSubmissionTarget() { GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type, const GLvoid* indices) { + gl_assert(mode != GL_POLYGON); + SubmissionTarget* const target = &SUBMISSION_TARGET; AlignedVector* const extras = target->extras; @@ -1185,37 +1187,8 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL return; } - /* Polygons are treated as triangle fans, the only time this would be a - * problem is if we supported glPolygonMode(..., GL_LINE) but we don't. - * We optimise the triangle and quad cases. - */ - if(mode == GL_POLYGON) { - switch(count) { - case 2: - mode = GL_LINES; - break; - case 3: - mode = GL_TRIANGLES; - break; - case 4: - mode = GL_QUADS; - break; - default: - mode = GL_TRIANGLE_FAN; - } - } - - if(mode == GL_LINE_STRIP || mode == GL_LINES) { - fprintf(stderr, "Line drawing is currently unsupported\n"); - return; - } - GLboolean header_required = (target->output->vector.size == 0) || _glGPUStateIsDirty(); - - // We don't handle this any further, so just make sure we never pass it down */ - gl_assert(mode != GL_POLYGON); - target->output = _glActivePolyList(); target->count = (mode == GL_TRIANGLE_FAN) ? 
((count - 2) * 3) : count; target->header_offset = target->output->vector.size; @@ -1308,6 +1281,35 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL // } } +static GLenum convertModeIfNecessary(GLenum mode, GLsizei count) { + /* Polygons are treated as triangle fans, the only time this would be a + * problem is if we supported glPolygonMode(..., GL_LINE) but we don't. + * We optimise the triangle and quad cases. + */ + if(mode == GL_POLYGON) { + switch(count) { + case 2: + mode = GL_LINES; + break; + case 3: + mode = GL_TRIANGLES; + break; + case 4: + mode = GL_QUADS; + break; + default: + mode = GL_TRIANGLE_FAN; + } + } + + if(mode == GL_LINE_STRIP || mode == GL_LINES) { + _glKosThrowError(GL_INVALID_VALUE, __func__); + return GL_NONE; + } + + return mode; +} + void APIENTRY glDrawElements(GLenum mode, GLsizei count, GLenum type, const GLvoid* indices) { TRACE(); @@ -1315,6 +1317,11 @@ void APIENTRY glDrawElements(GLenum mode, GLsizei count, GLenum type, const GLvo return; } + mode = convertModeIfNecessary(mode, count); + if(mode == GL_NONE) { + return; + } + submitVertices(mode, 0, count, type, indices); } @@ -1325,9 +1332,169 @@ void APIENTRY glDrawArrays(GLenum mode, GLint first, GLsizei count) { return; } + mode = convertModeIfNecessary(mode, count); + if(mode == GL_NONE) { + return; + } + submitVertices(mode, first, count, GL_UNSIGNED_INT, NULL); } +static void glDrawPVRArrays(GLenum mode, GLsizei stride, GLint first, GLsizei count, void* data) { + GLint validTypes[] = { + GL_TRIANGLES, + GL_QUADS, + GL_TRIANGLE_STRIP, + 0 + }; + + if(_glCheckValidEnum(mode, validTypes, __func__) != 0) { + return; + } + + SubmissionTarget* const target = &SUBMISSION_TARGET; + AlignedVector* const extras = target->extras; + + GLboolean header_required = (target->output->vector.size == 0) || _glGPUStateIsDirty(); + + target->output = _glActivePolyList(); + target->count = (mode == GL_TRIANGLE_FAN) ? 
((count - 2) * 3) : count; + target->header_offset = target->output->vector.size; + target->start_offset = target->header_offset + (header_required); + + gl_assert(target->count); + + /* Make sure we have enough room for all the "extra" data */ + aligned_vector_resize(extras, target->count); + + /* Make room for the vertices and header */ + aligned_vector_extend(&target->output->vector, target->count + (header_required)); + + if(header_required) { + apply_poly_header(_glSubmissionTargetHeader(target), GL_FALSE, target->output, 0); + _glGPUStateMarkClean(); + } + + PREFETCH(data + (first * 32)); + + if(_glIsLightingEnabled()) { + _glMatrixLoadModelView(); + } else { + _glMatrixLoadModelViewProjection(); + } + +#define DO_TRANSFORM() \ + do { \ + Vertex* v = _glSubmissionTargetStart(target); \ + VertexExtra* ve = aligned_vector_at(target->extras, 0); \ + float w = 1.0f; \ + if(mode == GL_QUADS) { \ + const static uint32_t flags [] = { \ + GPU_CMD_VERTEX, GPU_CMD_VERTEX, GPU_CMD_VERTEX_EOL, GPU_CMD_VERTEX \ + }; \ + for(int_fast32_t i = 0; i < count; ++i, ++v) { \ + v->flags = flags[(i % 4)]; \ + TransformVertex(v->xyz, &w, v->xyz, &v->w); \ + } \ + v = _glSubmissionTargetStart(target); \ + Vertex* prev = (v + 2); \ + v += 3; \ + for(int_fast32_t i = 0; i < count; i +=4, v +=4, prev += 4) { \ + const Vertex t = (*prev); \ + *(prev) = *((v)); \ + *((v)) = t; \ + } \ + } else if(mode == GL_TRIANGLES) { \ + const static uint32_t flags [] = { \ + GPU_CMD_VERTEX, GPU_CMD_VERTEX, GPU_CMD_VERTEX_EOL \ + }; \ + for(int_fast32_t i = 0; i < count; ++i, ++v) { \ + PREFETCH(v + 3); \ + TransformVertex(v->xyz, &w, v->xyz, &v->w); \ + v->flags = flags[(i % 3)]; \ + } \ + } else { \ + for(int_fast32_t i = 0; i < count; ++i, ++v) { \ + PREFETCH(v + 6); \ + TransformVertex(v->xyz, &w, v->xyz, &v->w); \ + v->flags = GPU_CMD_VERTEX; \ + } \ + (v - 1)->flags = GPU_CMD_VERTEX_EOL; \ + } \ + } while(0) + + + if(stride == 32) { + uint8_t* src = (uint8_t*) data; + /* Copy the data directly 
*/ + sq_cpy(_glSubmissionTargetStart(target), src + (first * 32), count * 32); + + DO_TRANSFORM(); + } else { + assert(stride == 64); + + struct { + uint32_t data[8]; + }* src = data + (first * 32); + + Vertex* dst = aligned_vector_at(&target->output->vector, target->start_offset); + VertexExtra* dst2 = aligned_vector_at(target->extras, 0); + + for(int i = 0; i < count - 6; ++i) { + PREFETCH(src + 2); + *dst = *((Vertex*) src++); + dst++; + *dst2 = *((VertexExtra*) src++); + dst2++; + } + + PREFETCH(_glSubmissionTargetStart(target)); + PREFETCH(aligned_vector_at(target->extras, 0)); + + for(int i = count - 6; i < count; ++i) { + *dst = *((Vertex*) src++); + dst++; + *dst2 = *((VertexExtra*) src++); + dst2++; + } + + DO_TRANSFORM(); + } +} + + +void APIENTRY glDrawPVRArrays32KOS(GLenum mode, GLint first, GLsizei count, void* data) { + TRACE(); + + if(_glCheckImmediateModeInactive(__func__)) { + return; + } + + mode = convertModeIfNecessary(mode, count); + if(mode == GL_NONE) { + _glKosThrowError(GL_INVALID_VALUE, __func__); + return; + } + + glDrawPVRArrays(mode, 32, first, count, data); +} + +void APIENTRY glDrawPVRArrays64KOS(GLenum mode, GLint first, GLsizei count, void* data) { + TRACE(); + + if(_glCheckImmediateModeInactive(__func__)) { + return; + } + + mode = convertModeIfNecessary(mode, count); + if(mode == GL_NONE) { + _glKosThrowError(GL_INVALID_VALUE, __func__); + return; + } + + glDrawPVRArrays(mode, 64, first, count, data); +} + void APIENTRY glEnableClientState(GLenum cap) { TRACE(); diff --git a/GL/immediate.c b/GL/immediate.c index c0e2adc..1841c48 100644 --- a/GL/immediate.c +++ b/GL/immediate.c @@ -12,8 +12,6 @@ #include "private.h" -extern inline GLuint _glRecalcFastPath(); - GLboolean IMMEDIATE_MODE_ACTIVE = GL_FALSE; static GLenum ACTIVE_POLYGON_MODE = GL_TRIANGLES; @@ -23,57 +21,28 @@ static GLfloat UV_COORD[2] = {0.0f, 0.0f}; static GLfloat ST_COORD[2] = {0.0f, 0.0f}; static AlignedVector VERTICES; -static AttribPointerList IM_ATTRIBS; - -/* 
We store the list of attributes that have been "enabled" by a call to - glColor, glNormal, glTexCoord etc. otherwise we already have defaults that - can be applied faster */ -static GLuint IM_ENABLED_VERTEX_ATTRIBUTES = 0; typedef struct { + uint32_t padding; GLfloat x; GLfloat y; GLfloat z; GLfloat u; GLfloat v; - GLfloat s; - GLfloat t; GLubyte bgra[4]; + GLubyte obgra[4]; GLfloat nx; GLfloat ny; GLfloat nz; - GLuint padding[5]; + GLfloat s; + GLfloat t; + GLuint padding2[3]; } IMVertex; void _glInitImmediateMode(GLuint initial_size) { aligned_vector_init(&VERTICES, sizeof(IMVertex)); aligned_vector_reserve(&VERTICES, initial_size); - - IM_ATTRIBS.vertex.ptr = VERTICES.data; - IM_ATTRIBS.vertex.size = 3; - IM_ATTRIBS.vertex.type = GL_FLOAT; - IM_ATTRIBS.vertex.stride = sizeof(IMVertex); - - IM_ATTRIBS.uv.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 3); - IM_ATTRIBS.uv.stride = sizeof(IMVertex); - IM_ATTRIBS.uv.type = GL_FLOAT; - IM_ATTRIBS.uv.size = 2; - - IM_ATTRIBS.st.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 5); - IM_ATTRIBS.st.stride = sizeof(IMVertex); - IM_ATTRIBS.st.type = GL_FLOAT; - IM_ATTRIBS.st.size = 2; - - IM_ATTRIBS.colour.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 7); - IM_ATTRIBS.colour.size = GL_BGRA; /* Flipped color order */ - IM_ATTRIBS.colour.type = GL_UNSIGNED_BYTE; - IM_ATTRIBS.colour.stride = sizeof(IMVertex); - - IM_ATTRIBS.normal.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 7) + sizeof(uint32_t); - IM_ATTRIBS.normal.stride = sizeof(IMVertex); - IM_ATTRIBS.normal.type = GL_FLOAT; - IM_ATTRIBS.normal.size = 3; } void APIENTRY glBegin(GLenum mode) { @@ -87,8 +56,6 @@ void APIENTRY glBegin(GLenum mode) { } void APIENTRY glColor4f(GLfloat r, GLfloat g, GLfloat b, GLfloat a) { - IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; - COLOR[A8IDX] = (GLubyte)(a * 255.0f); COLOR[R8IDX] = (GLubyte)(r * 255.0f); COLOR[G8IDX] = (GLubyte)(g * 255.0f); @@ -96,8 +63,6 @@ void APIENTRY glColor4f(GLfloat r, GLfloat g, GLfloat b, 
GLfloat a) { } void APIENTRY glColor4ub(GLubyte r, GLubyte g, GLubyte b, GLubyte a) { - IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; - COLOR[A8IDX] = a; COLOR[R8IDX] = r; COLOR[G8IDX] = g; @@ -105,8 +70,6 @@ void APIENTRY glColor4ub(GLubyte r, GLubyte g, GLubyte b, GLubyte a) { } void APIENTRY glColor4ubv(const GLubyte *v) { - IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; - COLOR[A8IDX] = v[3]; COLOR[R8IDX] = v[0]; COLOR[G8IDX] = v[1]; @@ -114,8 +77,6 @@ void APIENTRY glColor4ubv(const GLubyte *v) { } void APIENTRY glColor4fv(const GLfloat* v) { - IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; - COLOR[B8IDX] = (GLubyte)(v[2] * 255); COLOR[G8IDX] = (GLubyte)(v[1] * 255); COLOR[R8IDX] = (GLubyte)(v[0] * 255); @@ -123,8 +84,6 @@ void APIENTRY glColor4fv(const GLfloat* v) { } void APIENTRY glColor3f(GLfloat r, GLfloat g, GLfloat b) { - IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; - COLOR[B8IDX] = (GLubyte)(b * 255.0f); COLOR[G8IDX] = (GLubyte)(g * 255.0f); COLOR[R8IDX] = (GLubyte)(r * 255.0f); @@ -132,8 +91,6 @@ void APIENTRY glColor3f(GLfloat r, GLfloat g, GLfloat b) { } void APIENTRY glColor3ub(GLubyte red, GLubyte green, GLubyte blue) { - IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; - COLOR[A8IDX] = 255; COLOR[R8IDX] = red; COLOR[G8IDX] = green; @@ -141,8 +98,6 @@ void APIENTRY glColor3ub(GLubyte red, GLubyte green, GLubyte blue) { } void APIENTRY glColor3ubv(const GLubyte *v) { - IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; - COLOR[A8IDX] = 255; COLOR[R8IDX] = v[0]; COLOR[G8IDX] = v[1]; @@ -150,8 +105,6 @@ void APIENTRY glColor3ubv(const GLubyte *v) { } void APIENTRY glColor3fv(const GLfloat* v) { - IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG; - COLOR[A8IDX] = 255; COLOR[R8IDX] = (GLubyte)(v[0] * 255); COLOR[G8IDX] = (GLubyte)(v[1] * 255); @@ -159,20 +112,8 @@ void APIENTRY glColor3fv(const GLfloat* v) { } void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) { - IM_ENABLED_VERTEX_ATTRIBUTES |= 
VERTEX_ENABLED_FLAG; - - unsigned int cap = VERTICES.capacity; IMVertex* vert = aligned_vector_extend(&VERTICES, 1); - if(cap != VERTICES.capacity) { - /* Resizing could've invalidated the pointers */ - IM_ATTRIBS.vertex.ptr = VERTICES.data; - IM_ATTRIBS.uv.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 3); - IM_ATTRIBS.st.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 5); - IM_ATTRIBS.colour.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 7); - IM_ATTRIBS.normal.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 7) + sizeof(uint32_t); - } - vert->x = x; vert->y = y; vert->z = z; @@ -211,11 +152,9 @@ void APIENTRY glVertex4fv(const GLfloat* v) { void APIENTRY glMultiTexCoord2fARB(GLenum target, GLfloat s, GLfloat t) { if(target == GL_TEXTURE0) { - IM_ENABLED_VERTEX_ATTRIBUTES |= UV_ENABLED_FLAG; UV_COORD[0] = s; UV_COORD[1] = t; } else if(target == GL_TEXTURE1) { - IM_ENABLED_VERTEX_ATTRIBUTES |= ST_ENABLED_FLAG; ST_COORD[0] = s; ST_COORD[1] = t; } else { @@ -225,7 +164,6 @@ void APIENTRY glMultiTexCoord2fARB(GLenum target, GLfloat s, GLfloat t) { } void APIENTRY glTexCoord1f(GLfloat u) { - IM_ENABLED_VERTEX_ATTRIBUTES |= UV_ENABLED_FLAG; UV_COORD[0] = u; UV_COORD[1] = 0.0f; } @@ -235,7 +173,6 @@ void APIENTRY glTexCoord1fv(const GLfloat* v) { } void APIENTRY glTexCoord2f(GLfloat u, GLfloat v) { - IM_ENABLED_VERTEX_ATTRIBUTES |= UV_ENABLED_FLAG; UV_COORD[0] = u; UV_COORD[1] = v; } @@ -245,7 +182,6 @@ void APIENTRY glTexCoord2fv(const GLfloat* v) { } void APIENTRY glNormal3f(GLfloat x, GLfloat y, GLfloat z) { - IM_ENABLED_VERTEX_ATTRIBUTES |= NORMAL_ENABLED_FLAG; NORMAL[0] = x; NORMAL[1] = y; NORMAL[2] = z; @@ -258,38 +194,9 @@ void APIENTRY glNormal3fv(const GLfloat* v) { void APIENTRY glEnd() { IMMEDIATE_MODE_ACTIVE = GL_FALSE; - GLuint* attrs = &ENABLED_VERTEX_ATTRIBUTES; - - /* Redirect attrib pointers */ - AttribPointerList stashed_attrib_pointers = ATTRIB_POINTERS; - ATTRIB_POINTERS = IM_ATTRIBS; - - GLuint prevAttrs = *attrs; - - *attrs = 
IM_ENABLED_VERTEX_ATTRIBUTES; - - /* Store the fast path enabled setting so we can restore it - * after drawing */ - const GLboolean fp_was_enabled = FAST_PATH_ENABLED; - -#ifndef NDEBUG - // Immediate mode should always activate the fast path - GLuint fastPathEnabled = _glRecalcFastPath(); - gl_assert(fastPathEnabled); -#else - /* If we're not debugging, set to true - we assume we haven't broken it! */ - FAST_PATH_ENABLED = GL_TRUE; -#endif - - glDrawArrays(ACTIVE_POLYGON_MODE, 0, VERTICES.size); - - ATTRIB_POINTERS = stashed_attrib_pointers; - - *attrs = prevAttrs; + glDrawPVRArrays64KOS(ACTIVE_POLYGON_MODE, 0, VERTICES.size, VERTICES.data); aligned_vector_clear(&VERTICES); - - FAST_PATH_ENABLED = fp_was_enabled; } void APIENTRY glRectf(GLfloat x1, GLfloat y1, GLfloat x2, GLfloat y2) { diff --git a/GL/private.h b/GL/private.h index 7db1b13..2596ac3 100644 --- a/GL/private.h +++ b/GL/private.h @@ -245,6 +245,8 @@ do { \ typedef struct { float nxyz[3]; float st[2]; + float w; + uint32_t padding[2]; } VertexExtra; /* Generating PVR vertices from the user-submitted data gets complicated, particularly diff --git a/include/GL/glkos.h b/include/GL/glkos.h index e2ef11a..3ffb2d6 100644 --- a/include/GL/glkos.h +++ b/include/GL/glkos.h @@ -193,5 +193,59 @@ GLAPI GLvoid APIENTRY glDefragmentTextureMemory_KOS(void); //for palette internal format (glfcConfig) #define GL_RGB565_KOS 0xEF40 +/** + * This is a custom extension to OpenGL that allows submitting data + * directly in the expected format and alignment of the Dreamcast PowerVR + * chip. + * + * Vertices sent via this API should be 4-byte aligned (ideally 32) and + * use the following layout: + * + * uint32_t padding; // (Space for the PVR command header) + * float xyz[3]; // Pos + * float uv[2]; // Tex coord + * uint8_t bgra[4]; // Vertex color + * uint8_t obgra[4]; // Offset color (currently unused) + * + * This layout is 32 bytes in size and is copied directly into the PVR command stream.
+ * + * However, GL also allows for other attributes: normals, and secondary texture coordinates. + * + * For that reason, you can use this API to submit vertices in an "extended" format which + * is as follows: + * + * uint32_t header; // PVR command header + * float xyz[3]; // Pos + * float uv[2]; // Tex coord + * uint8_t bgra[4]; // Vertex color + * uint8_t obgra[4]; // Offset color (currently unused) + * float nxyz[3]; // Normal + * float st[2]; // Secondary texture coords + * uint32_t padding[3]; // Internal use. + */ + +/* Draw polygons by directly copying the vertices into the PVR command stream. Transformation + will happen as usual. Enabled client state *is NOT* respected. + ERRORS: + + - GL_INVALID_ENUM is generated if mode is not an accepted value (GL_TRIANGLES, GL_QUADS, GL_TRIANGLE_STRIP are accepted) + - GL_INVALID_VALUE is generated if count is negative + - GL_INVALID_VALUE is generated if data is NULL +*/ +void glDrawPVRArrays32KOS(GLenum mode, GLint first, GLsizei count, void* data); + +/* Draw polygons by directly copying the vertices into the PVR command stream. Transformation + will happen as usual. Enabled client state *is NOT* respected. This uses the extended vertex format which includes + normals and secondary texture coordinates. 
+ + ERRORS: + + - GL_INVALID_ENUM is generated if mode is not an accepted value (GL_TRIANGLES, GL_QUADS, GL_TRIANGLE_STRIP are accepted) + - GL_INVALID_VALUE is generated if count is negative + - GL_INVALID_VALUE is generated if data is NULL +*/ +void glDrawPVRArrays64KOS(GLenum mode, GLint first, GLsizei count, void* data); + + __END_DECLS diff --git a/samples/quadmark/main.c b/samples/quadmark/main.c index 4da3046..c5644c8 100644 --- a/samples/quadmark/main.c +++ b/samples/quadmark/main.c @@ -69,7 +69,11 @@ int check_start() { void setup() { //PVR needs to warm up for a frame, or results will be low - glKosInit(); + GLdcConfig config; + glKosInitConfig(&config); + config.initial_op_capacity = 7000 * 4; + config.initial_immediate_capacity = 7000 * 4; + glKosInitEx(&config); glMatrixMode(GL_MODELVIEW); glLoadIdentity(); glOrtho(0, 640, 0, 480, -100, 100); diff --git a/samples/terrain/main.c b/samples/terrain/main.c index d7f7545..3f2812b 100644 --- a/samples/terrain/main.c +++ b/samples/terrain/main.c @@ -9,7 +9,7 @@ #include "GL/glu.h" #include "GL/glkos.h" -#define TERRAIN_SIZE 100 +#define TERRAIN_SIZE 75 #define TERRAIN_SCALE 1.0f #define TERRAIN_HEIGHT_SCALE 1.0f @@ -155,11 +155,23 @@ int main(int argc, char **argv) InitGL(640, 480); ReSizeGLScene(640, 480); + uint64_t us = timer_us_gettime64(); + uint32_t frames = 0; while(1) { if(check_start()) break; DrawGLScene(); + + ++frames; + uint64_t now = timer_us_gettime64(); + uint64_t diff = (now - us); + if(diff > 5000000) { + printf("FPS: %f\n", ((float) frames) / 5.0f); + fflush(stdout); + frames = 0; + us = now; + } } return 0;