Add a new submission API and switch immediate mode to it

This commit is contained in:
Luke Benstead 2023-03-19 20:30:35 +00:00
parent 0c5f941098
commit 279581c5a6
6 changed files with 276 additions and 130 deletions

225
GL/draw.c
View File

@ -1170,6 +1170,8 @@ void _glInitSubmissionTarget() {
GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type, const GLvoid* indices) {
gl_assert(mode != GL_POLYGON);
SubmissionTarget* const target = &SUBMISSION_TARGET;
AlignedVector* const extras = target->extras;
@ -1185,37 +1187,8 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
return;
}
/* Polygons are treated as triangle fans, the only time this would be a
* problem is if we supported glPolygonMode(..., GL_LINE) but we don't.
* We optimise the triangle and quad cases.
*/
if(mode == GL_POLYGON) {
switch(count) {
case 2:
mode = GL_LINES;
break;
case 3:
mode = GL_TRIANGLES;
break;
case 4:
mode = GL_QUADS;
break;
default:
mode = GL_TRIANGLE_FAN;
}
}
if(mode == GL_LINE_STRIP || mode == GL_LINES) {
fprintf(stderr, "Line drawing is currently unsupported\n");
return;
}
GLboolean header_required = (target->output->vector.size == 0) || _glGPUStateIsDirty();
// We don't handle this any further, so just make sure we never pass it down */
gl_assert(mode != GL_POLYGON);
target->output = _glActivePolyList();
target->count = (mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count;
target->header_offset = target->output->vector.size;
@ -1308,6 +1281,35 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
// }
}
/*
 * Maps a caller-supplied primitive mode onto one the submission code can
 * draw.
 *
 * GL_POLYGON is rewritten according to the vertex count: 2 -> GL_LINES,
 * 3 -> GL_TRIANGLES, 4 -> GL_QUADS, anything else -> GL_TRIANGLE_FAN.
 * Polygons are treated as triangle fans; that would only be a problem if
 * glPolygonMode(..., GL_LINE) were supported, which it is not.
 *
 * Line modes (given directly or produced by the rewrite above) raise
 * GL_INVALID_VALUE and yield GL_NONE. Every other mode is returned as-is.
 */
static GLenum convertModeIfNecessary(GLenum mode, GLsizei count) {
    GLenum resolved = mode;

    if(mode == GL_POLYGON) {
        if(count == 2) {
            resolved = GL_LINES;
        } else if(count == 3) {
            resolved = GL_TRIANGLES;
        } else if(count == 4) {
            resolved = GL_QUADS;
        } else {
            resolved = GL_TRIANGLE_FAN;
        }
    }

    if(resolved != GL_LINE_STRIP && resolved != GL_LINES) {
        return resolved;
    }

    _glKosThrowError(GL_INVALID_VALUE, __func__);
    return GL_NONE;
}
void APIENTRY glDrawElements(GLenum mode, GLsizei count, GLenum type, const GLvoid* indices) {
TRACE();
@ -1315,6 +1317,11 @@ void APIENTRY glDrawElements(GLenum mode, GLsizei count, GLenum type, const GLvo
return;
}
mode = convertModeIfNecessary(mode, count);
if(mode == GL_NONE) {
return;
}
submitVertices(mode, 0, count, type, indices);
}
@ -1325,9 +1332,169 @@ void APIENTRY glDrawArrays(GLenum mode, GLint first, GLsizei count) {
return;
}
mode = convertModeIfNecessary(mode, count);
if(mode == GL_NONE) {
return;
}
submitVertices(mode, first, count, GL_UNSIGNED_INT, NULL);
}
/*
 * Copies pre-formatted vertices from `data` into the active polygon list and
 * transforms them in place.
 *
 *  mode    GL_TRIANGLES, GL_QUADS or GL_TRIANGLE_STRIP. Any other value is
 *          rejected through _glCheckValidEnum.
 *  stride  Size in bytes of one source vertex: 32 (a Vertex only) or 64
 *          (a Vertex immediately followed by a VertexExtra).
 *  first   Index of the first source vertex to submit.
 *  count   Number of vertices to submit.
 *  data    Source vertex array.
 *
 * GL_INVALID_VALUE is raised when count is negative or data is NULL.
 * A count of zero submits nothing.
 */
static void glDrawPVRArrays(GLenum mode, GLsizei stride, GLint first, GLsizei count, void* data) {
    GLint validTypes[] = {
        GL_TRIANGLES,
        GL_QUADS,
        GL_TRIANGLE_STRIP,
        0
    };

    if(_glCheckValidEnum(mode, validTypes, __func__) != 0) {
        return;
    }

    if(count < 0 || data == NULL) {
        _glKosThrowError(GL_INVALID_VALUE, __func__);
        return;
    }

    if(count == 0) {
        /* Nothing to draw; also keeps the strip path from touching (v - 1) */
        return;
    }

    gl_assert(stride == 32 || stride == 64);

    /* Byte address of the first vertex to submit. The offset scales with the
     * per-vertex stride, so `first` means the same thing for both formats. */
    uint8_t* const src = ((uint8_t*) data) + (first * stride);

    SubmissionTarget* const target = &SUBMISSION_TARGET;
    AlignedVector* const extras = target->extras;

    /* Pick the destination list before deciding whether a header is needed,
     * so the decision is based on the list this draw appends to rather than
     * whichever list the previous draw used. */
    target->output = _glActivePolyList();

    const GLboolean header_required = (target->output->vector.size == 0) || _glGPUStateIsDirty();

    /* Only list/strip modes reach this point, so one output vertex is
     * produced per input vertex. */
    target->count = count;
    target->header_offset = target->output->vector.size;
    target->start_offset = target->header_offset + (header_required);

    gl_assert(target->count);

    /* Make sure we have enough room for all the "extra" data */
    aligned_vector_resize(extras, target->count);

    /* Make room for the vertices and header */
    aligned_vector_extend(&target->output->vector, target->count + (header_required));

    if(header_required) {
        apply_poly_header(_glSubmissionTargetHeader(target), GL_FALSE, target->output, 0);
        _glGPUStateMarkClean();
    }

    PREFETCH(src);

    if(_glIsLightingEnabled()) {
        _glMatrixLoadModelView();
    } else {
        _glMatrixLoadModelViewProjection();
    }

    if(stride == 32) {
        /* Copy the data directly */
        sq_cpy(_glSubmissionTargetStart(target), src, count * 32);
    } else {
        /* Each 64-byte source vertex is two 32-byte halves: the first goes to
         * the output list, the second to the extras vector. */
        struct HalfVertex {
            uint32_t data[8];
        };

        const struct HalfVertex* in = (const struct HalfVertex*) src;
        Vertex* dst = aligned_vector_at(&target->output->vector, target->start_offset);
        VertexExtra* dst2 = aligned_vector_at(target->extras, 0);

        /* The last few vertices are copied without prefetching further
         * source data. Clamp the split so short batches never copy more than
         * `count` vertices. */
        const GLsizei tail = (count < 6) ? count : 6;
        const GLsizei body = count - tail;

        for(GLsizei i = 0; i < body; ++i) {
            PREFETCH(in + 2);
            *dst++ = *((const Vertex*) in++);
            *dst2++ = *((const VertexExtra*) in++);
        }

        /* Start pulling in the destination for the transform pass below */
        PREFETCH(_glSubmissionTargetStart(target));
        PREFETCH(aligned_vector_at(target->extras, 0));

        for(GLsizei i = 0; i < tail; ++i) {
            *dst++ = *((const Vertex*) in++);
            *dst2++ = *((const VertexExtra*) in++);
        }
    }

    /* Transform the copied vertices in place and assign their PVR flags */
    Vertex* v = _glSubmissionTargetStart(target);
    float w = 1.0f;

    if(mode == GL_QUADS) {
        /* Flags are assigned in quad order; swapping the last two vertices of
         * each quad afterwards turns it into a four-vertex strip whose final
         * vertex carries the end-of-list flag. */
        static const uint32_t flags [] = {
            GPU_CMD_VERTEX, GPU_CMD_VERTEX, GPU_CMD_VERTEX_EOL, GPU_CMD_VERTEX
        };

        for(int_fast32_t i = 0; i < count; ++i, ++v) {
            v->flags = flags[(i % 4)];
            TransformVertex(v->xyz, &w, v->xyz, &v->w);
        }

        Vertex* third = _glSubmissionTargetStart(target);
        third += 2;
        Vertex* fourth = third + 1;

        /* Only complete quads are swapped, so a trailing partial quad never
         * reads or writes past the submitted range. */
        for(int_fast32_t i = 0; i + 3 < count; i += 4, third += 4, fourth += 4) {
            const Vertex t = (*third);
            *third = *fourth;
            *fourth = t;
        }
    } else if(mode == GL_TRIANGLES) {
        static const uint32_t flags [] = {
            GPU_CMD_VERTEX, GPU_CMD_VERTEX, GPU_CMD_VERTEX_EOL
        };

        for(int_fast32_t i = 0; i < count; ++i, ++v) {
            PREFETCH(v + 3);
            TransformVertex(v->xyz, &w, v->xyz, &v->w);
            v->flags = flags[(i % 3)];
        }
    } else {
        for(int_fast32_t i = 0; i < count; ++i, ++v) {
            PREFETCH(v + 6);
            TransformVertex(v->xyz, &w, v->xyz, &v->w);
            v->flags = GPU_CMD_VERTEX;
        }

        /* Terminate the strip on its last vertex (count > 0 here) */
        (v - 1)->flags = GPU_CMD_VERTEX_EOL;
    }
}
/*
 * Submits `count` vertices in the 32-byte PVR layout from `data`, starting at
 * vertex `first`. Does nothing while _glCheckImmediateModeInactive reports a
 * failure, or when the mode cannot be drawn.
 */
void APIENTRY glDrawPVRArrays32KOS(GLenum mode, GLint first, GLsizei count, void* data) {
    TRACE();

    if(_glCheckImmediateModeInactive(__func__)) {
        return;
    }

    mode = convertModeIfNecessary(mode, count);
    if(mode == GL_NONE) {
        /* convertModeIfNecessary has already raised the error; just bail out
         * like glDrawArrays and glDrawElements do. */
        return;
    }

    glDrawPVRArrays(mode, 32, first, count, data);
}
/*
 * Submits `count` vertices in the extended 64-byte layout (PVR vertex
 * followed by normal / secondary texture coordinate data) from `data`,
 * starting at vertex `first`. Does nothing while
 * _glCheckImmediateModeInactive reports a failure, or when the mode cannot
 * be drawn.
 */
void APIENTRY glDrawPVRArrays64KOS(GLenum mode, GLint first, GLsizei count, void* data) {
    TRACE();

    if(_glCheckImmediateModeInactive(__func__)) {
        return;
    }

    mode = convertModeIfNecessary(mode, count);
    if(mode == GL_NONE) {
        /* convertModeIfNecessary has already raised the error; just bail out
         * like glDrawArrays and glDrawElements do. */
        return;
    }

    glDrawPVRArrays(mode, 64, first, count, data);
}
void APIENTRY glEnableClientState(GLenum cap) {
TRACE();

View File

@ -12,8 +12,6 @@
#include "private.h"
extern inline GLuint _glRecalcFastPath();
GLboolean IMMEDIATE_MODE_ACTIVE = GL_FALSE;
static GLenum ACTIVE_POLYGON_MODE = GL_TRIANGLES;
@ -23,57 +21,28 @@ static GLfloat UV_COORD[2] = {0.0f, 0.0f};
static GLfloat ST_COORD[2] = {0.0f, 0.0f};
static AlignedVector VERTICES;
static AttribPointerList IM_ATTRIBS;
/* We store the list of attributes that have been "enabled" by a call to
glColor, glNormal, glTexCoord etc. otherwise we already have defaults that
can be applied faster */
static GLuint IM_ENABLED_VERTEX_ATTRIBUTES = 0;
/*
 * One buffered immediate-mode vertex. VERTICES is initialised with
 * sizeof(IMVertex) as its element size and glVertex3f appends one of these
 * per call.
 *
 * NOTE(review): `padding`, `s` and `t` are each declared twice in this
 * listing - confirm which set of declarations is the current one.
 */
typedef struct {
uint32_t padding;
GLfloat x;
GLfloat y;
GLfloat z;
GLfloat u;
GLfloat v;
GLfloat s;
GLfloat t;
GLubyte bgra[4];
GLubyte obgra[4];
GLfloat nx;
GLfloat ny;
GLfloat nz;
GLuint padding[5];
GLfloat s;
GLfloat t;
GLuint padding2[3];
} IMVertex;
/*
 * Sets up the immediate-mode vertex buffer and the attribute descriptors
 * that point into it.
 *
 * initial_size is passed to aligned_vector_reserve for VERTICES
 * (presumably a capacity in IMVertex elements - confirm against
 * aligned_vector).
 */
void _glInitImmediateMode(GLuint initial_size) {
aligned_vector_init(&VERTICES, sizeof(IMVertex));
aligned_vector_reserve(&VERTICES, initial_size);
/* Position: 3 floats at the start of the buffer */
IM_ATTRIBS.vertex.ptr = VERTICES.data;
IM_ATTRIBS.vertex.size = 3;
IM_ATTRIBS.vertex.type = GL_FLOAT;
IM_ATTRIBS.vertex.stride = sizeof(IMVertex);
/* Primary texture coordinates: 2 floats, 3 floats past the position pointer */
IM_ATTRIBS.uv.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 3);
IM_ATTRIBS.uv.stride = sizeof(IMVertex);
IM_ATTRIBS.uv.type = GL_FLOAT;
IM_ATTRIBS.uv.size = 2;
/* Secondary texture coordinates: 2 floats, 5 floats past the position pointer */
IM_ATTRIBS.st.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 5);
IM_ATTRIBS.st.stride = sizeof(IMVertex);
IM_ATTRIBS.st.type = GL_FLOAT;
IM_ATTRIBS.st.size = 2;
/* Colour: unsigned bytes, 7 floats past the position pointer */
IM_ATTRIBS.colour.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 7);
IM_ATTRIBS.colour.size = GL_BGRA; /* Flipped color order */
IM_ATTRIBS.colour.type = GL_UNSIGNED_BYTE;
IM_ATTRIBS.colour.stride = sizeof(IMVertex);
/* Normal: 3 floats, placed one uint32_t after the colour pointer */
IM_ATTRIBS.normal.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 7) + sizeof(uint32_t);
IM_ATTRIBS.normal.stride = sizeof(IMVertex);
IM_ATTRIBS.normal.type = GL_FLOAT;
IM_ATTRIBS.normal.size = 3;
}
void APIENTRY glBegin(GLenum mode) {
@ -87,8 +56,6 @@ void APIENTRY glBegin(GLenum mode) {
}
void APIENTRY glColor4f(GLfloat r, GLfloat g, GLfloat b, GLfloat a) {
IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
COLOR[A8IDX] = (GLubyte)(a * 255.0f);
COLOR[R8IDX] = (GLubyte)(r * 255.0f);
COLOR[G8IDX] = (GLubyte)(g * 255.0f);
@ -96,8 +63,6 @@ void APIENTRY glColor4f(GLfloat r, GLfloat g, GLfloat b, GLfloat a) {
}
void APIENTRY glColor4ub(GLubyte r, GLubyte g, GLubyte b, GLubyte a) {
IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
COLOR[A8IDX] = a;
COLOR[R8IDX] = r;
COLOR[G8IDX] = g;
@ -105,8 +70,6 @@ void APIENTRY glColor4ub(GLubyte r, GLubyte g, GLubyte b, GLubyte a) {
}
void APIENTRY glColor4ubv(const GLubyte *v) {
IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
COLOR[A8IDX] = v[3];
COLOR[R8IDX] = v[0];
COLOR[G8IDX] = v[1];
@ -114,8 +77,6 @@ void APIENTRY glColor4ubv(const GLubyte *v) {
}
void APIENTRY glColor4fv(const GLfloat* v) {
IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
COLOR[B8IDX] = (GLubyte)(v[2] * 255);
COLOR[G8IDX] = (GLubyte)(v[1] * 255);
COLOR[R8IDX] = (GLubyte)(v[0] * 255);
@ -123,8 +84,6 @@ void APIENTRY glColor4fv(const GLfloat* v) {
}
void APIENTRY glColor3f(GLfloat r, GLfloat g, GLfloat b) {
IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
COLOR[B8IDX] = (GLubyte)(b * 255.0f);
COLOR[G8IDX] = (GLubyte)(g * 255.0f);
COLOR[R8IDX] = (GLubyte)(r * 255.0f);
@ -132,8 +91,6 @@ void APIENTRY glColor3f(GLfloat r, GLfloat g, GLfloat b) {
}
void APIENTRY glColor3ub(GLubyte red, GLubyte green, GLubyte blue) {
IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
COLOR[A8IDX] = 255;
COLOR[R8IDX] = red;
COLOR[G8IDX] = green;
@ -141,8 +98,6 @@ void APIENTRY glColor3ub(GLubyte red, GLubyte green, GLubyte blue) {
}
void APIENTRY glColor3ubv(const GLubyte *v) {
IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
COLOR[A8IDX] = 255;
COLOR[R8IDX] = v[0];
COLOR[G8IDX] = v[1];
@ -150,8 +105,6 @@ void APIENTRY glColor3ubv(const GLubyte *v) {
}
void APIENTRY glColor3fv(const GLfloat* v) {
IM_ENABLED_VERTEX_ATTRIBUTES |= DIFFUSE_ENABLED_FLAG;
COLOR[A8IDX] = 255;
COLOR[R8IDX] = (GLubyte)(v[0] * 255);
COLOR[G8IDX] = (GLubyte)(v[1] * 255);
@ -159,20 +112,8 @@ void APIENTRY glColor3fv(const GLfloat* v) {
}
void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) {
IM_ENABLED_VERTEX_ATTRIBUTES |= VERTEX_ENABLED_FLAG;
unsigned int cap = VERTICES.capacity;
IMVertex* vert = aligned_vector_extend(&VERTICES, 1);
if(cap != VERTICES.capacity) {
/* Resizing could've invalidated the pointers */
IM_ATTRIBS.vertex.ptr = VERTICES.data;
IM_ATTRIBS.uv.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 3);
IM_ATTRIBS.st.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 5);
IM_ATTRIBS.colour.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 7);
IM_ATTRIBS.normal.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 7) + sizeof(uint32_t);
}
vert->x = x;
vert->y = y;
vert->z = z;
@ -211,11 +152,9 @@ void APIENTRY glVertex4fv(const GLfloat* v) {
void APIENTRY glMultiTexCoord2fARB(GLenum target, GLfloat s, GLfloat t) {
if(target == GL_TEXTURE0) {
IM_ENABLED_VERTEX_ATTRIBUTES |= UV_ENABLED_FLAG;
UV_COORD[0] = s;
UV_COORD[1] = t;
} else if(target == GL_TEXTURE1) {
IM_ENABLED_VERTEX_ATTRIBUTES |= ST_ENABLED_FLAG;
ST_COORD[0] = s;
ST_COORD[1] = t;
} else {
@ -225,7 +164,6 @@ void APIENTRY glMultiTexCoord2fARB(GLenum target, GLfloat s, GLfloat t) {
}
/* Sets the current primary texture coordinate to (u, 0) and marks the UV
 * attribute as enabled for immediate mode. */
void APIENTRY glTexCoord1f(GLfloat u) {
IM_ENABLED_VERTEX_ATTRIBUTES |= UV_ENABLED_FLAG;
UV_COORD[0] = u;
UV_COORD[1] = 0.0f;
}
@ -235,7 +173,6 @@ void APIENTRY glTexCoord1fv(const GLfloat* v) {
}
/* Sets the current primary texture coordinate to (u, v) and marks the UV
 * attribute as enabled for immediate mode. */
void APIENTRY glTexCoord2f(GLfloat u, GLfloat v) {
IM_ENABLED_VERTEX_ATTRIBUTES |= UV_ENABLED_FLAG;
UV_COORD[0] = u;
UV_COORD[1] = v;
}
@ -245,7 +182,6 @@ void APIENTRY glTexCoord2fv(const GLfloat* v) {
}
void APIENTRY glNormal3f(GLfloat x, GLfloat y, GLfloat z) {
IM_ENABLED_VERTEX_ATTRIBUTES |= NORMAL_ENABLED_FLAG;
NORMAL[0] = x;
NORMAL[1] = y;
NORMAL[2] = z;
@ -258,38 +194,9 @@ void APIENTRY glNormal3fv(const GLfloat* v) {
/*
 * Finishes an immediate-mode batch: submits everything buffered in VERTICES
 * using ACTIVE_POLYGON_MODE, then empties the buffer.
 *
 * NOTE(review): this listing contains both a glDrawArrays submission (with
 * attribute-pointer and fast-path save/restore around it) and a
 * glDrawPVRArrays64KOS submission of the same vertices - confirm which of
 * the two is meant to remain.
 */
void APIENTRY glEnd() {
/* Cleared before drawing; the draw entry points call
 * _glCheckImmediateModeInactive and return early when it reports a failure */
IMMEDIATE_MODE_ACTIVE = GL_FALSE;
GLuint* attrs = &ENABLED_VERTEX_ATTRIBUTES;
/* Redirect attrib pointers */
AttribPointerList stashed_attrib_pointers = ATTRIB_POINTERS;
ATTRIB_POINTERS = IM_ATTRIBS;
/* Swap in the attribute set enabled by the glColor/glNormal/glTexCoord calls */
GLuint prevAttrs = *attrs;
*attrs = IM_ENABLED_VERTEX_ATTRIBUTES;
/* Store the fast path enabled setting so we can restore it
* after drawing */
const GLboolean fp_was_enabled = FAST_PATH_ENABLED;
#ifndef NDEBUG
// Immediate mode should always activate the fast path
GLuint fastPathEnabled = _glRecalcFastPath();
gl_assert(fastPathEnabled);
#else
/* If we're not debugging, set to true - we assume we haven't broken it! */
FAST_PATH_ENABLED = GL_TRUE;
#endif
glDrawArrays(ACTIVE_POLYGON_MODE, 0, VERTICES.size);
/* Put the caller's attribute pointers and enabled set back */
ATTRIB_POINTERS = stashed_attrib_pointers;
*attrs = prevAttrs;
/* Hand the buffered vertices to the direct PVR submission path */
glDrawPVRArrays64KOS(ACTIVE_POLYGON_MODE, 0, VERTICES.size, VERTICES.data);
/* Empty the buffer ready for the next glBegin/glEnd pair */
aligned_vector_clear(&VERTICES);
FAST_PATH_ENABLED = fp_was_enabled;
}
void APIENTRY glRectf(GLfloat x1, GLfloat y1, GLfloat x2, GLfloat y2) {

View File

@ -245,6 +245,8 @@ do { \
/*
 * Per-vertex attributes kept alongside the PVR vertex rather than inside it.
 * The 64-byte submission path copies the second 32-byte half of each source
 * vertex into one of these, so the struct is sized to 32 bytes (assuming
 * 4-byte float) by the trailing padding.
 */
typedef struct {
float nxyz[3]; /* Normal */
float st[2]; /* Secondary texture coordinates */
float w; /* presumably the vertex W component - TODO confirm against its users */
uint32_t padding[2]; /* Pads the struct out; not otherwise referenced in this listing */
} VertexExtra;
/* Generating PVR vertices from the user-submitted data gets complicated, particularly

View File

@ -193,5 +193,59 @@ GLAPI GLvoid APIENTRY glDefragmentTextureMemory_KOS(void);
//for palette internal format (glfcConfig)
#define GL_RGB565_KOS 0xEF40
/**
* This is a custom extension to OpenGL that allows submitting data
* directly in the expected format and alignment of the Dreamcast PowerVR
* chip.
*
* Vertices sent via this API should be 4-byte aligned (ideally 32-byte aligned) and
* use the following layout:
*
* uint32_t padding; // (Space for the PVR command header)
* float xyz[3]; // Pos
* float uv[2]; // Tex coord
* uint8_t bgra[4]; // Vertex color
* uint8_t obgra[4]; // Offset color (currently unused)
*
* This layout is 32 bytes in size and is copied directly into the PVR command stream.
*
* However, GL also allows for other attributes: normals, and secondary texture coordinates.
*
* For that reason, you can use this API to submit vertices in an "extended" format which
* is as follows:
*
* uint32_t header; // PVR command header
* float xyz[3]; // Pos
* float uv[2]; // Tex coord
* uint8_t bgra[4]; // Vertex color
* uint8_t obgra[4]; // Offset color (currently unused)
* float nxyz[3]; // Normal
* float st[2]; // Secondary texture coords
* uint32_t padding[3]; // Internal use.
*/
/* Draw polygons by directly copying the vertices into the PVR command stream. Transformation
will happen as usual. Enabled client state *is NOT* respected.
PARAMETERS:
- mode: primitive type to draw
- first: index of the first 32-byte vertex in data to submit
- count: number of vertices to submit
- data: array of vertices in the 32-byte layout described above
ERRORS:
- GL_INVALID_ENUM is generated if mode is not an accepted value (GL_TRIANGLES, GL_QUADS, GL_TRIANGLE_STRIP are accepted)
- GL_INVALID_VALUE is generated if count is negative
- GL_INVALID_VALUE is generated if data is NULL
*/
void glDrawPVRArrays32KOS(GLenum mode, GLint first, GLsizei count, void* data);
/* Draw polygons by directly copying the vertices into the PVR command stream. Transformation
will happen as usual. Enabled client state *is NOT* respected. This uses the extended vertex format which includes
normals and secondary texture coordinates.
PARAMETERS:
- mode: primitive type to draw
- first: offset of the first vertex in data to submit (NOTE(review): confirm the unit the implementation applies for the 64-byte format)
- count: number of vertices to submit
- data: array of vertices in the extended 64-byte layout described above
ERRORS:
- GL_INVALID_ENUM is generated if mode is not an accepted value (GL_TRIANGLES, GL_QUADS, GL_TRIANGLE_STRIP are accepted)
- GL_INVALID_VALUE is generated if count is negative
- GL_INVALID_VALUE is generated if data is NULL
*/
void glDrawPVRArrays64KOS(GLenum mode, GLint first, GLsizei count, void* data);
__END_DECLS

View File

@ -69,7 +69,11 @@ int check_start() {
void setup() {
//PVR needs to warm up for a frame, or results will be low
glKosInit();
GLdcConfig config;
glKosInitConfig(&config);
config.initial_op_capacity = 7000 * 4;
config.initial_immediate_capacity = 7000 * 4;
glKosInitEx(&config);
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
glOrtho(0, 640, 0, 480, -100, 100);

View File

@ -9,7 +9,7 @@
#include "GL/glu.h"
#include "GL/glkos.h"
#define TERRAIN_SIZE 100
#define TERRAIN_SIZE 75
#define TERRAIN_SCALE 1.0f
#define TERRAIN_HEIGHT_SCALE 1.0f
@ -155,11 +155,23 @@ int main(int argc, char **argv)
InitGL(640, 480);
ReSizeGLScene(640, 480);
uint64_t us = timer_us_gettime64();
uint32_t frames = 0;
while(1) {
if(check_start())
break;
DrawGLScene();
++frames;
uint64_t now = timer_us_gettime64();
uint64_t diff = (now - us);
if(diff > 5000000) {
printf("FPS: %f\n", ((float) frames) / 5.0f);
fflush(stdout);
frames = 0;
us = now;
}
}
return 0;