Merge branch '52-fast-path' into 'master'

Resolve "Implement a fast path"

Closes #52

See merge request simulant/GLdc!49
This commit is contained in:
Luke Benstead 2019-10-01 07:51:28 +00:00
commit 90eb264210
6 changed files with 216 additions and 61 deletions

139
GL/draw.c
View File

@ -19,7 +19,7 @@ static AttribPointer DIFFUSE_POINTER;
static GLuint ENABLED_VERTEX_ATTRIBUTES = 0;
static GLubyte ACTIVE_CLIENT_TEXTURE = 0;
static GLboolean FAST_PATH_ENABLED = GL_FALSE;
#define ITERATE(count) \
GLuint i = count; \
@ -55,6 +55,44 @@ void _glInitAttributePointers() {
NORMAL_POINTER.size = 3;
}
/*
 * Decides whether the currently configured client arrays qualify for the
 * "fast path", i.e. whether the vertex data is already laid out in exactly
 * the format the PVR can handle.
 *
 * The layout that qualifies is one interleaved record per vertex:
 *   - position: 3 x GL_FLOAT
 *   - UV:       2 x GL_FLOAT, starting sizeof(GLfloat) * 3 bytes after the position
 *   - diffuse:  GL_UNSIGNED_BYTE with size GL_BGRA, starting
 *               sizeof(GLfloat) * 2 bytes after the UV
 * and each of the three pointers must use a stride of 32.
 *
 * Returns GL_TRUE only when every requirement holds, GL_FALSE otherwise.
 */
static GLboolean _glIsVertexDataFastPathCompatible() {
    /* Position, UV and diffuse all have to be enabled up front, because the
     * fast copy loop performs no per-attribute checks of its own. */
    if((ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG) != VERTEX_ENABLED_FLAG ||
       (ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) != UV_ENABLED_FLAG ||
       (ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) != DIFFUSE_ENABLED_FLAG) {
        return GL_FALSE;
    }

    /* Every attribute must step by one full 32 byte record. */
    if(VERTEX_POINTER.stride != 32 ||
       UV_POINTER.stride != 32 ||
       DIFFUSE_POINTER.stride != 32) {
        return GL_FALSE;
    }

    /* The attributes must be packed back to back: position, then UV, then diffuse. */
    if((UV_POINTER.ptr - VERTEX_POINTER.ptr) != sizeof(GLfloat) * 3 ||
       (DIFFUSE_POINTER.ptr - UV_POINTER.ptr) != sizeof(GLfloat) * 2) {
        return GL_FALSE;
    }

    /* Position is three floats, UV is two floats. */
    if(VERTEX_POINTER.type != GL_FLOAT || VERTEX_POINTER.size != 3) {
        return GL_FALSE;
    }

    if(UV_POINTER.type != GL_FLOAT || UV_POINTER.size != 2) {
        return GL_FALSE;
    }

    /* Colour must be unsigned bytes in BGRA order (signalled via size == GL_BGRA). */
    if(DIFFUSE_POINTER.type != GL_UNSIGNED_BYTE || DIFFUSE_POINTER.size != GL_BGRA) {
        return GL_FALSE;
    }

    return GL_TRUE;
}
static inline GLuint byte_size(GLenum type) {
switch(type) {
case GL_BYTE: return sizeof(GLbyte);
@ -304,8 +342,8 @@ static void _readVertexData4ubARGB(const GLubyte* input, GLuint count, GLubyte s
output[B8IDX] = input[2];
output[A8IDX] = input[3];
input = (GLubyte*) (((GLubyte*) input) + stride);
output = (GLubyte*) (((GLubyte*) output) + sizeof(Vertex));
input += stride;
output += sizeof(Vertex);
}
}
@ -345,6 +383,30 @@ static void _readVertexData3ubARGB(const GLubyte* input, GLuint count, GLubyte s
}
}
/*
 * Copies `count` four-byte colours from `input` into the output vertices
 * without reordering the channels: input byte k lands in output byte k.
 *
 * input  - first source colour; advanced by `stride` bytes per vertex
 * count  - number of colours to copy
 * stride - distance in bytes between consecutive source colours
 * output - first destination colour; advanced by sizeof(Vertex) per vertex
 */
static void _readVertexData4ubRevARGB(const GLubyte* input, GLuint count, GLubyte stride, GLubyte* output) {
    ITERATE(count) {
        GLubyte channel;

        for(channel = 0; channel < 4; ++channel) {
            output[channel] = input[channel];
        }

        input += stride;
        output += sizeof(Vertex);
    }
}
/*
 * Converts `count` four-float colours to four-byte colours in the output
 * vertices, keeping the channel order of the input: float k is scaled by
 * 255, passed through clamp(..., 0, 255) and stored as output byte k.
 *
 * input  - first source colour; advanced by `stride` bytes per vertex
 * count  - number of colours to convert
 * stride - distance in bytes between consecutive source colours
 * output - first destination colour; advanced by sizeof(Vertex) per vertex
 */
static void _readVertexData4fRevARGB(const float* input, GLuint count, GLubyte stride, GLubyte* output) {
    ITERATE(count) {
        GLubyte channel;

        for(channel = 0; channel < 4; ++channel) {
            output[channel] = (GLubyte) clamp(input[channel] * 255.0f, 0, 255);
        }

        /* The stride is expressed in bytes, so step through a byte pointer. */
        input = (float*) (((GLubyte*) input) + stride);
        output += sizeof(Vertex);
    }
}
static void _fillWithNegZVE(GLuint count, GLfloat* output) {
ITERATE(count) {
output[0] = output[1] = 0.0f;
@ -394,6 +456,14 @@ static void _readVertexData4uiARGB(const GLuint* input, GLuint count, GLubyte st
assert(0 && "Not Implemented");
}
/*
 * Placeholder reader for unsigned-short colours supplied with size GL_BGRA.
 * Nothing is read or written; the function only trips an assertion to
 * signal that this combination is not implemented.
 */
static void _readVertexData4usRevARGB(const GLushort* input, GLuint count, GLubyte stride, GLubyte* output) {
    /* None of the arguments are used by the stub. */
    (void) input;
    (void) count;
    (void) stride;
    (void) output;

    assert(0 && "Not Implemented");
}
/*
 * Placeholder reader for unsigned-int colours supplied with size GL_BGRA.
 * Nothing is read or written; the function only trips an assertion to
 * signal that this combination is not implemented.
 */
static void _readVertexData4uiRevARGB(const GLuint* input, GLuint count, GLubyte stride, GLubyte* output) {
    /* None of the arguments are used by the stub. */
    (void) input;
    (void) count;
    (void) stride;
    (void) output;

    assert(0 && "Not Implemented");
}
/*
 * Hands out a pointer to the file-private bitmask of enabled vertex
 * attributes, so callers outside this file can read and modify it in place.
 */
GLuint* _glGetEnabledAttributes() {
    GLuint* const enabled = &ENABLED_VERTEX_ATTRIBUTES;
    return enabled;
}
@ -712,7 +782,7 @@ static inline void _readDiffuseData(const GLuint first, const GLuint count, Vert
}
const GLubyte cstride = (DIFFUSE_POINTER.stride) ? DIFFUSE_POINTER.stride : DIFFUSE_POINTER.size * byte_size(DIFFUSE_POINTER.type);
const void* cptr = ((GLubyte*) DIFFUSE_POINTER.ptr + (first * cstride));
const void* cptr = ((GLubyte*) DIFFUSE_POINTER.ptr) + (first * cstride);
if(DIFFUSE_POINTER.size == 3) {
switch(DIFFUSE_POINTER.type) {
@ -756,6 +826,27 @@ static inline void _readDiffuseData(const GLuint first, const GLuint count, Vert
default:
assert(0 && "Not Implemented");
}
} else if(DIFFUSE_POINTER.size == GL_BGRA) {
switch(DIFFUSE_POINTER.type) {
case GL_DOUBLE:
case GL_FLOAT:
_readVertexData4fRevARGB(cptr, count, cstride, output[0].bgra);
break;
case GL_BYTE:
case GL_UNSIGNED_BYTE:
_readVertexData4ubRevARGB(cptr, count, cstride, output[0].bgra);
break;
case GL_SHORT:
case GL_UNSIGNED_SHORT:
_readVertexData4usRevARGB(cptr, count, cstride, output[0].bgra);
break;
case GL_INT:
case GL_UNSIGNED_INT:
_readVertexData4uiRevARGB(cptr, count, cstride, output[0].bgra);
break;
default:
assert(0 && "Not Implemented");
}
}else {
assert(0 && "Not Implemented");
}
@ -766,6 +857,7 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei
/* Read from the client buffers and generate an array of ClipVertices */
TRACE();
static const uint32_t FAST_PATH_BYTE_SIZE = (sizeof(GLfloat) * 3) + (sizeof(GLfloat) * 2) + (sizeof(GLubyte) * 4);
const GLsizei istride = byte_size(type);
if(!indices) {
@ -773,6 +865,17 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei
Vertex* start = _glSubmissionTargetStart(target);
if(FAST_PATH_ENABLED) {
/* Copy the pos, uv and color directly in one go */
const GLfloat* pos = VERTEX_POINTER.ptr;
Vertex* it = start;
ITERATE(count) {
it->flags = PVR_CMD_VERTEX;
memcpy(it->xyz, pos, FAST_PATH_BYTE_SIZE);
it++;
pos += 32 / sizeof(GLfloat);
}
} else {
_readPositionData(first, count, start);
profiler_checkpoint("positions");
@ -781,20 +884,19 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei
if(doTexture) _readUVData(first, count, start);
VertexExtra* ve = aligned_vector_at(target->extras, 0);
if(doLighting) _readNormalData(first, count, ve);
if(doTexture && doMultitexture) _readSTData(first, count, ve);
profiler_checkpoint("others");
Vertex* it = _glSubmissionTargetStart(target);
ITERATE(count) {
it->flags = PVR_CMD_VERTEX;
++it;
}
}
profiler_checkpoint("flags");
VertexExtra* ve = aligned_vector_at(target->extras, 0);
if(doLighting) _readNormalData(first, count, ve);
if(doTexture && doMultitexture) _readSTData(first, count, ve);
profiler_checkpoint("others");
// Drawing arrays
switch(mode) {
@ -1292,6 +1394,11 @@ void APIENTRY glClientActiveTextureARB(GLenum texture) {
ACTIVE_CLIENT_TEXTURE = (texture == GL_TEXTURE1_ARB) ? 1 : 0;
}
/*
 * Re-evaluates whether the current attribute pointers qualify for the fast
 * path, caches the answer in FAST_PATH_ENABLED and returns it.
 */
GLboolean _glRecalcFastPath() {
    const GLboolean compatible = _glIsVertexDataFastPathCompatible();

    FAST_PATH_ENABLED = compatible;
    return compatible;
}
void APIENTRY glTexCoordPointer(GLint size, GLenum type, GLsizei stride, const GLvoid * pointer) {
TRACE();
@ -1307,6 +1414,8 @@ void APIENTRY glTexCoordPointer(GLint size, GLenum type, GLsizei stride, cons
tointer->stride = stride;
tointer->type = type;
tointer->size = size;
_glRecalcFastPath();
}
void APIENTRY glVertexPointer(GLint size, GLenum type, GLsizei stride, const GLvoid * pointer) {
@ -1322,12 +1431,14 @@ void APIENTRY glVertexPointer(GLint size, GLenum type, GLsizei stride, const
VERTEX_POINTER.stride = stride;
VERTEX_POINTER.type = type;
VERTEX_POINTER.size = size;
_glRecalcFastPath();
}
void APIENTRY glColorPointer(GLint size, GLenum type, GLsizei stride, const GLvoid * pointer) {
TRACE();
if(size != 3 && size != 4) {
if(size != 3 && size != 4 && size != GL_BGRA) {
_glKosThrowError(GL_INVALID_VALUE, __func__);
_glKosPrintError();
return;
@ -1337,6 +1448,8 @@ void APIENTRY glColorPointer(GLint size, GLenum type, GLsizei stride, const G
DIFFUSE_POINTER.stride = stride;
DIFFUSE_POINTER.type = type;
DIFFUSE_POINTER.size = size;
_glRecalcFastPath();
}
void APIENTRY glNormalPointer(GLenum type, GLsizei stride, const GLvoid * pointer) {
@ -1346,4 +1459,6 @@ void APIENTRY glNormalPointer(GLenum type, GLsizei stride, const GLvoid * poin
NORMAL_POINTER.stride = stride;
NORMAL_POINTER.type = type;
NORMAL_POINTER.size = 3;
_glRecalcFastPath();
}

View File

@ -7,8 +7,12 @@
* 3. This is entirely untested.
*/
#include <string.h>
#include <stdio.h>
#include "../include/gl.h"
#include "../include/glext.h"
#include "../include/glkos.h"
#include "profiler.h"
#include "private.h"
@ -17,18 +21,14 @@ static GLboolean IMMEDIATE_MODE_ACTIVE = GL_FALSE;
static GLenum ACTIVE_POLYGON_MODE = GL_TRIANGLES;
static AlignedVector VERTICES;
static AlignedVector COLOURS;
static AlignedVector UV_COORDS;
static AlignedVector ST_COORDS;
static AlignedVector NORMALS;
static GLfloat NORMAL[3] = {0.0f, 0.0f, 1.0f};
static GLubyte COLOR[4] = {255, 255, 255, 255};
static GLfloat UV_COORD[2] = {0.0f, 0.0f};
static GLfloat ST_COORD[2] = {0.0f, 0.0f};
static AttribPointer VERTEX_ATTRIB;
static AttribPointer DIFFUSE_ATTRIB;
static AttribPointer UV_ATTRIB;
@ -36,42 +36,38 @@ static AttribPointer ST_ATTRIB;
static AttribPointer NORMAL_ATTRIB;
/*
 * Sets up the immediate-mode scratch buffers and the attribute descriptors
 * that describe them.
 *
 * initial_size - capacity passed to aligned_vector_reserve() for the buffers.
 *
 * NOTE(review): this text comes from a rendered diff, so removed and added
 * lines appear side by side. Wherever the same vector or field is set twice
 * below, the earlier line looks like the pre-change version and the later one
 * like its replacement - confirm against the real file before relying on it.
 */
void _glInitImmediateMode(GLuint initial_size) {
/* Element sizes of the backing vectors. VERTICES is initialised twice here:
 * once per GLfloat and once per GLVertexKOS record. */
aligned_vector_init(&VERTICES, sizeof(GLfloat));
aligned_vector_init(&COLOURS, sizeof(GLubyte));
aligned_vector_init(&UV_COORDS, sizeof(GLfloat));
aligned_vector_init(&VERTICES, sizeof(GLVertexKOS));
aligned_vector_init(&ST_COORDS, sizeof(GLfloat));
aligned_vector_init(&NORMALS, sizeof(GLfloat));
/* Reserve capacity up front. The "* 2" and "* 3" variants match the two
 * floats per ST coordinate and three floats per normal pushed by glVertex3f. */
aligned_vector_reserve(&VERTICES, initial_size);
aligned_vector_reserve(&COLOURS, initial_size);
aligned_vector_reserve(&UV_COORDS, initial_size);
aligned_vector_reserve(&ST_COORDS, initial_size);
aligned_vector_reserve(&NORMALS, initial_size);
aligned_vector_reserve(&ST_COORDS, initial_size * 2);
aligned_vector_reserve(&NORMALS, initial_size * 3);
/* Position: three floats. The second assignment skips sizeof(uint32_t) bytes
 * at the start of each record, which is the size of GLVertexKOS.padding0
 * assuming GLuint is 32 bits wide. */
VERTEX_ATTRIB.ptr = VERTICES.data;
VERTEX_ATTRIB.ptr = VERTICES.data + sizeof(uint32_t);
VERTEX_ATTRIB.size = 3;
VERTEX_ATTRIB.type = GL_FLOAT;
VERTEX_ATTRIB.stride = 0;
VERTEX_ATTRIB.stride = 32;
/* Diffuse colour read from the separate COLOURS vector as 4 unsigned bytes. */
DIFFUSE_ATTRIB.ptr = COLOURS.data;
DIFFUSE_ATTRIB.size = 4;
DIFFUSE_ATTRIB.type = GL_UNSIGNED_BYTE;
DIFFUSE_ATTRIB.stride = 0;
/* UV: two floats. The interleaved variant starts three floats after the
 * position and uses the same 32 byte stride. */
UV_ATTRIB.ptr = UV_COORDS.data;
UV_ATTRIB.stride = 0;
UV_ATTRIB.ptr = VERTEX_ATTRIB.ptr + (sizeof(GLfloat) * 3);
UV_ATTRIB.stride = 32;
UV_ATTRIB.type = GL_FLOAT;
UV_ATTRIB.size = 2;
/* ST: two tightly packed floats (stride 0) in their own vector. */
ST_ATTRIB.ptr = ST_COORDS.data;
ST_ATTRIB.stride = 0;
ST_ATTRIB.type = GL_FLOAT;
ST_ATTRIB.size = 2;
/* Diffuse colour read from inside the interleaved record, five floats after
 * the position, as unsigned bytes with size GL_BGRA. */
DIFFUSE_ATTRIB.ptr = VERTEX_ATTRIB.ptr + (sizeof(GLfloat) * 5);
DIFFUSE_ATTRIB.size = GL_BGRA; /* Flipped color order */
DIFFUSE_ATTRIB.type = GL_UNSIGNED_BYTE;
DIFFUSE_ATTRIB.stride = 32;
/* Normals: three tightly packed floats (stride 0) in their own vector. */
NORMAL_ATTRIB.ptr = NORMALS.data;
NORMAL_ATTRIB.stride = 0;
NORMAL_ATTRIB.type = GL_FLOAT;
NORMAL_ATTRIB.size = 3;
/* Same ST setup as above, repeated in this diff rendering. */
ST_ATTRIB.ptr = ST_COORDS.data;
ST_ATTRIB.stride = 0;
ST_ATTRIB.type = GL_FLOAT;
ST_ATTRIB.size = 2;
}
GLubyte _glCheckImmediateModeInactive(const char* func) {
@ -146,17 +142,23 @@ void APIENTRY glColor3fv(const GLfloat* v) {
}
/*
 * Immediate-mode vertex submission: appends one vertex at (x, y, z) to the
 * pending batch, stamped with the currently stashed UV_COORD, COLOR,
 * ST_COORD and NORMAL values.
 *
 * NOTE(review): this text comes from a rendered diff, so removed and added
 * lines appear side by side. The aligned_vector_push_back() calls look like
 * the pre-change version (one vector per attribute) and the
 * aligned_vector_extend() / vert-> lines like their replacement (one
 * interleaved GLVertexKOS record) - confirm against the real file.
 */
void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) {
/* Push x, y and z as three separate elements of VERTICES. */
aligned_vector_reserve(&VERTICES, VERTICES.size + 3);
aligned_vector_push_back(&VERTICES, &x, 1);
aligned_vector_push_back(&VERTICES, &y, 1);
aligned_vector_push_back(&VERTICES, &z, 1);
/* Grow each vector and write into the returned storage: one GLVertexKOS
 * record, two ST floats and three normal floats per vertex. */
GLVertexKOS* vert = aligned_vector_extend(&VERTICES, 1);
GLfloat* st = aligned_vector_extend(&ST_COORDS, 2);
GLfloat* n = aligned_vector_extend(&NORMALS, 3);
vert->x = x;
vert->y = y;
vert->z = z;
vert->u = UV_COORD[0];
vert->v = UV_COORD[1];
/* Push back the stashed colour, normal and uv_coordinate */
aligned_vector_push_back(&COLOURS, COLOR, 4);
aligned_vector_push_back(&UV_COORDS, UV_COORD, 2);
aligned_vector_push_back(&ST_COORDS, ST_COORD, 2);
aligned_vector_push_back(&NORMALS, NORMAL, 3);
/* COLOR holds R, G, B, A in that order; the *8IDX constants (defined outside
 * this view) select the matching byte inside the vertex's bgra field. */
vert->bgra[R8IDX] = COLOR[0];
vert->bgra[G8IDX] = COLOR[1];
vert->bgra[B8IDX] = COLOR[2];
vert->bgra[A8IDX] = COLOR[3];
/* ST coordinates and normals stay in their own vectors. */
memcpy(st, ST_COORD, sizeof(GLfloat) * 2);
memcpy(n, NORMAL, sizeof(GLfloat) * 3);
}
void APIENTRY glVertex3fv(const GLfloat* v) {
@ -218,11 +220,12 @@ void APIENTRY glEnd() {
IMMEDIATE_MODE_ACTIVE = GL_FALSE;
/* Resizing could have invalidated these pointers */
VERTEX_ATTRIB.ptr = VERTICES.data;
DIFFUSE_ATTRIB.ptr = COLOURS.data;
UV_ATTRIB.ptr = UV_COORDS.data;
ST_ATTRIB.ptr = ST_COORDS.data;
VERTEX_ATTRIB.ptr = VERTICES.data + sizeof(uint32_t);
UV_ATTRIB.ptr = VERTEX_ATTRIB.ptr + (sizeof(GLfloat) * 3);
DIFFUSE_ATTRIB.ptr = VERTEX_ATTRIB.ptr + (sizeof(GLfloat) * 5);
NORMAL_ATTRIB.ptr = NORMALS.data;
ST_ATTRIB.ptr = ST_COORDS.data;
GLuint* attrs = _glGetEnabledAttributes();
@ -250,7 +253,15 @@ void APIENTRY glEnd() {
*attrs = ~0; // Enable everything
glDrawArrays(ACTIVE_POLYGON_MODE, 0, VERTICES.size / 3);
#ifndef NDEBUG
_glRecalcFastPath();
#else
// Immediate mode should always activate the fast path
GLboolean fastPathEnabled = _glRecalcFastPath();
assert(fastPathEnabled);
#endif
glDrawArrays(ACTIVE_POLYGON_MODE, 0, VERTICES.size);
/* Restore everything */
*vattr = vptr;
@ -263,8 +274,6 @@ void APIENTRY glEnd() {
/* Clear arrays for next polys */
aligned_vector_clear(&VERTICES);
aligned_vector_clear(&COLOURS);
aligned_vector_clear(&UV_COORDS);
aligned_vector_clear(&ST_COORDS);
aligned_vector_clear(&NORMALS);

View File

@ -316,6 +316,8 @@ GLboolean _glIsLightingEnabled();
GLboolean _glIsLightEnabled(GLubyte light);
GLboolean _glIsColorMaterialEnabled();
GLboolean _glRecalcFastPath();
typedef struct {
float xyz[3];
float n[3];

View File

@ -671,7 +671,7 @@ const GLubyte *glGetString(GLenum name) {
return (const GLubyte*) "1.2 (partial) - GLdc 1.1";
case GL_EXTENSIONS:
return (const GLubyte*) "GL_ARB_framebuffer_object, GL_ARB_multitexture, GL_ARB_texture_rg, GL_EXT_paletted_texture, GL_EXT_shared_texture_palette, GL_KOS_multiple_shared_palette";
return (const GLubyte*) "GL_ARB_framebuffer_object, GL_ARB_multitexture, GL_ARB_texture_rg, GL_EXT_paletted_texture, GL_EXT_shared_texture_palette, GL_KOS_multiple_shared_palette, GL_ARB_vertex_array_bgra";
}
return (const GLubyte*) "GL_KOS_ERROR: ENUM Unsupported\n";

15
GL/util.c Normal file
View File

@ -0,0 +1,15 @@
#include "../include/glkos.h"
void APIENTRY glVertexPackColor3fKOS(GLVertexKOS* vertex, float r, float g, float b) {
vertex->color[3] = 255;
vertex->color[2] = (r * 255.0f);
vertex->color[1] = (g * 255.0f);
vertex->color[0] = (b * 255.0f);
}
void APIENTRY glVertexPackColor4fKOS(GLVertexKOS* vertex, float r, float g, float b, float a) {
vertex->color[3] = (a * 255.0f);
vertex->color[2] = (r * 255.0f);
vertex->color[1] = (g * 255.0f);
vertex->color[0] = (b * 255.0f);
}

View File

@ -53,6 +53,20 @@ typedef struct {
} GLdcConfig;
/*
 * One interleaved vertex record: position, texture coordinate and colour,
 * with a GLuint of padding on either side.
 *
 * NOTE(review): with 4-byte GLuint/GLfloat this adds up to 32 bytes, which
 * would match the stride of 32 used with this record - confirm on the target.
 */
typedef struct {
    GLuint padding0;   /* Leading pad; position data starts right after it */
    GLfloat x, y, z;   /* Position */
    GLfloat u, v;      /* Texture coordinate */
    GLubyte bgra[4];   /* Colour bytes in blue, green, red, alpha order */
    GLuint padding1;   /* Trailing pad */
} GLVertexKOS;
GLAPI void APIENTRY glVertexPackColor3fKOS(GLVertexKOS* vertex, float r, float g, float b);
GLAPI void APIENTRY glVertexPackColor4fKOS(GLVertexKOS* vertex, float r, float g, float b, float a);
GLAPI void APIENTRY glKosInitConfig(GLdcConfig* config);
/* Usage: