Merge remote-tracking branch 'origin/master'

This commit is contained in:
Luke Benstead 2021-04-21 21:31:21 +01:00
commit bb53dad90d
4 changed files with 156 additions and 61 deletions

182
GL/draw.c
View File

@ -53,39 +53,49 @@ void _glInitAttributePointers() {
} }
GL_FORCE_INLINE GLboolean _glIsVertexDataFastPathCompatible() { GL_FORCE_INLINE GLboolean _glIsVertexDataFastPathCompatible() {
/* /* The fast path is enabled when all enabled elements of the vertex
* We provide a "fast path" if vertex data is provided in * match the output format. This means:
* exactly the right format that matches what the PVR can handle. *
* This function returns true if all the requirements are met. * xyz == 3f
* uv == 2f
* rgba == argb4444
* st == 2f
* normal == 3f
*
* When this happens we do inline straight copies of the enabled data
* and transforms for positions and normals happen while copying.
*/ */
/* if((ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG)) {
* At least these attributes need to be enabled, because we're not going to do any checking if(VERTEX_POINTER.size != 3 || VERTEX_POINTER.type != GL_FLOAT) {
* in the loop return GL_FALSE;
*/ }
if((ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG) != VERTEX_ENABLED_FLAG) return GL_FALSE; }
if((ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) != UV_ENABLED_FLAG) return GL_FALSE;
if((ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) != DIFFUSE_ENABLED_FLAG) return GL_FALSE;
// All 3 attribute types must have a stride of 32 if((ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG)) {
if(VERTEX_POINTER.stride != 32) return GL_FALSE; if(UV_POINTER.size != 2 || UV_POINTER.type != GL_FLOAT) {
if(UV_POINTER.stride != 32) return GL_FALSE; return GL_FALSE;
if(DIFFUSE_POINTER.stride != 32) return GL_FALSE; }
}
// UV must follow vertex, diffuse must follow UV if((ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG)) {
if((UV_POINTER.ptr - VERTEX_POINTER.ptr) != sizeof(GLfloat) * 3) return GL_FALSE; /* FIXME: Shouldn't this be a reversed format? */
if((DIFFUSE_POINTER.ptr - UV_POINTER.ptr) != sizeof(GLfloat) * 2) return GL_FALSE; if(DIFFUSE_POINTER.size != GL_BGRA || DIFFUSE_POINTER.type != GL_UNSIGNED_BYTE) {
return GL_FALSE;
}
}
if(VERTEX_POINTER.type != GL_FLOAT) return GL_FALSE; if((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG)) {
if(VERTEX_POINTER.size != 3) return GL_FALSE; if(ST_POINTER.size != 2 || ST_POINTER.type != GL_FLOAT) {
return GL_FALSE;
}
}
if(UV_POINTER.type != GL_FLOAT) return GL_FALSE; if((ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG)) {
if(UV_POINTER.size != 2) return GL_FALSE; if(NORMAL_POINTER.size != 3 || NORMAL_POINTER.type != GL_FLOAT) {
return GL_FALSE;
if(DIFFUSE_POINTER.type != GL_UNSIGNED_BYTE) return GL_FALSE; }
}
/* BGRA is the required color order */
if(DIFFUSE_POINTER.size != GL_BGRA) return GL_FALSE;
return GL_TRUE; return GL_TRUE;
} }
@ -616,11 +626,10 @@ ReadNormalFunc calcReadNormalFunc() {
} }
} }
void _readPositionData(const GLuint first, const GLuint count, const Vertex* output) { static void _readPositionData(ReadDiffuseFunc func, const GLuint first, const GLuint count, const Vertex* output) {
const GLsizei vstride = (VERTEX_POINTER.stride) ? VERTEX_POINTER.stride : VERTEX_POINTER.size * byte_size(VERTEX_POINTER.type); const GLsizei vstride = (VERTEX_POINTER.stride) ? VERTEX_POINTER.stride : VERTEX_POINTER.size * byte_size(VERTEX_POINTER.type);
const void* vptr = ((GLubyte*) VERTEX_POINTER.ptr + (first * vstride)); const GLubyte* vptr = ((GLubyte*) VERTEX_POINTER.ptr + (first * vstride));
ReadDiffuseFunc func = calcReadPositionFunc();
GLubyte* out = (GLubyte*) output[0].xyz; GLubyte* out = (GLubyte*) output[0].xyz;
uint32_t* flags; uint32_t* flags;
@ -637,11 +646,10 @@ void _readPositionData(const GLuint first, const GLuint count, const Vertex* out
} }
} }
void _readUVData(const GLuint first, const GLuint count, const Vertex* output) { static void _readUVData(ReadUVFunc func, const GLuint first, const GLuint count, const Vertex* output) {
const GLsizei uvstride = (UV_POINTER.stride) ? UV_POINTER.stride : UV_POINTER.size * byte_size(UV_POINTER.type); const GLsizei uvstride = (UV_POINTER.stride) ? UV_POINTER.stride : UV_POINTER.size * byte_size(UV_POINTER.type);
const void* uvptr = ((GLubyte*) UV_POINTER.ptr + (first * uvstride)); const GLubyte* uvptr = ((GLubyte*) UV_POINTER.ptr + (first * uvstride));
ReadUVFunc func = calcReadUVFunc();
GLubyte* out = (GLubyte*) output[0].uv; GLubyte* out = (GLubyte*) output[0].uv;
ITERATE(count) { ITERATE(count) {
@ -651,11 +659,10 @@ void _readUVData(const GLuint first, const GLuint count, const Vertex* output) {
} }
} }
void _readSTData(const GLuint first, const GLuint count, const VertexExtra* extra) { static void _readSTData(ReadUVFunc func, const GLuint first, const GLuint count, const VertexExtra* extra) {
const GLsizei ststride = (ST_POINTER.stride) ? ST_POINTER.stride : ST_POINTER.size * byte_size(ST_POINTER.type); const GLsizei ststride = (ST_POINTER.stride) ? ST_POINTER.stride : ST_POINTER.size * byte_size(ST_POINTER.type);
const void* stptr = ((GLubyte*) ST_POINTER.ptr + (first * ststride)); const GLubyte* stptr = ((GLubyte*) ST_POINTER.ptr + (first * ststride));
ReadUVFunc func = calcReadSTFunc();
GLubyte* out = (GLubyte*) extra[0].st; GLubyte* out = (GLubyte*) extra[0].st;
ITERATE(count) { ITERATE(count) {
@ -665,11 +672,10 @@ void _readSTData(const GLuint first, const GLuint count, const VertexExtra* extr
} }
} }
void _readNormalData(const GLuint first, const GLuint count, const VertexExtra* extra) { static void _readNormalData(ReadNormalFunc func, const GLuint first, const GLuint count, const VertexExtra* extra) {
const GLsizei nstride = (NORMAL_POINTER.stride) ? NORMAL_POINTER.stride : NORMAL_POINTER.size * byte_size(NORMAL_POINTER.type); const GLsizei nstride = (NORMAL_POINTER.stride) ? NORMAL_POINTER.stride : NORMAL_POINTER.size * byte_size(NORMAL_POINTER.type);
const void* nptr = ((GLubyte*) NORMAL_POINTER.ptr + (first * nstride)); const GLubyte* nptr = ((GLubyte*) NORMAL_POINTER.ptr + (first * nstride));
ReadNormalFunc func = calcReadNormalFunc();
GLubyte* out = (GLubyte*) extra[0].nxyz; GLubyte* out = (GLubyte*) extra[0].nxyz;
ITERATE(count) { ITERATE(count) {
@ -696,12 +702,11 @@ void _readNormalData(const GLuint first, const GLuint count, const VertexExtra*
} }
} }
void _readDiffuseData(const GLuint first, const GLuint count, const Vertex* output) { static void _readDiffuseData(ReadDiffuseFunc func, const GLuint first, const GLuint count, const Vertex* output) {
const GLuint size = (DIFFUSE_POINTER.size == GL_BGRA) ? 4 : DIFFUSE_POINTER.size; const GLuint size = (DIFFUSE_POINTER.size == GL_BGRA) ? 4 : DIFFUSE_POINTER.size;
const GLuint cstride = (DIFFUSE_POINTER.stride) ? DIFFUSE_POINTER.stride : size * byte_size(DIFFUSE_POINTER.type); const GLuint cstride = (DIFFUSE_POINTER.stride) ? DIFFUSE_POINTER.stride : size * byte_size(DIFFUSE_POINTER.type);
const GLubyte* cptr = ((GLubyte*) DIFFUSE_POINTER.ptr) + (first * cstride); const GLubyte* cptr = ((GLubyte*) DIFFUSE_POINTER.ptr) + (first * cstride);
ReadDiffuseFunc func = calcReadDiffuseFunc();
GLubyte* out = (GLubyte*) output[0].bgra; GLubyte* out = (GLubyte*) output[0].bgra;
ITERATE(count) { ITERATE(count) {
@ -772,35 +777,87 @@ static void generateElements(
} }
} }
static const uint32_t FAST_PATH_BYTE_SIZE = (sizeof(GLfloat) * 3) + (sizeof(GLfloat) * 2) + (sizeof(GLubyte) * 4);
static void generateArraysFastPath(SubmissionTarget* target, const GLsizei first, const GLuint count) { static void generateArraysFastPath(SubmissionTarget* target, const GLsizei first, const GLuint count) {
Vertex* start = _glSubmissionTargetStart(target); Vertex* start = _glSubmissionTargetStart(target);
const GLuint vstride = (VERTEX_POINTER.stride) ?
VERTEX_POINTER.stride : VERTEX_POINTER.size * byte_size(VERTEX_POINTER.type);
const GLuint uvstride = (UV_POINTER.stride) ?
UV_POINTER.stride : UV_POINTER.size * byte_size(UV_POINTER.type);
const GLuint ststride = (ST_POINTER.stride) ?
ST_POINTER.stride : ST_POINTER.size * byte_size(ST_POINTER.type);
const GLuint dstride = (DIFFUSE_POINTER.stride) ?
DIFFUSE_POINTER.stride : DIFFUSE_POINTER.size * byte_size(DIFFUSE_POINTER.type);
const GLuint nstride = (NORMAL_POINTER.stride) ?
NORMAL_POINTER.stride : NORMAL_POINTER.size * byte_size(NORMAL_POINTER.type);
/* Copy the pos, uv and color directly in one go */ /* Copy the pos, uv and color directly in one go */
const GLubyte* pos = VERTEX_POINTER.ptr; const GLubyte* pos = (ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG) ? VERTEX_POINTER.ptr + (first * vstride) : NULL;
Vertex* it = start; const GLubyte* uv = (ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) ? UV_POINTER.ptr + (first * uvstride) : NULL;
ITERATE(count) { const GLubyte* col = (ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) ? DIFFUSE_POINTER.ptr + (first * dstride) : NULL;
it->flags = GPU_CMD_VERTEX; const GLubyte* st = (ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) ? ST_POINTER.ptr + (first * ststride) : NULL;
MEMCPY4(it->xyz, pos, FAST_PATH_BYTE_SIZE); const GLubyte* n = (ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) ? NORMAL_POINTER.ptr + (first * nstride) : NULL;
it++;
pos += VERTEX_POINTER.stride;
}
VertexExtra* ve = aligned_vector_at(target->extras, 0); VertexExtra* ve = aligned_vector_at(target->extras, 0);
Vertex* it = start;
_readNormalData(first, count, ve); const float w = 1.0f;
_readSTData(first, count, ve);
uint32_t i = count;
while(i--) {
it->flags = GPU_CMD_VERTEX;
if(pos) {
TransformVertex((const float*) pos, &w, it->xyz, &it->w);
pos += vstride;
}
if(uv) {
MEMCPY4(it->uv, uv, sizeof(float) * 2);
uv += uvstride;
}
if(col) {
MEMCPY4(it->bgra, col, sizeof(uint32_t));
col += dstride;
}
if(st) {
MEMCPY4(ve->st, st, sizeof(float) * 2);
st += ststride;
}
if(n) {
MEMCPY4(ve->nxyz, n, sizeof(float) * 3);
n += nstride;
}
it++;
ve++;
}
} }
static void generateArrays(SubmissionTarget* target, const GLsizei first, const GLuint count) { static void generateArrays(SubmissionTarget* target, const GLsizei first, const GLuint count) {
Vertex* start = _glSubmissionTargetStart(target); Vertex* start = _glSubmissionTargetStart(target);
VertexExtra* ve = aligned_vector_at(target->extras, 0); VertexExtra* ve = aligned_vector_at(target->extras, 0);
_readPositionData(first, count, start); ReadPositionFunc pfunc = calcReadPositionFunc();
_readDiffuseData(first, count, start); ReadDiffuseFunc dfunc = calcReadDiffuseFunc();
_readUVData(first, count, start); ReadUVFunc uvfunc = calcReadUVFunc();
_readNormalData(first, count, ve); ReadNormalFunc nfunc = calcReadNormalFunc();
_readSTData(first, count, ve); ReadUVFunc stfunc = calcReadSTFunc();
_readPositionData(pfunc, first, count, start);
_readDiffuseData(dfunc, first, count, start);
_readUVData(uvfunc, first, count, start);
_readNormalData(nfunc, first, count, ve);
_readSTData(stfunc, first, count, ve);
} }
static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei first, const GLuint count, static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei first, const GLuint count,
@ -842,8 +899,6 @@ static void transform(SubmissionTarget* target) {
/* Perform modelview transform, storing W */ /* Perform modelview transform, storing W */
Vertex* vertex = _glSubmissionTargetStart(target); Vertex* vertex = _glSubmissionTargetStart(target);
_glApplyRenderMatrix(); /* Apply the Render Matrix Stack */
TransformVertices(vertex, target->count); TransformVertices(vertex, target->count);
} }
@ -1047,13 +1102,18 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
/* Make room for the vertices and header */ /* Make room for the vertices and header */
aligned_vector_extend(&target->output->vector, target->count + 1); aligned_vector_extend(&target->output->vector, target->count + 1);
_glApplyRenderMatrix(); /* Apply the Render Matrix Stack */
generate(target, mode, first, count, (GLubyte*) indices, type); generate(target, mode, first, count, (GLubyte*) indices, type);
if(doLighting){ if(doLighting){
light(target); light(target);
} }
transform(target); if(!FAST_PATH_ENABLED) {
transform(target);
}
if(_glIsClippingEnabled()) { if(_glIsClippingEnabled()) {
#if DEBUG_CLIPPING #if DEBUG_CLIPPING

View File

@ -69,6 +69,25 @@ inline void TransformVec4(float* x) {
} }
static inline void TransformVertex(const float* xyz, const float* w, float* oxyz, float* ow) {
register float __x __asm__("fr12") = (xyz[0]);
register float __y __asm__("fr13") = (xyz[1]);
register float __z __asm__("fr14") = (xyz[2]);
register float __w __asm__("fr15") = (*w);
__asm__ __volatile__(
"fldi1 fr15\n"
"ftrv xmtrx,fv12\n"
: "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w)
: "0" (__x), "1" (__y), "2" (__z), "3" (__w)
);
oxyz[0] = __x;
oxyz[1] = __y;
oxyz[2] = __z;
*ow = __w;
}
static inline void TransformVertices(Vertex* vertices, const int count) { static inline void TransformVertices(Vertex* vertices, const int count) {
Vertex* it = vertices; Vertex* it = vertices;
for(int i = 0; i < count; ++i, ++it) { for(int i = 0; i < count; ++i, ++it) {

View File

@ -328,3 +328,18 @@ void TransformVertices(Vertex* vertices, const int count) {
vertices->w = ret[3]; vertices->w = ret[3];
} }
} }
void TransformVertex(const float* xyz, const float* w, float* oxyz, float* ow) {
float ret[4];
ret[0] = xyz[0];
ret[1] = xyz[1];
ret[2] = xyz[2];
ret[3] = *w;
TransformVec4(ret);
oxyz[0] = ret[0];
oxyz[1] = ret[1];
oxyz[2] = ret[2];
*ow = ret[3];
}

View File

@ -50,6 +50,7 @@ static inline void TransformNormalNoMod(const float* xIn, float* xOut) {
} }
void TransformVertices(Vertex* vertices, const int count); void TransformVertices(Vertex* vertices, const int count);
void TransformVertex(const float* xyz, const float* w, float* oxyz, float* ow);
void InitGPU(_Bool autosort, _Bool fsaa); void InitGPU(_Bool autosort, _Bool fsaa);