Merge branch 'master' of https://gitlab.com/simulant/GLdc

2020-05-13 19:05:29 -04:00 · 2020-05-13 19:05:29 -04:00 · 294819108d
commit 294819108d
parent 4e47b30b0a 01cb46a99e
3 changed files with 99 additions and 98 deletions
--- a/GL/draw.c
+++ b/GL/draw.c
@ -114,14 +114,8 @@ typedef void (*FloatParseFunc)(GLfloat* out, const GLubyte* in);
 typedef void (*ByteParseFunc)(GLubyte* out, const GLubyte* in);
 typedef void (*PolyBuildFunc)(Vertex* first, Vertex* previous, Vertex* vertex, Vertex* next, const GLsizei i);

-
-GL_FORCE_INLINE float clamp(float d, float min, float max) {
-    const float t = d < min ? min : d;
-    return t > max ? max : t;
-}
-
 static void _readVertexData3f3f(const GLubyte* in, GLubyte* out) {
-    memcpy(out, in, sizeof(float) * 12);
+    vec3cpy(out, in);
 }

 // 10:10:10:2REV format
@ -177,15 +171,14 @@ static void _readVertexData3ub3f(const GLubyte* input, GLubyte* out) {
 }

 static void _readVertexData2f2f(const GLubyte* in, GLubyte* out) {
-    memcpy(out, in, sizeof(float) * 2);
+    vec2cpy(out, in);
 }

 static void _readVertexData2f3f(const GLubyte* in, GLubyte* out) {
    const float* input = (const float*) in;
    float* output = (float*) out;

-    output[0] = input[0];
-    output[1] = input[1];
+    vec2cpy(output, input);
    output[2] = 0.0f;
 }

@ -937,36 +930,34 @@ static void light(SubmissionTarget* target) {
    _glPerformLighting(vertex, ES, target->count);
 }

+#define PVR_MIN_Z 0.2f
+#define PVR_MAX_Z (1.0 + PVR_MIN_Z) /* parenthesised so the sum stays intact inside larger expressions */
+
 GL_FORCE_INLINE void divide(SubmissionTarget* target) {
    TRACE();

    /* Perform perspective divide on each vertex */
    Vertex* vertex = _glSubmissionTargetStart(target);

+    /* The PVR expects invW or invZ as the final depth coordinate,
+     * but there are issues with both. Using invW means that orthographic
+     * projections fail (because W is always 1). invZ fails when stuff is near
+     * the near plane (because it ends up <= 0), so what we do is take Z, add
+     * the near-plane distance (plus an epsilon value) to take it above 0,
+     * then invert that. */
+
+    Matrix4x4* proj = _glGetProjectionMatrix();
+    float m22 = (*proj)[10];
+    float m32 = (*proj)[14];
+    float zNear = MATH_Fast_Divide((2.0f * m32), (2.0f * m22 - 2.0f));
+
    ITERATE(target->count) {
        float f = MATH_Fast_Invert(vertex->w);
        vertex->xyz[0] *= f;
        vertex->xyz[1] *= f;
-
-        /* Unlike normal GL graphics, the PVR takes Z coordinates from +EPSILON to +inf
-         * this is annoying because a traditional Z divide plus shift may end up with
-         * a coordinate of 0 which isn't valid. This is because the PVR
-         * expects invW as the coordinate, but that breaks orthographic projections
-         *
-         * So instead, we do a normal z/w divide, but shift from -1 to +1, and
-         * make it 0.001f to 2.001f, then we divide by 0.5 to bring it to
-         * 0.0005 to 1.0005 and finally we invert by subtracting from 1.001
-         * to just ensure we never end up with a value at 0.0 due to rounding
-         * errors */
-        vertex->xyz[2] = 1.001f - (((vertex->xyz[2] * f) + 1.001f) * 0.5f);
-
-        /* FIXME: Consider taking glDepthRange into account. PVR is designed to use 1/w
-         * which is unlike most GPUs - this apparently provides advantages.
-         *
-         * This can be done (if Z is between -1 and 1) with:
-         *
-         * //((DEPTH_RANGE_MULTIPLIER_L * vertex->xyz[2] * f) + DEPTH_RANGE_MULTIPLIER_H);
-         */
+        vertex->xyz[2] = MATH_Fast_Invert(
+            vertex->xyz[2] + zNear + 0.05f
+        );
        ++vertex;
    }
 }
--- a/GL/lighting.c
+++ b/GL/lighting.c
@ -78,27 +78,29 @@ void _glEnableLight(GLubyte light, GLboolean value) {
 }

 GL_FORCE_INLINE void _glPrecalcLightingValues(GLuint mask) {
-    float baseColour[4];
-
    /* Pre-calculate lighting values */
-    GLubyte i;
+    GLshort i;

-    for(i = 0; i < MAX_LIGHTS; ++i) {
-        if(mask & AMBIENT_MASK) {
+    if(mask & AMBIENT_MASK) {
+        for(i = 0; i < MAX_LIGHTS; ++i) {
            LIGHTS[i].ambientMaterial[0] = LIGHTS[i].ambient[0] * MATERIAL.ambient[0];
            LIGHTS[i].ambientMaterial[1] = LIGHTS[i].ambient[1] * MATERIAL.ambient[1];
            LIGHTS[i].ambientMaterial[2] = LIGHTS[i].ambient[2] * MATERIAL.ambient[2];
            LIGHTS[i].ambientMaterial[3] = LIGHTS[i].ambient[3] * MATERIAL.ambient[3];
        }
+    }

-        if(mask & DIFFUSE_MASK) {
+    if(mask & DIFFUSE_MASK) {
+        for(i = 0; i < MAX_LIGHTS; ++i) {
            LIGHTS[i].diffuseMaterial[0] = LIGHTS[i].diffuse[0] * MATERIAL.diffuse[0];
            LIGHTS[i].diffuseMaterial[1] = LIGHTS[i].diffuse[1] * MATERIAL.diffuse[1];
            LIGHTS[i].diffuseMaterial[2] = LIGHTS[i].diffuse[2] * MATERIAL.diffuse[2];
            LIGHTS[i].diffuseMaterial[3] = LIGHTS[i].diffuse[3] * MATERIAL.diffuse[3];
        }
+    }

-        if(mask & SPECULAR_MASK) {
+    if(mask & SPECULAR_MASK) {
+        for(i = 0; i < MAX_LIGHTS; ++i) {
            LIGHTS[i].specularMaterial[0] = LIGHTS[i].specular[0] * MATERIAL.specular[0];
            LIGHTS[i].specularMaterial[1] = LIGHTS[i].specular[1] * MATERIAL.specular[1];
            LIGHTS[i].specularMaterial[2] = LIGHTS[i].specular[2] * MATERIAL.specular[2];
@ -109,15 +111,10 @@ GL_FORCE_INLINE void _glPrecalcLightingValues(GLuint mask) {
    /* If ambient or emission are updated, we need to update
     * the base colour. */
    if((mask & AMBIENT_MASK) || (mask & EMISSION_MASK) || (mask & SCENE_AMBIENT_MASK)) {
-        baseColour[0] = SCENE_AMBIENT[0] * MATERIAL.ambient[0] + MATERIAL.emissive[0];
-        baseColour[1] = SCENE_AMBIENT[1] * MATERIAL.ambient[1] + MATERIAL.emissive[1];
-        baseColour[2] = SCENE_AMBIENT[2] * MATERIAL.ambient[2] + MATERIAL.emissive[2];
-        baseColour[3] = SCENE_AMBIENT[3] * MATERIAL.ambient[3] + MATERIAL.emissive[3];
-
-        MATERIAL.baseColour[R8IDX] = (uint8_t)(_MIN(baseColour[0] * 255.0f, 255.0f));
-        MATERIAL.baseColour[G8IDX] = (uint8_t)(_MIN(baseColour[1] * 255.0f, 255.0f));
-        MATERIAL.baseColour[B8IDX] = (uint8_t)(_MIN(baseColour[2] * 255.0f, 255.0f));
-        MATERIAL.baseColour[A8IDX] = (uint8_t)(_MIN(baseColour[3] * 255.0f, 255.0f));
+        MATERIAL.baseColour[0] = MATH_fmac(SCENE_AMBIENT[0], MATERIAL.ambient[0], MATERIAL.emissive[0]);
+        MATERIAL.baseColour[1] = MATH_fmac(SCENE_AMBIENT[1], MATERIAL.ambient[1], MATERIAL.emissive[1]);
+        MATERIAL.baseColour[2] = MATH_fmac(SCENE_AMBIENT[2], MATERIAL.ambient[2], MATERIAL.emissive[2]);
+        MATERIAL.baseColour[3] = MATH_fmac(SCENE_AMBIENT[3], MATERIAL.ambient[3], MATERIAL.emissive[3]);
    }
 }

@ -326,11 +323,23 @@ void APIENTRY glColorMaterial(GLenum face, GLenum mode) {
    COLOR_MATERIAL_MODE = mode;
 }

-void _glUpdateColourMaterial(GLfloat* colour) {
+GL_FORCE_INLINE void bgra_to_float(const uint8_t* input, GLfloat* output) {
+    static const float scale = 1.0f / 255.0f;
+
+    output[0] = ((float) input[R8IDX]) * scale;
+    output[1] = ((float) input[G8IDX]) * scale;
+    output[2] = ((float) input[B8IDX]) * scale;
+    output[3] = ((float) input[A8IDX]) * scale;
+}
+
+void _glUpdateColourMaterial(const GLubyte* argb) {
    if(!_glIsColorMaterialEnabled()) {
        return;
    }

+    float colour[4];
+    bgra_to_float(argb, colour);
+
    switch(COLOR_MATERIAL_MODE) {
        case GL_AMBIENT:
            vec4cpy(MATERIAL.ambient, colour);
@ -396,76 +405,56 @@ GL_FORCE_INLINE float faster_pow(const float x, const float p) {
 }

 GL_FORCE_INLINE void _glLightVertexDirectional(
-    uint8_t* final, uint8_t lid,
+    float* final, uint8_t lid,
    float LdotN, float NdotH) {

    float FI = (MATERIAL.exponent) ?
        faster_pow((LdotN != 0.0f) * NdotH, MATERIAL.exponent) : 1.0f;

-#define _PROCESS_COMPONENT(T, X) \
-    do { \
-        float F = (LdotN * LIGHTS[lid].diffuseMaterial[X] + LIGHTS[lid].ambientMaterial[X]) \
-            + (FI * LIGHTS[lid].specularMaterial[X]); \
-        uint8_t FO = (uint8_t) (_MIN(F * 255.0f, 255.0f)); \
-        final[T] += _MIN(FO, 255 - final[T]); \
-    } while(0);
+#define _PROCESS_COMPONENT(X) \
+    final[X] += (LdotN * LIGHTS[lid].diffuseMaterial[X] + LIGHTS[lid].ambientMaterial[X]) \
+        + (FI * LIGHTS[lid].specularMaterial[X]); \

-    _PROCESS_COMPONENT(R8IDX, 0);
-    _PROCESS_COMPONENT(G8IDX, 1);
-    _PROCESS_COMPONENT(B8IDX, 2);
+    _PROCESS_COMPONENT(0);
+    _PROCESS_COMPONENT(1);
+    _PROCESS_COMPONENT(2);

 #undef _PROCESS_COMPONENT
 }

 GL_FORCE_INLINE void _glLightVertexPoint(
-    uint8_t* final, uint8_t lid,
+    float* final, uint8_t lid,
    float LdotN, float NdotH, float att) {

    float FI = (MATERIAL.exponent) ?
        faster_pow((LdotN != 0.0f) * NdotH, MATERIAL.exponent) : 1.0f;

-#define _PROCESS_COMPONENT(T, X) \
-    do { \
-        float F = (LdotN * LIGHTS[lid].diffuseMaterial[X] + LIGHTS[lid].ambientMaterial[X]) \
-            + (FI * LIGHTS[lid].specularMaterial[X]); \
-        uint8_t FO = (uint8_t) (_MIN(F * att * 255.0f, 255.0f)); \
-        final[T] += _MIN(FO, 255 - final[T]); \
-    } while(0); \
+#define _PROCESS_COMPONENT(X) \
+    final[X] += ((LdotN * LIGHTS[lid].diffuseMaterial[X] + LIGHTS[lid].ambientMaterial[X]) \
+        + (FI * LIGHTS[lid].specularMaterial[X])) * att; \

-    _PROCESS_COMPONENT(R8IDX, 0);
-    _PROCESS_COMPONENT(G8IDX, 1);
-    _PROCESS_COMPONENT(B8IDX, 2);
+    _PROCESS_COMPONENT(0);
+    _PROCESS_COMPONENT(1);
+    _PROCESS_COMPONENT(2);

 #undef _PROCESS_COMPONENT
 }

-GL_FORCE_INLINE void bgra_to_float(const uint8_t* input, GLfloat* output) {
-    static const float scale = 1.0f / 255.0f;
-
-    output[0] = ((float) input[R8IDX]) * scale;
-    output[1] = ((float) input[G8IDX]) * scale;
-    output[2] = ((float) input[B8IDX]) * scale;
-    output[3] = ((float) input[A8IDX]) * scale;
-}
-
 void _glPerformLighting(Vertex* vertices, const EyeSpaceData* es, const int32_t count) {
-    uint8_t i;
+    int16_t i;
    int32_t j;

    Vertex* vertex = vertices;
    const EyeSpaceData* data = es;

-    /* This is the original vertex colour, before we replace it. It's
-     * used for colour material */
-    float vdiffuse[4];
+    /* Final colour of lighting output (will be clamped to argb) */
+    float final[4];

    for(j = 0; j < count; ++j, ++vertex, ++data) {
-        /* Unpack the colour for use in glColorMaterial */
-        bgra_to_float(vertex->bgra, vdiffuse);
-        _glUpdateColourMaterial(vdiffuse);
+        _glUpdateColourMaterial(vertex->bgra);

        /* Copy the base colour across */
-        argbcpy(vertex->bgra, MATERIAL.baseColour);
+        vec4cpy(final, MATERIAL.baseColour);

        /* Direction to vertex in eye space */
        float Vx = -data->xyz[0];
@ -505,7 +494,7 @@ void _glPerformLighting(Vertex* vertices, const EyeSpaceData* es, const int32_t
                if(NdotH < 0.0f) NdotH = 0.0f;

                _glLightVertexDirectional(
-                    vertex->bgra,
+                    final,
                    i, LdotN, NdotH
                );
            } else {
@ -545,12 +534,17 @@ void _glPerformLighting(Vertex* vertices, const EyeSpaceData* es, const int32_t
                    if(NdotH < 0.0f) NdotH = 0.0f;

                    _glLightVertexPoint(
-                        vertex->bgra,
+                        final,
                        i, LdotN, NdotH, att
                    );
                }
            }
        }
+
+        vertex->bgra[R8IDX] = clamp(final[0] * 255.0f, 0, 255);
+        vertex->bgra[G8IDX] = clamp(final[1] * 255.0f, 0, 255);
+        vertex->bgra[B8IDX] = clamp(final[2] * 255.0f, 0, 255);
+        vertex->bgra[A8IDX] = clamp(final[3] * 255.0f, 0, 255);
    }
 }

--- a/GL/private.h
+++ b/GL/private.h
@ -166,7 +166,7 @@ typedef struct {

    /* Base ambient + emission colour for
     * the current material + light */
-    GLubyte baseColour[4];
+    GLfloat baseColour[4];
 } Material;

 typedef struct {
@ -206,18 +206,34 @@ typedef struct {
 } Vertex;


-#define argbcpy(src, dst) \
-    *((GLuint*) src) = *((GLuint*) dst) \
+#define argbcpy(dst, src) \
+    *((GLuint*) dst) = *((GLuint*) src) \


-#define vec4cpy(src, dst) \
-    do { \
-        src[0] = dst[0]; \
-        src[1] = dst[1]; \
-        src[2] = dst[2]; \
-        src[3] = dst[3]; \
-    } while(0) \
+typedef struct {
+    float xy[2];
+} _glvec2;

+typedef struct {
+    float xyz[3];
+} _glvec3;
+
+typedef struct {
+    float xyzw[4];
+} _glvec4;
+/* vecNcpy(dst, src): copy N floats from src to dst with one struct assignment. */
+#define vec2cpy(dst, src) \
+    *((_glvec2*) (dst)) = *((_glvec2*) (src))
+/* Arguments are parenthesised so pointer expressions such as (p + 1) are cast as a whole. */
+#define vec3cpy(dst, src) \
+    *((_glvec3*) (dst)) = *((_glvec3*) (src))
+/* NOTE(review): the casts assume dst/src are suitably aligned for float access - confirm at call sites. */
+#define vec4cpy(dst, src) \
+    *((_glvec4*) (dst)) = *((_glvec4*) (src))
+
+GL_FORCE_INLINE float clamp(float d, float min, float max) {
+    return (d < min) ? min : (d > max) ? max : d;
+}

 #define swapVertex(a, b)   \
 do {                 \