Prefer relying on built-in codegen rather than on sh4_math.h

2025-02-01 13:06:37 +11:00 · 2025-02-01 13:06:37 +11:00 · 71d0094ac4
commit 71d0094ac4
parent 2a2849d5e9
7 changed files with 46 additions and 2155 deletions
--- a/GL/alloc/alloc.c
+++ b/GL/alloc/alloc.c
@ -538,6 +538,6 @@ size_t alloc_count_continuous(void* pool) {
    if(free_bits && (free_bits * 256) > largest_block) {
        largest_block = (free_bits * 256);
    }
-
+printf("LARGEST: %d\n", largest_block);
    return largest_block;
 }
--- a/GL/draw.c
+++ b/GL/draw.c
@ -757,9 +757,7 @@ static void _readNormalData(ReadNormalFunc func, const GLuint first, const GLuin

        if(_glIsNormalizeEnabled()) {
            GLfloat* n = (GLfloat*) it->nxyz;
-            float temp = n[0] * n[0];
-            temp = MATH_fmac(n[1], n[1], temp);
-            temp = MATH_fmac(n[2], n[2], temp);
+            float temp = n[0] * n[0] + n[1] * n[1] + n[2] * n[2];

            float ilength = MATH_fsrra(temp);
            n[0] *= ilength;
--- a/GL/lighting.c
+++ b/GL/lighting.c
@ -58,10 +58,10 @@ void _glPrecalcLightingValues(GLuint mask) {
    if((mask & AMBIENT_MASK) || (mask & EMISSION_MASK) || (mask & SCENE_AMBIENT_MASK)) {
        GLfloat* scene_ambient = _glLightModelSceneAmbient();

-        material->baseColour[0] = MATH_fmac(scene_ambient[0], material->ambient[0], material->emissive[0]);
-        material->baseColour[1] = MATH_fmac(scene_ambient[1], material->ambient[1], material->emissive[1]);
-        material->baseColour[2] = MATH_fmac(scene_ambient[2], material->ambient[2], material->emissive[2]);
-        material->baseColour[3] = MATH_fmac(scene_ambient[3], material->ambient[3], material->emissive[3]);
+        material->baseColour[0] = scene_ambient[0] * material->ambient[0] + material->emissive[0];
+        material->baseColour[1] = scene_ambient[1] * material->ambient[1] + material->emissive[1];
+        material->baseColour[2] = scene_ambient[2] * material->ambient[2] + material->emissive[2];
+        material->baseColour[3] = scene_ambient[3] * material->ambient[3] + material->emissive[3];
    }
 }

--- a/GL/matrix.c
+++ b/GL/matrix.c
@ -372,11 +372,12 @@ static inline void vec3f_cross(const GLfloat* v1, const GLfloat* v2, GLfloat* re
 }

 GL_FORCE_INLINE void vec3f_normalize_sh4(float *v){
-    float length, ilength;
+    float lengthSq, ilength;

-    ilength = MATH_fsrra(v[0]*v[0] + v[1]*v[1] + v[2]*v[2]);
-    length = MATH_Fast_Invert(ilength);
-    if (length)
+    lengthSq = v[0]*v[0] + v[1]*v[1] + v[2]*v[2];
+    ilength  = MATH_fsrra(lengthSq);
+
+    if (lengthSq)
    {
        v[0] *= ilength;
        v[1] *= ilength;
--- a/GL/platforms/sh4.h
+++ b/GL/platforms/sh4.h
@ -10,7 +10,41 @@
 #include "../types.h"
 #include "../private.h"

-#include "sh4_math.h"
+// ---- sh4_math.h - SH7091 Math Module ----
+//
+// This file is part of the DreamHAL project, a hardware abstraction library
+// primarily intended for use on the SH7091 found in hardware such as the SEGA
+// Dreamcast game console.
+//
+// This math module is hereby released into the public domain in the hope that it
+// may prove useful. Now go hit 60 fps! :)
+//
+// --Moopthehedgehog
+
+// 1/sqrt(x), computed in place on x by the SH4 `fsrra` instruction. NOTE(review): result for x <= 0 is whatever fsrra yields - confirm against the SH-4 manual.
+GL_FORCE_INLINE float MATH_fsrra(float x)
+{
+  asm volatile ("fsrra %[one_div_sqrt]\n"
+  : [one_div_sqrt] "+f" (x) // outputs, "+" means r/w
+  : // no inputs
+  : // no clobbers
+  );
+
+  return x; // x was overwritten by the asm statement above
+}
+
+// 1/x, computed as 1/sqrt(x^2) (i.e. 1/|x|) with the sign of x restored afterwards
+GL_FORCE_INLINE float MATH_Fast_Invert(float x)
+{
+  int neg = x < 0.0f; // remember the sign: squaring x below discards it
+
+  x = MATH_fsrra(x * x); // 1/sqrt(x*x), the magnitude of 1/x
+
+  if (neg) x = -x; // put the sign back for negative inputs
+  return x;
+}
+// end of ---- sh4_math.h ----
+

 #ifndef NDEBUG
 #define PERF_WARNING(msg) printf("[PERF] %s\n", msg)
--- a/GL/platforms/sh4_math.h
+++ b/GL/platforms/sh4_math.h
--- a/GL/platforms/software.h
+++ b/GL/platforms/software.h
@ -7,9 +7,6 @@

 #define PREFETCH(addr) do {} while(0)

-#define MATH_Fast_Divide(n, d) (n / d)
-#define MATH_fmac(a, b, c) (a * b + c)
-#define MATH_Fast_Sqrt(x) sqrtf((x))
 #define MATH_fsrra(x) (1.0f / sqrtf((x)))
 #define MATH_Fast_Invert(x) (1.0f / (x))