Prefer relying on built-in codegen rather than relying on sh4_math.h

This commit is contained in:
UnknownShadow200 2025-02-01 13:06:37 +11:00
parent 2a2849d5e9
commit 71d0094ac4
7 changed files with 46 additions and 2155 deletions

View File

@ -538,6 +538,6 @@ size_t alloc_count_continuous(void* pool) {
if(free_bits && (free_bits * 256) > largest_block) {
largest_block = (free_bits * 256);
}
printf("LARGEST: %d\n", largest_block);
return largest_block;
}

View File

@ -757,9 +757,7 @@ static void _readNormalData(ReadNormalFunc func, const GLuint first, const GLuin
if(_glIsNormalizeEnabled()) {
GLfloat* n = (GLfloat*) it->nxyz;
float temp = n[0] * n[0];
temp = MATH_fmac(n[1], n[1], temp);
temp = MATH_fmac(n[2], n[2], temp);
float temp = n[0] * n[0] + n[1] * n[1] + n[2] * n[2];
float ilength = MATH_fsrra(temp);
n[0] *= ilength;

View File

@ -58,10 +58,10 @@ void _glPrecalcLightingValues(GLuint mask) {
if((mask & AMBIENT_MASK) || (mask & EMISSION_MASK) || (mask & SCENE_AMBIENT_MASK)) {
GLfloat* scene_ambient = _glLightModelSceneAmbient();
material->baseColour[0] = MATH_fmac(scene_ambient[0], material->ambient[0], material->emissive[0]);
material->baseColour[1] = MATH_fmac(scene_ambient[1], material->ambient[1], material->emissive[1]);
material->baseColour[2] = MATH_fmac(scene_ambient[2], material->ambient[2], material->emissive[2]);
material->baseColour[3] = MATH_fmac(scene_ambient[3], material->ambient[3], material->emissive[3]);
material->baseColour[0] = scene_ambient[0] * material->ambient[0] + material->emissive[0];
material->baseColour[1] = scene_ambient[1] * material->ambient[1] + material->emissive[1];
material->baseColour[2] = scene_ambient[2] * material->ambient[2] + material->emissive[2];
material->baseColour[3] = scene_ambient[3] * material->ambient[3] + material->emissive[3];
}
}

View File

@ -372,11 +372,12 @@ static inline void vec3f_cross(const GLfloat* v1, const GLfloat* v2, GLfloat* re
}
GL_FORCE_INLINE void vec3f_normalize_sh4(float *v){
float length, ilength;
float lengthSq, ilength;
ilength = MATH_fsrra(v[0]*v[0] + v[1]*v[1] + v[2]*v[2]);
length = MATH_Fast_Invert(ilength);
if (length)
lengthSq = v[0]*v[0] + v[1]*v[1] + v[2]*v[2];
ilength = MATH_fsrra(lengthSq);
if (lengthSq)
{
v[0] *= ilength;
v[1] *= ilength;

View File

@ -10,7 +10,41 @@
#include "../types.h"
#include "../private.h"
#include "sh4_math.h"
// ---- sh4_math.h - SH7091 Math Module ----
//
// This file is part of the DreamHAL project, a hardware abstraction library
// primarily intended for use on the SH7091 found in hardware such as the SEGA
// Dreamcast game console.
//
// This math module is hereby released into the public domain in the hope that it
// may prove useful. Now go hit 60 fps! :)
//
// --Moopthehedgehog
// 1/sqrt(x)
//
// Issues the SH4 `fsrra` instruction directly on the argument. x is bound as
// a read/write operand, so the instruction consumes it and overwrites it with
// the result, which is then returned.
// NOTE(review): fsrra is presumably a hardware approximation rather than an
// exactly rounded result, and its behaviour for x <= 0 is not visible here -
// confirm against the SH-4 manual before relying on either.
GL_FORCE_INLINE float MATH_fsrra(float x)
{
asm volatile ("fsrra %[one_div_sqrt]\n"
: [one_div_sqrt] "+f" (x) // outputs, "+" means r/w
: // no inputs
: // no clobbers
);
return x;
}
// Reciprocal via the inverse square root: |1/x| = 1 / sqrt(x * x).
// Squaring discards the sign of x, so the result is negated again whenever
// the input compares less than zero.
GL_FORCE_INLINE float MATH_Fast_Invert(float x)
{
    const float magnitude = MATH_fsrra(x * x);
    return (x < 0.0f) ? -magnitude : magnitude;
}
// end of ---- sh4_math.h ----
#ifndef NDEBUG
#define PERF_WARNING(msg) printf("[PERF] %s\n", msg)

File diff suppressed because it is too large Load Diff

View File

@ -7,9 +7,6 @@
// Plain C definitions of the math helpers, written as ordinary expressions.
// Every macro argument and every expansion is parenthesised so that compound
// arguments keep their intended precedence, e.g. MATH_fmac(a + b, c, d)
// expands to ((a + b) * (c) + (d)) rather than (a + b * c + d).
// PREFETCH expands to an empty statement; its argument is never evaluated.
#define PREFETCH(addr) do {} while(0)
#define MATH_Fast_Divide(n, d) ((n) / (d))
#define MATH_fmac(a, b, c) ((a) * (b) + (c))
#define MATH_Fast_Sqrt(x) sqrtf((x))
#define MATH_fsrra(x) (1.0f / sqrtf((x)))
#define MATH_Fast_Invert(x) (1.0f / (x))