Merge branch 'builtin_math' into 'master'

Prefer the compiler's built-in codegen over the hand-written helpers in sh4_math.h

See merge request simulant/GLdc!141
This commit is contained in:
Luke Benstead 2025-02-01 11:09:59 +00:00
commit 8a572f5a51
7 changed files with 48 additions and 2157 deletions

View File

@ -538,6 +538,6 @@ size_t alloc_count_continuous(void* pool) {
if(free_bits && (free_bits * 256) > largest_block) {
largest_block = (free_bits * 256);
}
printf("LARGEST: %d\n", largest_block);
return largest_block;
}

View File

@ -757,9 +757,7 @@ static void _readNormalData(ReadNormalFunc func, const GLuint first, const GLuin
if(_glIsNormalizeEnabled()) {
GLfloat* n = (GLfloat*) it->nxyz;
float temp = n[0] * n[0];
temp = MATH_fmac(n[1], n[1], temp);
temp = MATH_fmac(n[2], n[2], temp);
float temp = n[0] * n[0] + n[1] * n[1] + n[2] * n[2];
float ilength = MATH_fsrra(temp);
n[0] *= ilength;

View File

@ -58,10 +58,10 @@ void _glPrecalcLightingValues(GLuint mask) {
if((mask & AMBIENT_MASK) || (mask & EMISSION_MASK) || (mask & SCENE_AMBIENT_MASK)) {
GLfloat* scene_ambient = _glLightModelSceneAmbient();
material->baseColour[0] = MATH_fmac(scene_ambient[0], material->ambient[0], material->emissive[0]);
material->baseColour[1] = MATH_fmac(scene_ambient[1], material->ambient[1], material->emissive[1]);
material->baseColour[2] = MATH_fmac(scene_ambient[2], material->ambient[2], material->emissive[2]);
material->baseColour[3] = MATH_fmac(scene_ambient[3], material->ambient[3], material->emissive[3]);
material->baseColour[0] = scene_ambient[0] * material->ambient[0] + material->emissive[0];
material->baseColour[1] = scene_ambient[1] * material->ambient[1] + material->emissive[1];
material->baseColour[2] = scene_ambient[2] * material->ambient[2] + material->emissive[2];
material->baseColour[3] = scene_ambient[3] * material->ambient[3] + material->emissive[3];
}
}

View File

@ -372,11 +372,12 @@ static inline void vec3f_cross(const GLfloat* v1, const GLfloat* v2, GLfloat* re
}
GL_FORCE_INLINE void vec3f_normalize_sh4(float *v){
float length, ilength;
float lengthSq, ilength;
ilength = MATH_fsrra(v[0]*v[0] + v[1]*v[1] + v[2]*v[2]);
length = MATH_Fast_Invert(ilength);
if (length)
lengthSq = v[0]*v[0] + v[1]*v[1] + v[2]*v[2];
ilength = MATH_fsrra(lengthSq);
if (lengthSq)
{
v[0] *= ilength;
v[1] *= ilength;

View File

@ -10,8 +10,6 @@
#include "../types.h"
#include "../private.h"
#include "sh4_math.h"
#ifndef NDEBUG
#define PERF_WARNING(msg) printf("[PERF] %s\n", msg)
#else
@ -24,6 +22,42 @@
#define GL_FORCE_INLINE static GL_INLINE_DEBUG
#endif
// ---- sh4_math.h - SH7091 Math Module ----
//
// This file is part of the DreamHAL project, a hardware abstraction library
// primarily intended for use on the SH7091 found in hardware such as the SEGA
// Dreamcast game console.
//
// This math module is hereby released into the public domain in the hope that it
// may prove useful. Now go hit 60 fps! :)
//
// --Moopthehedgehog
// 1/sqrt(x)
//
// Runs the SH4 `fsrra` instruction on `x` and returns the result.
// The value is passed in a floating-point register that the instruction
// both reads and overwrites, so the result lands in the same variable.
//
// NOTE(review): `fsrra` is a hardware approximation of the reciprocal
// square root - confirm its precision is acceptable for each caller.
// NOTE(review): behaviour for x <= 0 is whatever the instruction yields;
// nothing here guards against it.
GL_FORCE_INLINE float MATH_fsrra(float x)
{
// `volatile` keeps the compiler from discarding or merging this asm statement.
asm volatile ("fsrra %[one_div_sqrt]\n"
: [one_div_sqrt] "+f" (x) // outputs, "+" means r/w
: // no inputs
: // no clobbers
);
return x;
}
// Reciprocal of x, obtained as 1 / sqrt(x * x).
// Squaring throws the sign away, so it is re-applied to the result:
// strictly negative inputs yield a negated value, everything else is
// returned exactly as MATH_fsrra produced it.
GL_FORCE_INLINE float MATH_Fast_Invert(float x)
{
    const float magnitude = MATH_fsrra(x * x);

    return (x < 0.0f) ? -magnitude : magnitude;
}
// end of ---- sh4_math.h ----
#define PREFETCH(addr) __builtin_prefetch((addr))
GL_FORCE_INLINE void* memcpy_fast(void *dest, const void *src, size_t len) {

File diff suppressed because it is too large Load Diff

View File

@ -7,9 +7,6 @@
#define PREFETCH(addr) do {} while(0)
// Plain-C arithmetic helpers: n / d and a * b + c.
// Every argument is parenthesized so that compound expressions passed by
// the caller (e.g. `x + y`) keep their grouping after macro expansion.
#define MATH_Fast_Divide(n, d) ((n) / (d))
#define MATH_fmac(a, b, c) ((a) * (b) + (c))
#define MATH_Fast_Sqrt(x) sqrtf((x))
#define MATH_fsrra(x) (1.0f / sqrtf((x)))
#define MATH_Fast_Invert(x) (1.0f / (x))
@ -28,7 +25,7 @@
} while(0)
#define VEC3_LENGTH(x, y, z, d) \
d = MATH_Fast_Sqrt((x) * (x) + (y) * (y) + (z) * (z))
d = sqrtf((x) * (x) + (y) * (y) + (z) * (z))
// Dot product of (x1, y1, z1) and (x2, y2, z2), stored into `d`.
// Expands to a single assignment, so call it as a statement:
//     VEC3_DOT(ax, ay, az, bx, by, bz, out);
// Each component is parenthesized so compound arguments such as `a + b`
// are multiplied as a whole rather than being split by operator precedence.
#define VEC3_DOT(x1, y1, z1, x2, y2, z2, d) \
    (d) = ((x1) * (x2)) + ((y1) * (y2)) + ((z1) * (z2))